From da736d8259331a8ef13bf4bbb10bbb8a5c0e5299 Mon Sep 17 00:00:00 2001 From: Florian Dold Date: Tue, 13 Aug 2019 12:29:07 +0200 Subject: remove node/v8 from source tree --- .../deps/icu-small/source/common/appendable.cpp | 74 - deps/node/deps/icu-small/source/common/bmpset.cpp | 741 --- deps/node/deps/icu-small/source/common/bmpset.h | 164 - deps/node/deps/icu-small/source/common/brkeng.cpp | 285 - deps/node/deps/icu-small/source/common/brkeng.h | 271 - deps/node/deps/icu-small/source/common/brkiter.cpp | 508 -- .../deps/icu-small/source/common/bytesinkutil.cpp | 161 - .../deps/icu-small/source/common/bytesinkutil.h | 83 - .../deps/icu-small/source/common/bytestream.cpp | 85 - .../deps/icu-small/source/common/bytestrie.cpp | 441 -- .../icu-small/source/common/bytestriebuilder.cpp | 504 -- .../icu-small/source/common/bytestrieiterator.cpp | 214 - deps/node/deps/icu-small/source/common/caniter.cpp | 586 -- .../source/common/characterproperties.cpp | 336 -- .../node/deps/icu-small/source/common/chariter.cpp | 100 - deps/node/deps/icu-small/source/common/charstr.cpp | 192 - deps/node/deps/icu-small/source/common/charstr.h | 154 - deps/node/deps/icu-small/source/common/cmemory.cpp | 138 - deps/node/deps/icu-small/source/common/cmemory.h | 681 --- deps/node/deps/icu-small/source/common/common.rc | 109 - deps/node/deps/icu-small/source/common/cpputils.h | 97 - deps/node/deps/icu-small/source/common/cstr.cpp | 54 - deps/node/deps/icu-small/source/common/cstr.h | 60 - deps/node/deps/icu-small/source/common/cstring.cpp | 341 -- deps/node/deps/icu-small/source/common/cstring.h | 124 - deps/node/deps/icu-small/source/common/cwchar.cpp | 54 - deps/node/deps/icu-small/source/common/cwchar.h | 58 - deps/node/deps/icu-small/source/common/dictbe.cpp | 1391 ----- deps/node/deps/icu-small/source/common/dictbe.h | 402 -- .../icu-small/source/common/dictionarydata.cpp | 242 - .../deps/icu-small/source/common/dictionarydata.h | 191 - deps/node/deps/icu-small/source/common/dtintrv.cpp | 
62 - deps/node/deps/icu-small/source/common/edits.cpp | 803 --- .../deps/icu-small/source/common/errorcode.cpp | 42 - .../deps/icu-small/source/common/filteredbrk.cpp | 710 --- .../source/common/filterednormalizer2.cpp | 363 -- deps/node/deps/icu-small/source/common/hash.h | 247 - .../deps/icu-small/source/common/icudataver.cpp | 31 - deps/node/deps/icu-small/source/common/icuplug.cpp | 882 --- .../node/deps/icu-small/source/common/icuplugimp.h | 93 - .../source/common/loadednormalizer2impl.cpp | 418 -- deps/node/deps/icu-small/source/common/localsvc.h | 27 - .../deps/icu-small/source/common/locavailable.cpp | 180 - .../node/deps/icu-small/source/common/locbased.cpp | 55 - deps/node/deps/icu-small/source/common/locbased.h | 107 - .../deps/icu-small/source/common/locdispnames.cpp | 886 --- .../node/deps/icu-small/source/common/locdspnm.cpp | 1058 ---- deps/node/deps/icu-small/source/common/locid.cpp | 1556 ------ .../deps/icu-small/source/common/loclikely.cpp | 1385 ----- deps/node/deps/icu-small/source/common/locmap.cpp | 1320 ----- deps/node/deps/icu-small/source/common/locmap.h | 39 - .../deps/icu-small/source/common/locresdata.cpp | 225 - deps/node/deps/icu-small/source/common/locutil.cpp | 273 - deps/node/deps/icu-small/source/common/locutil.h | 39 - .../deps/icu-small/source/common/messageimpl.h | 65 - .../icu-small/source/common/messagepattern.cpp | 1233 ----- deps/node/deps/icu-small/source/common/msvcres.h | 25 - deps/node/deps/icu-small/source/common/mutex.h | 79 - .../deps/icu-small/source/common/norm2_nfc_data.h | 1139 ---- .../deps/icu-small/source/common/norm2allmodes.h | 369 -- .../deps/icu-small/source/common/normalizer2.cpp | 572 -- .../icu-small/source/common/normalizer2impl.cpp | 2660 --------- .../deps/icu-small/source/common/normalizer2impl.h | 978 ---- deps/node/deps/icu-small/source/common/normlzr.cpp | 529 -- .../node/deps/icu-small/source/common/parsepos.cpp | 23 - .../deps/icu-small/source/common/patternprops.cpp | 220 - 
.../deps/icu-small/source/common/patternprops.h | 91 - .../deps/icu-small/source/common/pluralmap.cpp | 43 - deps/node/deps/icu-small/source/common/pluralmap.h | 292 - .../node/deps/icu-small/source/common/propname.cpp | 328 -- deps/node/deps/icu-small/source/common/propname.h | 212 - .../deps/icu-small/source/common/propname_data.h | 1861 ------- .../node/deps/icu-small/source/common/propsvec.cpp | 529 -- deps/node/deps/icu-small/source/common/propsvec.h | 178 - .../node/deps/icu-small/source/common/punycode.cpp | 589 -- deps/node/deps/icu-small/source/common/punycode.h | 120 - deps/node/deps/icu-small/source/common/putil.cpp | 2444 --------- deps/node/deps/icu-small/source/common/putilimp.h | 669 --- deps/node/deps/icu-small/source/common/rbbi.cpp | 1274 ----- .../deps/icu-small/source/common/rbbi_cache.cpp | 658 --- .../node/deps/icu-small/source/common/rbbi_cache.h | 203 - deps/node/deps/icu-small/source/common/rbbicst.pl | 456 -- .../node/deps/icu-small/source/common/rbbidata.cpp | 425 -- deps/node/deps/icu-small/source/common/rbbidata.h | 197 - .../node/deps/icu-small/source/common/rbbinode.cpp | 372 -- deps/node/deps/icu-small/source/common/rbbinode.h | 126 - deps/node/deps/icu-small/source/common/rbbirb.cpp | 328 -- deps/node/deps/icu-small/source/common/rbbirb.h | 234 - deps/node/deps/icu-small/source/common/rbbirpt.h | 296 - .../node/deps/icu-small/source/common/rbbiscan.cpp | 1279 ----- deps/node/deps/icu-small/source/common/rbbiscan.h | 165 - .../node/deps/icu-small/source/common/rbbisetb.cpp | 684 --- deps/node/deps/icu-small/source/common/rbbisetb.h | 147 - .../node/deps/icu-small/source/common/rbbistbl.cpp | 270 - .../node/deps/icu-small/source/common/rbbitblb.cpp | 1684 ------ deps/node/deps/icu-small/source/common/rbbitblb.h | 216 - deps/node/deps/icu-small/source/common/resbund.cpp | 398 -- .../deps/icu-small/source/common/resbund_cnv.cpp | 57 - .../node/deps/icu-small/source/common/resource.cpp | 22 - 
deps/node/deps/icu-small/source/common/resource.h | 279 - .../node/deps/icu-small/source/common/ruleiter.cpp | 162 - deps/node/deps/icu-small/source/common/ruleiter.h | 233 - .../node/deps/icu-small/source/common/schriter.cpp | 119 - deps/node/deps/icu-small/source/common/serv.cpp | 982 ---- deps/node/deps/icu-small/source/common/serv.h | 995 ---- deps/node/deps/icu-small/source/common/servlk.cpp | 186 - deps/node/deps/icu-small/source/common/servlkf.cpp | 150 - deps/node/deps/icu-small/source/common/servloc.h | 550 -- deps/node/deps/icu-small/source/common/servls.cpp | 293 - .../node/deps/icu-small/source/common/servnotf.cpp | 119 - deps/node/deps/icu-small/source/common/servnotf.h | 125 - deps/node/deps/icu-small/source/common/servrbf.cpp | 94 - .../node/deps/icu-small/source/common/servslkf.cpp | 121 - .../deps/icu-small/source/common/sharedobject.cpp | 62 - .../deps/icu-small/source/common/sharedobject.h | 184 - .../icu-small/source/common/simpleformatter.cpp | 314 -- deps/node/deps/icu-small/source/common/sprpimpl.h | 130 - .../source/common/static_unicode_sets.cpp | 223 - .../icu-small/source/common/static_unicode_sets.h | 119 - .../deps/icu-small/source/common/stringpiece.cpp | 75 - .../icu-small/source/common/stringtriebuilder.cpp | 618 --- .../node/deps/icu-small/source/common/uarrsort.cpp | 288 - deps/node/deps/icu-small/source/common/uarrsort.h | 103 - deps/node/deps/icu-small/source/common/uassert.h | 32 - deps/node/deps/icu-small/source/common/ubidi.cpp | 3039 ----------- .../deps/icu-small/source/common/ubidi_props.cpp | 254 - .../deps/icu-small/source/common/ubidi_props.h | 148 - .../icu-small/source/common/ubidi_props_data.h | 894 --- deps/node/deps/icu-small/source/common/ubidiimp.h | 468 -- deps/node/deps/icu-small/source/common/ubidiln.cpp | 1349 ----- .../icu-small/source/common/ubiditransform.cpp | 530 -- .../node/deps/icu-small/source/common/ubidiwrt.cpp | 640 --- deps/node/deps/icu-small/source/common/ubrk.cpp | 357 -- 
deps/node/deps/icu-small/source/common/ubrkimpl.h | 15 - deps/node/deps/icu-small/source/common/ucase.cpp | 1571 ------ deps/node/deps/icu-small/source/common/ucase.h | 444 -- .../icu-small/source/common/ucase_props_data.h | 931 ---- .../node/deps/icu-small/source/common/ucasemap.cpp | 953 ---- .../deps/icu-small/source/common/ucasemap_imp.h | 282 - .../source/common/ucasemap_titlecase_brkiter.cpp | 134 - deps/node/deps/icu-small/source/common/ucat.cpp | 78 - deps/node/deps/icu-small/source/common/uchar.cpp | 725 --- .../icu-small/source/common/uchar_props_data.h | 3707 ------------- .../deps/icu-small/source/common/ucharstrie.cpp | 414 -- .../icu-small/source/common/ucharstriebuilder.cpp | 443 -- .../icu-small/source/common/ucharstrieiterator.cpp | 215 - .../node/deps/icu-small/source/common/uchriter.cpp | 367 -- deps/node/deps/icu-small/source/common/ucln.h | 91 - .../node/deps/icu-small/source/common/ucln_cmn.cpp | 113 - deps/node/deps/icu-small/source/common/ucln_cmn.h | 74 - deps/node/deps/icu-small/source/common/ucln_imp.h | 182 - .../node/deps/icu-small/source/common/ucmndata.cpp | 393 -- deps/node/deps/icu-small/source/common/ucmndata.h | 117 - deps/node/deps/icu-small/source/common/ucnv.cpp | 2910 ---------- .../node/deps/icu-small/source/common/ucnv2022.cpp | 3978 -------------- .../node/deps/icu-small/source/common/ucnv_bld.cpp | 1689 ------ deps/node/deps/icu-small/source/common/ucnv_bld.h | 296 - deps/node/deps/icu-small/source/common/ucnv_cb.cpp | 261 - .../node/deps/icu-small/source/common/ucnv_cnv.cpp | 182 - deps/node/deps/icu-small/source/common/ucnv_cnv.h | 323 -- deps/node/deps/icu-small/source/common/ucnv_ct.cpp | 646 --- .../node/deps/icu-small/source/common/ucnv_err.cpp | 486 -- .../node/deps/icu-small/source/common/ucnv_ext.cpp | 1143 ---- deps/node/deps/icu-small/source/common/ucnv_ext.h | 481 -- deps/node/deps/icu-small/source/common/ucnv_imp.h | 139 - deps/node/deps/icu-small/source/common/ucnv_io.cpp | 1360 ----- 
deps/node/deps/icu-small/source/common/ucnv_io.h | 127 - .../node/deps/icu-small/source/common/ucnv_lmb.cpp | 1386 ----- .../node/deps/icu-small/source/common/ucnv_set.cpp | 70 - .../node/deps/icu-small/source/common/ucnv_u16.cpp | 1579 ------ .../node/deps/icu-small/source/common/ucnv_u32.cpp | 1253 ----- deps/node/deps/icu-small/source/common/ucnv_u7.cpp | 1491 ----- deps/node/deps/icu-small/source/common/ucnv_u8.cpp | 944 ---- .../node/deps/icu-small/source/common/ucnvbocu.cpp | 1413 ----- .../node/deps/icu-small/source/common/ucnvdisp.cpp | 88 - deps/node/deps/icu-small/source/common/ucnvhz.cpp | 633 --- .../node/deps/icu-small/source/common/ucnvisci.cpp | 1635 ------ .../node/deps/icu-small/source/common/ucnvlat1.cpp | 756 --- .../node/deps/icu-small/source/common/ucnvmbcs.cpp | 5723 -------------------- deps/node/deps/icu-small/source/common/ucnvmbcs.h | 605 --- .../node/deps/icu-small/source/common/ucnvscsu.cpp | 2045 ------- deps/node/deps/icu-small/source/common/ucnvsel.cpp | 823 --- deps/node/deps/icu-small/source/common/ucol_data.h | 89 - .../node/deps/icu-small/source/common/ucol_swp.cpp | 615 --- deps/node/deps/icu-small/source/common/ucol_swp.h | 58 - deps/node/deps/icu-small/source/common/ucptrie.cpp | 590 -- .../deps/icu-small/source/common/ucptrie_impl.h | 289 - deps/node/deps/icu-small/source/common/ucurr.cpp | 2731 ---------- deps/node/deps/icu-small/source/common/ucurrimp.h | 78 - deps/node/deps/icu-small/source/common/udata.cpp | 1453 ----- .../node/deps/icu-small/source/common/udatamem.cpp | 161 - deps/node/deps/icu-small/source/common/udatamem.h | 61 - .../node/deps/icu-small/source/common/udataswp.cpp | 473 -- deps/node/deps/icu-small/source/common/udataswp.h | 404 -- deps/node/deps/icu-small/source/common/uelement.h | 91 - deps/node/deps/icu-small/source/common/uenum.cpp | 189 - deps/node/deps/icu-small/source/common/uenumimp.h | 155 - deps/node/deps/icu-small/source/common/uhash.cpp | 991 ---- deps/node/deps/icu-small/source/common/uhash.h 
| 718 --- .../node/deps/icu-small/source/common/uhash_us.cpp | 26 - deps/node/deps/icu-small/source/common/uidna.cpp | 922 ---- deps/node/deps/icu-small/source/common/uinit.cpp | 74 - .../node/deps/icu-small/source/common/uinvchar.cpp | 614 --- deps/node/deps/icu-small/source/common/uinvchar.h | 130 - deps/node/deps/icu-small/source/common/uiter.cpp | 1108 ---- .../icu-small/source/common/ulayout_props_data.h | 722 --- deps/node/deps/icu-small/source/common/ulist.cpp | 270 - deps/node/deps/icu-small/source/common/ulist.h | 50 - deps/node/deps/icu-small/source/common/uloc.cpp | 2672 --------- .../deps/icu-small/source/common/uloc_keytype.cpp | 590 -- .../node/deps/icu-small/source/common/uloc_tag.cpp | 2884 ---------- deps/node/deps/icu-small/source/common/ulocimp.h | 147 - .../node/deps/icu-small/source/common/umapfile.cpp | 528 -- deps/node/deps/icu-small/source/common/umapfile.h | 57 - deps/node/deps/icu-small/source/common/umath.cpp | 25 - .../icu-small/source/common/umutablecptrie.cpp | 1678 ------ deps/node/deps/icu-small/source/common/umutex.cpp | 378 -- deps/node/deps/icu-small/source/common/umutex.h | 458 -- deps/node/deps/icu-small/source/common/unames.cpp | 2101 ------- .../icu-small/source/common/unicode/appendable.h | 234 - .../deps/icu-small/source/common/unicode/brkiter.h | 660 --- .../icu-small/source/common/unicode/bytestream.h | 270 - .../icu-small/source/common/unicode/bytestrie.h | 520 -- .../source/common/unicode/bytestriebuilder.h | 182 - .../deps/icu-small/source/common/unicode/caniter.h | 210 - .../deps/icu-small/source/common/unicode/casemap.h | 492 -- .../icu-small/source/common/unicode/char16ptr.h | 308 -- .../icu-small/source/common/unicode/chariter.h | 728 --- .../deps/icu-small/source/common/unicode/dbbi.h | 42 - .../deps/icu-small/source/common/unicode/docmain.h | 227 - .../deps/icu-small/source/common/unicode/dtintrv.h | 160 - .../deps/icu-small/source/common/unicode/edits.h | 526 -- .../deps/icu-small/source/common/unicode/enumset.h 
| 69 - .../icu-small/source/common/unicode/errorcode.h | 139 - .../icu-small/source/common/unicode/filteredbrk.h | 145 - .../icu-small/source/common/unicode/icudataver.h | 43 - .../deps/icu-small/source/common/unicode/icuplug.h | 387 -- .../deps/icu-small/source/common/unicode/idna.h | 325 -- .../icu-small/source/common/unicode/localpointer.h | 524 -- .../icu-small/source/common/unicode/locdspnm.h | 207 - .../deps/icu-small/source/common/unicode/locid.h | 1181 ---- .../source/common/unicode/messagepattern.h | 945 ---- .../icu-small/source/common/unicode/normalizer2.h | 774 --- .../deps/icu-small/source/common/unicode/normlzr.h | 809 --- .../icu-small/source/common/unicode/parseerr.h | 94 - .../icu-small/source/common/unicode/parsepos.h | 232 - .../icu-small/source/common/unicode/platform.h | 855 --- .../deps/icu-small/source/common/unicode/ptypes.h | 130 - .../deps/icu-small/source/common/unicode/putil.h | 183 - .../deps/icu-small/source/common/unicode/rbbi.h | 699 --- .../deps/icu-small/source/common/unicode/rep.h | 263 - .../deps/icu-small/source/common/unicode/resbund.h | 492 -- .../icu-small/source/common/unicode/schriter.h | 189 - .../source/common/unicode/simpleformatter.h | 303 -- .../icu-small/source/common/unicode/std_string.h | 37 - .../deps/icu-small/source/common/unicode/strenum.h | 278 - .../source/common/unicode/stringoptions.h | 190 - .../icu-small/source/common/unicode/stringpiece.h | 224 - .../source/common/unicode/stringtriebuilder.h | 421 -- .../icu-small/source/common/unicode/symtable.h | 114 - .../deps/icu-small/source/common/unicode/ubidi.h | 2207 -------- .../source/common/unicode/ubiditransform.h | 323 -- .../deps/icu-small/source/common/unicode/ubrk.h | 628 --- .../icu-small/source/common/unicode/ucasemap.h | 385 -- .../deps/icu-small/source/common/unicode/ucat.h | 160 - .../deps/icu-small/source/common/unicode/uchar.h | 4023 -------------- .../icu-small/source/common/unicode/ucharstrie.h | 578 -- 
.../source/common/unicode/ucharstriebuilder.h | 187 - .../icu-small/source/common/unicode/uchriter.h | 387 -- .../deps/icu-small/source/common/unicode/uclean.h | 262 - .../deps/icu-small/source/common/unicode/ucnv.h | 2042 ------- .../deps/icu-small/source/common/unicode/ucnv_cb.h | 164 - .../icu-small/source/common/unicode/ucnv_err.h | 465 -- .../deps/icu-small/source/common/unicode/ucnvsel.h | 189 - .../deps/icu-small/source/common/unicode/uconfig.h | 444 -- .../deps/icu-small/source/common/unicode/ucpmap.h | 162 - .../deps/icu-small/source/common/unicode/ucptrie.h | 646 --- .../deps/icu-small/source/common/unicode/ucurr.h | 434 -- .../deps/icu-small/source/common/unicode/udata.h | 437 -- .../source/common/unicode/udisplaycontext.h | 172 - .../deps/icu-small/source/common/unicode/uenum.h | 208 - .../deps/icu-small/source/common/unicode/uidna.h | 772 --- .../deps/icu-small/source/common/unicode/uiter.h | 709 --- .../icu-small/source/common/unicode/uldnames.h | 304 -- .../deps/icu-small/source/common/unicode/uloc.h | 1272 ----- .../icu-small/source/common/unicode/umachine.h | 413 -- .../deps/icu-small/source/common/unicode/umisc.h | 62 - .../source/common/unicode/umutablecptrie.h | 241 - .../deps/icu-small/source/common/unicode/unifilt.h | 122 - .../icu-small/source/common/unicode/unifunct.h | 127 - .../icu-small/source/common/unicode/unimatch.h | 165 - .../deps/icu-small/source/common/unicode/uniset.h | 1722 ------ .../deps/icu-small/source/common/unicode/unistr.h | 4774 ---------------- .../deps/icu-small/source/common/unicode/unorm.h | 472 -- .../deps/icu-small/source/common/unicode/unorm2.h | 603 --- .../deps/icu-small/source/common/unicode/uobject.h | 319 -- .../deps/icu-small/source/common/unicode/urename.h | 1864 ------- .../deps/icu-small/source/common/unicode/urep.h | 157 - .../deps/icu-small/source/common/unicode/ures.h | 908 ---- .../deps/icu-small/source/common/unicode/uscript.h | 690 --- .../deps/icu-small/source/common/unicode/uset.h | 1134 ---- 
.../icu-small/source/common/unicode/usetiter.h | 320 -- .../deps/icu-small/source/common/unicode/ushape.h | 476 -- .../deps/icu-small/source/common/unicode/usprep.h | 271 - .../deps/icu-small/source/common/unicode/ustring.h | 1689 ------ .../icu-small/source/common/unicode/ustringtrie.h | 97 - .../deps/icu-small/source/common/unicode/utext.h | 1602 ------ .../deps/icu-small/source/common/unicode/utf.h | 225 - .../deps/icu-small/source/common/unicode/utf16.h | 733 --- .../deps/icu-small/source/common/unicode/utf32.h | 25 - .../deps/icu-small/source/common/unicode/utf8.h | 882 --- .../deps/icu-small/source/common/unicode/utf_old.h | 1204 ---- .../deps/icu-small/source/common/unicode/utrace.h | 379 -- .../deps/icu-small/source/common/unicode/utypes.h | 712 --- .../deps/icu-small/source/common/unicode/uvernum.h | 180 - .../icu-small/source/common/unicode/uversion.h | 201 - .../deps/icu-small/source/common/unifiedcache.cpp | 519 -- .../deps/icu-small/source/common/unifiedcache.h | 556 -- deps/node/deps/icu-small/source/common/unifilt.cpp | 71 - .../node/deps/icu-small/source/common/unifunct.cpp | 28 - deps/node/deps/icu-small/source/common/uniset.cpp | 2374 -------- .../icu-small/source/common/uniset_closure.cpp | 280 - .../deps/icu-small/source/common/uniset_props.cpp | 1214 ----- .../deps/icu-small/source/common/unisetspan.cpp | 1509 ------ .../node/deps/icu-small/source/common/unisetspan.h | 157 - deps/node/deps/icu-small/source/common/unistr.cpp | 1979 ------- .../deps/icu-small/source/common/unistr_case.cpp | 250 - .../icu-small/source/common/unistr_case_locale.cpp | 56 - .../deps/icu-small/source/common/unistr_cnv.cpp | 417 -- .../deps/icu-small/source/common/unistr_props.cpp | 77 - .../source/common/unistr_titlecase_brkiter.cpp | 57 - .../deps/icu-small/source/common/unistrappender.h | 90 - deps/node/deps/icu-small/source/common/unorm.cpp | 280 - .../node/deps/icu-small/source/common/unormcmp.cpp | 640 --- deps/node/deps/icu-small/source/common/unormimp.h | 488 
-- deps/node/deps/icu-small/source/common/uobject.cpp | 105 - .../node/deps/icu-small/source/common/uposixdefs.h | 71 - deps/node/deps/icu-small/source/common/uprops.cpp | 665 --- deps/node/deps/icu-small/source/common/uprops.h | 477 -- .../node/deps/icu-small/source/common/ures_cnv.cpp | 78 - .../node/deps/icu-small/source/common/uresbund.cpp | 3025 ----------- .../node/deps/icu-small/source/common/uresdata.cpp | 1482 ----- deps/node/deps/icu-small/source/common/uresdata.h | 519 -- deps/node/deps/icu-small/source/common/uresimp.h | 278 - deps/node/deps/icu-small/source/common/ureslocs.h | 27 - .../node/deps/icu-small/source/common/usc_impl.cpp | 361 -- deps/node/deps/icu-small/source/common/usc_impl.h | 139 - deps/node/deps/icu-small/source/common/uscript.cpp | 144 - .../deps/icu-small/source/common/uscript_props.cpp | 294 - deps/node/deps/icu-small/source/common/uset.cpp | 641 --- deps/node/deps/icu-small/source/common/uset_imp.h | 61 - .../deps/icu-small/source/common/uset_props.cpp | 143 - .../node/deps/icu-small/source/common/usetiter.cpp | 152 - deps/node/deps/icu-small/source/common/ushape.cpp | 1728 ------ deps/node/deps/icu-small/source/common/usprep.cpp | 872 --- deps/node/deps/icu-small/source/common/ustack.cpp | 63 - .../node/deps/icu-small/source/common/ustr_cnv.cpp | 256 - deps/node/deps/icu-small/source/common/ustr_cnv.h | 51 - deps/node/deps/icu-small/source/common/ustr_imp.h | 143 - .../source/common/ustr_titlecase_brkiter.cpp | 245 - .../node/deps/icu-small/source/common/ustr_wcs.cpp | 535 -- .../node/deps/icu-small/source/common/ustrcase.cpp | 1818 ------- .../icu-small/source/common/ustrcase_locale.cpp | 94 - .../node/deps/icu-small/source/common/ustrenum.cpp | 398 -- deps/node/deps/icu-small/source/common/ustrenum.h | 86 - deps/node/deps/icu-small/source/common/ustrfmt.cpp | 59 - deps/node/deps/icu-small/source/common/ustrfmt.h | 19 - deps/node/deps/icu-small/source/common/ustring.cpp | 1519 ------ 
.../node/deps/icu-small/source/common/ustrtrns.cpp | 1451 ----- deps/node/deps/icu-small/source/common/utext.cpp | 2875 ---------- .../node/deps/icu-small/source/common/utf_impl.cpp | 329 -- deps/node/deps/icu-small/source/common/util.cpp | 411 -- deps/node/deps/icu-small/source/common/util.h | 248 - .../deps/icu-small/source/common/util_props.cpp | 216 - deps/node/deps/icu-small/source/common/utrace.cpp | 492 -- deps/node/deps/icu-small/source/common/utracimp.h | 372 -- deps/node/deps/icu-small/source/common/utrie.cpp | 1234 ----- deps/node/deps/icu-small/source/common/utrie.h | 792 --- deps/node/deps/icu-small/source/common/utrie2.cpp | 663 --- deps/node/deps/icu-small/source/common/utrie2.h | 955 ---- .../icu-small/source/common/utrie2_builder.cpp | 1481 ----- .../deps/icu-small/source/common/utrie2_impl.h | 175 - .../deps/icu-small/source/common/utrie_swap.cpp | 344 -- deps/node/deps/icu-small/source/common/uts46.cpp | 1484 ----- deps/node/deps/icu-small/source/common/utypeinfo.h | 32 - deps/node/deps/icu-small/source/common/utypes.cpp | 226 - deps/node/deps/icu-small/source/common/uvector.cpp | 566 -- deps/node/deps/icu-small/source/common/uvector.h | 415 -- .../node/deps/icu-small/source/common/uvectr32.cpp | 334 -- deps/node/deps/icu-small/source/common/uvectr32.h | 306 -- .../node/deps/icu-small/source/common/uvectr64.cpp | 213 - deps/node/deps/icu-small/source/common/uvectr64.h | 279 - deps/node/deps/icu-small/source/common/wintz.cpp | 126 - deps/node/deps/icu-small/source/common/wintz.h | 36 - 394 files changed, 227695 deletions(-) delete mode 100644 deps/node/deps/icu-small/source/common/appendable.cpp delete mode 100644 deps/node/deps/icu-small/source/common/bmpset.cpp delete mode 100644 deps/node/deps/icu-small/source/common/bmpset.h delete mode 100644 deps/node/deps/icu-small/source/common/brkeng.cpp delete mode 100644 deps/node/deps/icu-small/source/common/brkeng.h delete mode 100644 deps/node/deps/icu-small/source/common/brkiter.cpp delete mode 
100644 deps/node/deps/icu-small/source/common/bytesinkutil.cpp delete mode 100644 deps/node/deps/icu-small/source/common/bytesinkutil.h delete mode 100644 deps/node/deps/icu-small/source/common/bytestream.cpp delete mode 100644 deps/node/deps/icu-small/source/common/bytestrie.cpp delete mode 100644 deps/node/deps/icu-small/source/common/bytestriebuilder.cpp delete mode 100644 deps/node/deps/icu-small/source/common/bytestrieiterator.cpp delete mode 100644 deps/node/deps/icu-small/source/common/caniter.cpp delete mode 100644 deps/node/deps/icu-small/source/common/characterproperties.cpp delete mode 100644 deps/node/deps/icu-small/source/common/chariter.cpp delete mode 100644 deps/node/deps/icu-small/source/common/charstr.cpp delete mode 100644 deps/node/deps/icu-small/source/common/charstr.h delete mode 100644 deps/node/deps/icu-small/source/common/cmemory.cpp delete mode 100644 deps/node/deps/icu-small/source/common/cmemory.h delete mode 100644 deps/node/deps/icu-small/source/common/common.rc delete mode 100644 deps/node/deps/icu-small/source/common/cpputils.h delete mode 100644 deps/node/deps/icu-small/source/common/cstr.cpp delete mode 100644 deps/node/deps/icu-small/source/common/cstr.h delete mode 100644 deps/node/deps/icu-small/source/common/cstring.cpp delete mode 100644 deps/node/deps/icu-small/source/common/cstring.h delete mode 100644 deps/node/deps/icu-small/source/common/cwchar.cpp delete mode 100644 deps/node/deps/icu-small/source/common/cwchar.h delete mode 100644 deps/node/deps/icu-small/source/common/dictbe.cpp delete mode 100644 deps/node/deps/icu-small/source/common/dictbe.h delete mode 100644 deps/node/deps/icu-small/source/common/dictionarydata.cpp delete mode 100644 deps/node/deps/icu-small/source/common/dictionarydata.h delete mode 100644 deps/node/deps/icu-small/source/common/dtintrv.cpp delete mode 100644 deps/node/deps/icu-small/source/common/edits.cpp delete mode 100644 deps/node/deps/icu-small/source/common/errorcode.cpp delete mode 100644 
deps/node/deps/icu-small/source/common/filteredbrk.cpp delete mode 100644 deps/node/deps/icu-small/source/common/filterednormalizer2.cpp delete mode 100644 deps/node/deps/icu-small/source/common/hash.h delete mode 100644 deps/node/deps/icu-small/source/common/icudataver.cpp delete mode 100644 deps/node/deps/icu-small/source/common/icuplug.cpp delete mode 100644 deps/node/deps/icu-small/source/common/icuplugimp.h delete mode 100644 deps/node/deps/icu-small/source/common/loadednormalizer2impl.cpp delete mode 100644 deps/node/deps/icu-small/source/common/localsvc.h delete mode 100644 deps/node/deps/icu-small/source/common/locavailable.cpp delete mode 100644 deps/node/deps/icu-small/source/common/locbased.cpp delete mode 100644 deps/node/deps/icu-small/source/common/locbased.h delete mode 100644 deps/node/deps/icu-small/source/common/locdispnames.cpp delete mode 100644 deps/node/deps/icu-small/source/common/locdspnm.cpp delete mode 100644 deps/node/deps/icu-small/source/common/locid.cpp delete mode 100644 deps/node/deps/icu-small/source/common/loclikely.cpp delete mode 100644 deps/node/deps/icu-small/source/common/locmap.cpp delete mode 100644 deps/node/deps/icu-small/source/common/locmap.h delete mode 100644 deps/node/deps/icu-small/source/common/locresdata.cpp delete mode 100644 deps/node/deps/icu-small/source/common/locutil.cpp delete mode 100644 deps/node/deps/icu-small/source/common/locutil.h delete mode 100644 deps/node/deps/icu-small/source/common/messageimpl.h delete mode 100644 deps/node/deps/icu-small/source/common/messagepattern.cpp delete mode 100644 deps/node/deps/icu-small/source/common/msvcres.h delete mode 100644 deps/node/deps/icu-small/source/common/mutex.h delete mode 100644 deps/node/deps/icu-small/source/common/norm2_nfc_data.h delete mode 100644 deps/node/deps/icu-small/source/common/norm2allmodes.h delete mode 100644 deps/node/deps/icu-small/source/common/normalizer2.cpp delete mode 100644 
deps/node/deps/icu-small/source/common/normalizer2impl.cpp delete mode 100644 deps/node/deps/icu-small/source/common/normalizer2impl.h delete mode 100644 deps/node/deps/icu-small/source/common/normlzr.cpp delete mode 100644 deps/node/deps/icu-small/source/common/parsepos.cpp delete mode 100644 deps/node/deps/icu-small/source/common/patternprops.cpp delete mode 100644 deps/node/deps/icu-small/source/common/patternprops.h delete mode 100644 deps/node/deps/icu-small/source/common/pluralmap.cpp delete mode 100644 deps/node/deps/icu-small/source/common/pluralmap.h delete mode 100644 deps/node/deps/icu-small/source/common/propname.cpp delete mode 100644 deps/node/deps/icu-small/source/common/propname.h delete mode 100644 deps/node/deps/icu-small/source/common/propname_data.h delete mode 100644 deps/node/deps/icu-small/source/common/propsvec.cpp delete mode 100644 deps/node/deps/icu-small/source/common/propsvec.h delete mode 100644 deps/node/deps/icu-small/source/common/punycode.cpp delete mode 100644 deps/node/deps/icu-small/source/common/punycode.h delete mode 100644 deps/node/deps/icu-small/source/common/putil.cpp delete mode 100644 deps/node/deps/icu-small/source/common/putilimp.h delete mode 100644 deps/node/deps/icu-small/source/common/rbbi.cpp delete mode 100644 deps/node/deps/icu-small/source/common/rbbi_cache.cpp delete mode 100644 deps/node/deps/icu-small/source/common/rbbi_cache.h delete mode 100755 deps/node/deps/icu-small/source/common/rbbicst.pl delete mode 100644 deps/node/deps/icu-small/source/common/rbbidata.cpp delete mode 100644 deps/node/deps/icu-small/source/common/rbbidata.h delete mode 100644 deps/node/deps/icu-small/source/common/rbbinode.cpp delete mode 100644 deps/node/deps/icu-small/source/common/rbbinode.h delete mode 100644 deps/node/deps/icu-small/source/common/rbbirb.cpp delete mode 100644 deps/node/deps/icu-small/source/common/rbbirb.h delete mode 100644 deps/node/deps/icu-small/source/common/rbbirpt.h delete mode 100644 
deps/node/deps/icu-small/source/common/rbbiscan.cpp delete mode 100644 deps/node/deps/icu-small/source/common/rbbiscan.h delete mode 100644 deps/node/deps/icu-small/source/common/rbbisetb.cpp delete mode 100644 deps/node/deps/icu-small/source/common/rbbisetb.h delete mode 100644 deps/node/deps/icu-small/source/common/rbbistbl.cpp delete mode 100644 deps/node/deps/icu-small/source/common/rbbitblb.cpp delete mode 100644 deps/node/deps/icu-small/source/common/rbbitblb.h delete mode 100644 deps/node/deps/icu-small/source/common/resbund.cpp delete mode 100644 deps/node/deps/icu-small/source/common/resbund_cnv.cpp delete mode 100644 deps/node/deps/icu-small/source/common/resource.cpp delete mode 100644 deps/node/deps/icu-small/source/common/resource.h delete mode 100644 deps/node/deps/icu-small/source/common/ruleiter.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ruleiter.h delete mode 100644 deps/node/deps/icu-small/source/common/schriter.cpp delete mode 100644 deps/node/deps/icu-small/source/common/serv.cpp delete mode 100644 deps/node/deps/icu-small/source/common/serv.h delete mode 100644 deps/node/deps/icu-small/source/common/servlk.cpp delete mode 100644 deps/node/deps/icu-small/source/common/servlkf.cpp delete mode 100644 deps/node/deps/icu-small/source/common/servloc.h delete mode 100644 deps/node/deps/icu-small/source/common/servls.cpp delete mode 100644 deps/node/deps/icu-small/source/common/servnotf.cpp delete mode 100644 deps/node/deps/icu-small/source/common/servnotf.h delete mode 100644 deps/node/deps/icu-small/source/common/servrbf.cpp delete mode 100644 deps/node/deps/icu-small/source/common/servslkf.cpp delete mode 100644 deps/node/deps/icu-small/source/common/sharedobject.cpp delete mode 100644 deps/node/deps/icu-small/source/common/sharedobject.h delete mode 100644 deps/node/deps/icu-small/source/common/simpleformatter.cpp delete mode 100644 deps/node/deps/icu-small/source/common/sprpimpl.h delete mode 100644 
deps/node/deps/icu-small/source/common/static_unicode_sets.cpp delete mode 100644 deps/node/deps/icu-small/source/common/static_unicode_sets.h delete mode 100644 deps/node/deps/icu-small/source/common/stringpiece.cpp delete mode 100644 deps/node/deps/icu-small/source/common/stringtriebuilder.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uarrsort.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uarrsort.h delete mode 100644 deps/node/deps/icu-small/source/common/uassert.h delete mode 100644 deps/node/deps/icu-small/source/common/ubidi.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ubidi_props.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ubidi_props.h delete mode 100644 deps/node/deps/icu-small/source/common/ubidi_props_data.h delete mode 100644 deps/node/deps/icu-small/source/common/ubidiimp.h delete mode 100644 deps/node/deps/icu-small/source/common/ubidiln.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ubiditransform.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ubidiwrt.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ubrk.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ubrkimpl.h delete mode 100644 deps/node/deps/icu-small/source/common/ucase.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucase.h delete mode 100644 deps/node/deps/icu-small/source/common/ucase_props_data.h delete mode 100644 deps/node/deps/icu-small/source/common/ucasemap.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucasemap_imp.h delete mode 100644 deps/node/deps/icu-small/source/common/ucasemap_titlecase_brkiter.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucat.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uchar.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uchar_props_data.h delete mode 100644 deps/node/deps/icu-small/source/common/ucharstrie.cpp delete mode 100644 
deps/node/deps/icu-small/source/common/ucharstriebuilder.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucharstrieiterator.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uchriter.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucln.h delete mode 100644 deps/node/deps/icu-small/source/common/ucln_cmn.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucln_cmn.h delete mode 100644 deps/node/deps/icu-small/source/common/ucln_imp.h delete mode 100644 deps/node/deps/icu-small/source/common/ucmndata.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucmndata.h delete mode 100644 deps/node/deps/icu-small/source/common/ucnv.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv2022.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_bld.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_bld.h delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_cb.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_cnv.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_cnv.h delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_ct.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_err.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_ext.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_ext.h delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_imp.h delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_io.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_io.h delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_lmb.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_set.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_u16.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_u32.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnv_u7.cpp delete mode 100644 
deps/node/deps/icu-small/source/common/ucnv_u8.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnvbocu.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnvdisp.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnvhz.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnvisci.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnvlat1.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnvmbcs.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnvmbcs.h delete mode 100644 deps/node/deps/icu-small/source/common/ucnvscsu.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucnvsel.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucol_data.h delete mode 100644 deps/node/deps/icu-small/source/common/ucol_swp.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucol_swp.h delete mode 100644 deps/node/deps/icu-small/source/common/ucptrie.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucptrie_impl.h delete mode 100644 deps/node/deps/icu-small/source/common/ucurr.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ucurrimp.h delete mode 100644 deps/node/deps/icu-small/source/common/udata.cpp delete mode 100644 deps/node/deps/icu-small/source/common/udatamem.cpp delete mode 100644 deps/node/deps/icu-small/source/common/udatamem.h delete mode 100644 deps/node/deps/icu-small/source/common/udataswp.cpp delete mode 100644 deps/node/deps/icu-small/source/common/udataswp.h delete mode 100644 deps/node/deps/icu-small/source/common/uelement.h delete mode 100644 deps/node/deps/icu-small/source/common/uenum.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uenumimp.h delete mode 100644 deps/node/deps/icu-small/source/common/uhash.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uhash.h delete mode 100644 deps/node/deps/icu-small/source/common/uhash_us.cpp delete mode 100644 
deps/node/deps/icu-small/source/common/uidna.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uinit.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uinvchar.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uinvchar.h delete mode 100644 deps/node/deps/icu-small/source/common/uiter.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ulayout_props_data.h delete mode 100644 deps/node/deps/icu-small/source/common/ulist.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ulist.h delete mode 100644 deps/node/deps/icu-small/source/common/uloc.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uloc_keytype.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uloc_tag.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ulocimp.h delete mode 100644 deps/node/deps/icu-small/source/common/umapfile.cpp delete mode 100644 deps/node/deps/icu-small/source/common/umapfile.h delete mode 100644 deps/node/deps/icu-small/source/common/umath.cpp delete mode 100644 deps/node/deps/icu-small/source/common/umutablecptrie.cpp delete mode 100644 deps/node/deps/icu-small/source/common/umutex.cpp delete mode 100644 deps/node/deps/icu-small/source/common/umutex.h delete mode 100644 deps/node/deps/icu-small/source/common/unames.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unicode/appendable.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/brkiter.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/bytestream.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/bytestrie.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/bytestriebuilder.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/caniter.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/casemap.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/char16ptr.h delete mode 100644 
deps/node/deps/icu-small/source/common/unicode/chariter.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/dbbi.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/docmain.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/dtintrv.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/edits.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/enumset.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/errorcode.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/filteredbrk.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/icudataver.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/icuplug.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/idna.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/localpointer.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/locdspnm.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/locid.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/messagepattern.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/normalizer2.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/normlzr.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/parseerr.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/parsepos.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/platform.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ptypes.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/putil.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/rbbi.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/rep.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/resbund.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/schriter.h delete mode 100644 
deps/node/deps/icu-small/source/common/unicode/simpleformatter.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/std_string.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/strenum.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/stringoptions.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/stringpiece.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/stringtriebuilder.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/symtable.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ubidi.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ubiditransform.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ubrk.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ucasemap.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ucat.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uchar.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ucharstrie.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ucharstriebuilder.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uchriter.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uclean.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ucnv.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ucnv_cb.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ucnv_err.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ucnvsel.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uconfig.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ucpmap.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ucptrie.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ucurr.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/udata.h delete 
mode 100644 deps/node/deps/icu-small/source/common/unicode/udisplaycontext.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uenum.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uidna.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uiter.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uldnames.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uloc.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/umachine.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/umisc.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/umutablecptrie.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/unifilt.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/unifunct.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/unimatch.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uniset.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/unistr.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/unorm.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/unorm2.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uobject.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/urename.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/urep.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ures.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uscript.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uset.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/usetiter.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ushape.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/usprep.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/ustring.h delete mode 100644 
deps/node/deps/icu-small/source/common/unicode/ustringtrie.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/utext.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/utf.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/utf16.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/utf32.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/utf8.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/utf_old.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/utrace.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/utypes.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uvernum.h delete mode 100644 deps/node/deps/icu-small/source/common/unicode/uversion.h delete mode 100644 deps/node/deps/icu-small/source/common/unifiedcache.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unifiedcache.h delete mode 100644 deps/node/deps/icu-small/source/common/unifilt.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unifunct.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uniset.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uniset_closure.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uniset_props.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unisetspan.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unisetspan.h delete mode 100644 deps/node/deps/icu-small/source/common/unistr.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unistr_case.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unistr_case_locale.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unistr_cnv.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unistr_props.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unistr_titlecase_brkiter.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unistrappender.h 
delete mode 100644 deps/node/deps/icu-small/source/common/unorm.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unormcmp.cpp delete mode 100644 deps/node/deps/icu-small/source/common/unormimp.h delete mode 100644 deps/node/deps/icu-small/source/common/uobject.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uposixdefs.h delete mode 100644 deps/node/deps/icu-small/source/common/uprops.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uprops.h delete mode 100644 deps/node/deps/icu-small/source/common/ures_cnv.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uresbund.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uresdata.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uresdata.h delete mode 100644 deps/node/deps/icu-small/source/common/uresimp.h delete mode 100644 deps/node/deps/icu-small/source/common/ureslocs.h delete mode 100644 deps/node/deps/icu-small/source/common/usc_impl.cpp delete mode 100644 deps/node/deps/icu-small/source/common/usc_impl.h delete mode 100644 deps/node/deps/icu-small/source/common/uscript.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uscript_props.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uset.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uset_imp.h delete mode 100644 deps/node/deps/icu-small/source/common/uset_props.cpp delete mode 100644 deps/node/deps/icu-small/source/common/usetiter.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ushape.cpp delete mode 100644 deps/node/deps/icu-small/source/common/usprep.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ustack.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ustr_cnv.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ustr_cnv.h delete mode 100644 deps/node/deps/icu-small/source/common/ustr_imp.h delete mode 100644 deps/node/deps/icu-small/source/common/ustr_titlecase_brkiter.cpp delete mode 100644 
deps/node/deps/icu-small/source/common/ustr_wcs.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ustrcase.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ustrcase_locale.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ustrenum.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ustrenum.h delete mode 100644 deps/node/deps/icu-small/source/common/ustrfmt.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ustrfmt.h delete mode 100644 deps/node/deps/icu-small/source/common/ustring.cpp delete mode 100644 deps/node/deps/icu-small/source/common/ustrtrns.cpp delete mode 100644 deps/node/deps/icu-small/source/common/utext.cpp delete mode 100644 deps/node/deps/icu-small/source/common/utf_impl.cpp delete mode 100644 deps/node/deps/icu-small/source/common/util.cpp delete mode 100644 deps/node/deps/icu-small/source/common/util.h delete mode 100644 deps/node/deps/icu-small/source/common/util_props.cpp delete mode 100644 deps/node/deps/icu-small/source/common/utrace.cpp delete mode 100644 deps/node/deps/icu-small/source/common/utracimp.h delete mode 100644 deps/node/deps/icu-small/source/common/utrie.cpp delete mode 100644 deps/node/deps/icu-small/source/common/utrie.h delete mode 100644 deps/node/deps/icu-small/source/common/utrie2.cpp delete mode 100644 deps/node/deps/icu-small/source/common/utrie2.h delete mode 100644 deps/node/deps/icu-small/source/common/utrie2_builder.cpp delete mode 100644 deps/node/deps/icu-small/source/common/utrie2_impl.h delete mode 100644 deps/node/deps/icu-small/source/common/utrie_swap.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uts46.cpp delete mode 100644 deps/node/deps/icu-small/source/common/utypeinfo.h delete mode 100644 deps/node/deps/icu-small/source/common/utypes.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uvector.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uvector.h delete mode 100644 
deps/node/deps/icu-small/source/common/uvectr32.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uvectr32.h delete mode 100644 deps/node/deps/icu-small/source/common/uvectr64.cpp delete mode 100644 deps/node/deps/icu-small/source/common/uvectr64.h delete mode 100644 deps/node/deps/icu-small/source/common/wintz.cpp delete mode 100644 deps/node/deps/icu-small/source/common/wintz.h (limited to 'deps/node/deps/icu-small/source/common') diff --git a/deps/node/deps/icu-small/source/common/appendable.cpp b/deps/node/deps/icu-small/source/common/appendable.cpp deleted file mode 100644 index fca3c1e4..00000000 --- a/deps/node/deps/icu-small/source/common/appendable.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011-2012, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: appendable.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010dec07 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/appendable.h" -#include "unicode/utf16.h" - -U_NAMESPACE_BEGIN - -Appendable::~Appendable() {} - -UBool -Appendable::appendCodePoint(UChar32 c) { - if(c<=0xffff) { - return appendCodeUnit((UChar)c); - } else { - return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c)); - } -} - -UBool -Appendable::appendString(const UChar *s, int32_t length) { - if(length<0) { - UChar c; - while((c=*s++)!=0) { - if(!appendCodeUnit(c)) { - return FALSE; - } - } - } else if(length>0) { - const UChar *limit=s+length; - do { - if(!appendCodeUnit(*s++)) { - return FALSE; - } - } while(s>6; // Named for UTF-8 2-byte lead byte with upper 5 bits. 
- int32_t trail=start&0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits. - - // Set one bit indicating an all-one block. - uint32_t bits=(uint32_t)1<>6; - int32_t limitTrail=limit&0x3f; - - if(lead==limitLead) { - // Partial vertical bit column. - while(trail0) { - do { - table[trail++]|=bits; - } while(trail<64); - ++lead; - } - if(lead=0x100) { - break; - } - do { - latin1Contains[start++]=1; - } while(start0x80) { - if(start<0x80) { - start=0x80; - } - break; - } - } - - // Set table7FF[]. - while(start<0x800) { - set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800); - if(limit>0x800) { - start=0x800; - break; - } - - start=list[listIndex++]; - if(listIndex0x10000) { - limit=0x10000; - } - - if(start>=6; - bmpBlockBits[start&0x3f]|=0x10001<<(start>>6); - start=(start+1)<<6; // Round up to the next block boundary. - minStart=start; // Ignore further ranges in this block. - } - if(start>6, limit>>6); - } - - if(limit&0x3f) { - // Mixed-value block of 64 code points. - limit>>=6; - bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6); - limit=(limit+1)<<6; // Round up to the next block boundary. - minStart=limit; // Ignore further ranges in this block. - } - } - } - - if(limit==0x10000) { - break; - } - - start=list[listIndex++]; - if(listIndex(~(0x10001<<0xd)); // Lead byte 0xED. - bits=1<<0xd; - for(i=32; i<64; ++i) { // Second half of 4k block. - bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits; - } - } else { - mask= static_cast(~(0x10001<<0xd)); // Lead byte 0xED. - for(i=32; i<64; ++i) { // Second half of 4k block. - bmpBlockBits[i]&=mask; - } - } -} - -int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const { - /* Examples: - findCodePoint(c) - set list[] c=0 1 3 4 7 8 - === ============== =========== - [] [110000] 0 0 0 0 0 0 - [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 - [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 - [:Any:] [0, 110000] 1 1 1 1 1 1 - */ - - // Return the smallest i such that c < list[i]. 
Assume - // list[len - 1] == HIGH and that c is legal (0..HIGH-1). - if (c < list[lo]) - return lo; - // High runner test. c is often after the last range, so an - // initial check for this condition pays off. - if (lo >= hi || c >= list[hi-1]) - return hi; - // invariant: c >= list[lo] - // invariant: c < list[hi] - for (;;) { - int32_t i = (lo + hi) >> 1; - if (i == lo) { - break; // Found! - } else if (c < list[i]) { - hi = i; - } else { - lo = i; - } - } - return hi; -} - -UBool -BMPSet::contains(UChar32 c) const { - if((uint32_t)c<=0xff) { - return (UBool)latin1Contains[c]; - } else if((uint32_t)c<=0x7ff) { - return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0); - } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) { - int lead=c>>12; - uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001; - if(twoBits<=1) { - // All 64 code points with the same bits 15..6 - // are either in the set or not. - return (UBool)twoBits; - } else { - // Look up the code point in its 4k block of code points. - return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]); - } - } else if((uint32_t)c<=0x10ffff) { - // surrogate or supplementary code point - return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]); - } else { - // Out-of-range code points get FALSE, consistent with long-standing - // behavior of UnicodeSet::contains(c). - return FALSE; - } -} - -/* - * Check for sufficient length for trail unit for each surrogate pair. - * Handle single surrogates as surrogate code points as usual in ICU. 
- */ -const UChar * -BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const { - UChar c, c2; - - if(spanCondition) { - // span - do { - c=*s; - if(c<=0xff) { - if(!latin1Contains[c]) { - break; - } - } else if(c<=0x7ff) { - if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) { - break; - } - } else if(c<0xd800 || c>=0xe000) { - int lead=c>>12; - uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001; - if(twoBits<=1) { - // All 64 code points with the same bits 15..6 - // are either in the set or not. - if(twoBits==0) { - break; - } - } else { - // Look up the code point in its 4k block of code points. - if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) { - break; - } - } - } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) { - // surrogate code point - if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) { - break; - } - } else { - // surrogate pair - if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) { - break; - } - ++s; - } - } while(++s>6)))!=0) { - break; - } - } else if(c<0xd800 || c>=0xe000) { - int lead=c>>12; - uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001; - if(twoBits<=1) { - // All 64 code points with the same bits 15..6 - // are either in the set or not. - if(twoBits!=0) { - break; - } - } else { - // Look up the code point in its 4k block of code points. 
- if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) { - break; - } - } - } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) { - // surrogate code point - if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) { - break; - } - } else { - // surrogate pair - if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) { - break; - } - ++s; - } - } while(++s>6)))==0) { - break; - } - } else if(c<0xd800 || c>=0xe000) { - int lead=c>>12; - uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001; - if(twoBits<=1) { - // All 64 code points with the same bits 15..6 - // are either in the set or not. - if(twoBits==0) { - break; - } - } else { - // Look up the code point in its 4k block of code points. - if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) { - break; - } - } - } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) { - // surrogate code point - if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) { - break; - } - } else { - // surrogate pair - if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) { - break; - } - --limit; - } - if(s==limit) { - return s; - } - } - } else { - // span not - for(;;) { - c=*(--limit); - if(c<=0xff) { - if(latin1Contains[c]) { - break; - } - } else if(c<=0x7ff) { - if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) { - break; - } - } else if(c<0xd800 || c>=0xe000) { - int lead=c>>12; - uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001; - if(twoBits<=1) { - // All 64 code points with the same bits 15..6 - // are either in the set or not. - if(twoBits!=0) { - break; - } - } else { - // Look up the code point in its 4k block of code points. 
- if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) { - break; - } - } - } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) { - // surrogate code point - if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) { - break; - } - } else { - // surrogate pair - if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) { - break; - } - --limit; - } - if(s==limit) { - return s; - } - } - } - return limit+1; -} - -/* - * Precheck for sufficient trail bytes at end of string only once per span. - * Check validity. - */ -const uint8_t * -BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const { - const uint8_t *limit=s+length; - uint8_t b=*s; - if(U8_IS_SINGLE(b)) { - // Initial all-ASCII span. - if(spanCondition) { - do { - if(!latin1Contains[b] || ++s==limit) { - return s; - } - b=*s; - } while(U8_IS_SINGLE(b)); - } else { - do { - if(latin1Contains[b] || ++s==limit) { - return s; - } - b=*s; - } while(U8_IS_SINGLE(b)); - } - length=(int32_t)(limit-s); - } - - if(spanCondition!=USET_SPAN_NOT_CONTAINED) { - spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. - } - - const uint8_t *limit0=limit; - - /* - * Make sure that the last 1/2/3/4-byte sequence before limit is complete - * or runs into a lead byte. - * In the span loop compare s with limit only once - * per multi-byte character. - * - * Give a trailing illegal sequence the same value as the result of contains(FFFD), - * including it if that is part of the span, otherwise set limit0 to before - * the truncated sequence. 
- */ - b=*(limit-1); - if((int8_t)b<0) { - // b>=0x80: lead or trail byte - if(b<0xc0) { - // single trail byte, check for preceding 3- or 4-byte lead byte - if(length>=2 && (b=*(limit-2))>=0xe0) { - limit-=2; - if(containsFFFD!=spanCondition) { - limit0=limit; - } - } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) { - // 4-byte lead byte with only two trail bytes - limit-=3; - if(containsFFFD!=spanCondition) { - limit0=limit; - } - } - } else { - // lead byte with no trail bytes - --limit; - if(containsFFFD!=spanCondition) { - limit0=limit; - } - } - } - - uint8_t t1, t2, t3; - - while(s=0xe0) { - if(b<0xf0) { - if( /* handle U+0000..U+FFFF inline */ - (t1=(uint8_t)(s[0]-0x80)) <= 0x3f && - (t2=(uint8_t)(s[1]-0x80)) <= 0x3f - ) { - b&=0xf; - uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001; - if(twoBits<=1) { - // All 64 code points with this lead byte and middle trail byte - // are either in the set or not. - if(twoBits!=(uint32_t)spanCondition) { - return s-1; - } - } else { - // Look up the code point in its 4k block of code points. - UChar32 c=(b<<12)|(t1<<6)|t2; - if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) { - return s-1; - } - } - s+=2; - continue; - } - } else if( /* handle U+10000..U+10FFFF inline */ - (t1=(uint8_t)(s[0]-0x80)) <= 0x3f && - (t2=(uint8_t)(s[1]-0x80)) <= 0x3f && - (t3=(uint8_t)(s[2]-0x80)) <= 0x3f - ) { - // Give an illegal sequence the same value as the result of contains(FFFD). - UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3; - if( ( (0x10000<=c && c<=0x10ffff) ? 
- containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) : - containsFFFD - ) != spanCondition - ) { - return s-1; - } - s+=3; - continue; - } - } else { - if( /* handle U+0000..U+07FF inline */ - b>=0xc0 && - (t1=(uint8_t)(*s-0x80)) <= 0x3f - ) { - if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) { - return s-1; - } - ++s; - continue; - } - } - - // Give an illegal sequence the same value as the result of contains(FFFD). - // Handle each byte of an illegal sequence separately to simplify the code; - // no need to optimize error handling. - if(containsFFFD!=spanCondition) { - return s-1; - } - } - - return limit0; -} - -/* - * While going backwards through UTF-8 optimize only for ASCII. - * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not - * possible to tell from the last byte in a multi-byte sequence how many - * preceding bytes there should be. Therefore, going backwards through UTF-8 - * is much harder than going forward. - */ -int32_t -BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const { - if(spanCondition!=USET_SPAN_NOT_CONTAINED) { - spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 
- } - - uint8_t b; - - do { - b=s[--length]; - if(U8_IS_SINGLE(b)) { - // ASCII sub-span - if(spanCondition) { - do { - if(!latin1Contains[b]) { - return length+1; - } else if(length==0) { - return 0; - } - b=s[--length]; - } while(U8_IS_SINGLE(b)); - } else { - do { - if(latin1Contains[b]) { - return length+1; - } else if(length==0) { - return 0; - } - b=s[--length]; - } while(U8_IS_SINGLE(b)); - } - } - - int32_t prev=length; - UChar32 c; - // trail byte: collect a multi-byte character - // (or lead byte in last-trail position) - c=utf8_prevCharSafeBody(s, 0, &length, b, -3); - // c is a valid code point, not ASCII, not a surrogate - if(c<=0x7ff) { - if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) { - return prev+1; - } - } else if(c<=0xffff) { - int lead=c>>12; - uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001; - if(twoBits<=1) { - // All 64 code points with the same bits 15..6 - // are either in the set or not. - if(twoBits!=(uint32_t)spanCondition) { - return prev+1; - } - } else { - // Look up the code point in its 4k block of code points. - if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) { - return prev+1; - } - } - } else { - if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) { - return prev+1; - } - } - } while(length>0); - return 0; -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/bmpset.h b/deps/node/deps/icu-small/source/common/bmpset.h deleted file mode 100644 index 018aeb7f..00000000 --- a/deps/node/deps/icu-small/source/common/bmpset.h +++ /dev/null @@ -1,164 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2007, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -****************************************************************************** -* file name: bmpset.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2007jan29 -* created by: Markus W. Scherer -*/ - -#ifndef __BMPSET_H__ -#define __BMPSET_H__ - -#include "unicode/utypes.h" -#include "unicode/uniset.h" - -U_NAMESPACE_BEGIN - -/* - * Helper class for frozen UnicodeSets, implements contains() and span() - * optimized for BMP code points. Structured to be UTF-8-friendly. - * - * Latin-1: Look up bytes. - * 2-byte characters: Bits organized vertically. - * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF, - * with mixed for illegal ranges. - * Supplementary characters: Binary search over - * the supplementary part of the parent set's inversion list. - */ -class BMPSet : public UMemory { -public: - BMPSet(const int32_t *parentList, int32_t parentListLength); - BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength); - virtual ~BMPSet(); - - virtual UBool contains(UChar32 c) const; - - /* - * Span the initial substring for which each character c has spanCondition==contains(c). - * It must be s0 and spanCondition==0 or 1. - * @return The string pointer which limits the span. - */ - const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const; - - /* - * Span the trailing substring for which each character c has spanCondition==contains(c). - * It must be length>0 and spanCondition==0 or 1. - * @return The start of the span. - */ - int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const; - -private: - void initBits(); - void overrideIllegal(); - - /** - * Same as UnicodeSet::findCodePoint(UChar32 c) const except that the - * binary search is restricted for finding code points in a certain range. 
- * - * For restricting the search for finding in the range start..end, - * pass in - * lo=findCodePoint(start) and - * hi=findCodePoint(end) - * with 0<=lo<=hicontains(c); -} - -int32_t -UnhandledEngine::findBreaks( UText *text, - int32_t /* startPos */, - int32_t endPos, - UVector32 &/*foundBreaks*/ ) const { - UChar32 c = utext_current32(text); - while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) { - utext_next32(text); // TODO: recast loop to work with post-increment operations. - c = utext_current32(text); - } - return 0; -} - -void -UnhandledEngine::handleCharacter(UChar32 c) { - if (fHandled == nullptr) { - fHandled = new UnicodeSet(); - if (fHandled == nullptr) { - return; - } - } - if (!fHandled->contains(c)) { - UErrorCode status = U_ZERO_ERROR; - // Apply the entire script of the character. - int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); - fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status); - } -} - -/* - ****************************************************************** - */ - -ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) { - fEngines = 0; -} - -ICULanguageBreakFactory::~ICULanguageBreakFactory() { - if (fEngines != 0) { - delete fEngines; - } -} - -U_NAMESPACE_END -U_CDECL_BEGIN -static void U_CALLCONV _deleteEngine(void *obj) { - delete (const icu::LanguageBreakEngine *) obj; -} -U_CDECL_END -U_NAMESPACE_BEGIN - -static UMutex gBreakEngineMutex = U_MUTEX_INITIALIZER; - -const LanguageBreakEngine * -ICULanguageBreakFactory::getEngineFor(UChar32 c) { - const LanguageBreakEngine *lbe = NULL; - UErrorCode status = U_ZERO_ERROR; - - Mutex m(&gBreakEngineMutex); - - if (fEngines == NULL) { - UStack *engines = new UStack(_deleteEngine, NULL, status); - if (U_FAILURE(status) || engines == NULL) { - // Note: no way to return error code to caller. 
- delete engines; - return NULL; - } - fEngines = engines; - } else { - int32_t i = fEngines->size(); - while (--i >= 0) { - lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); - if (lbe != NULL && lbe->handles(c)) { - return lbe; - } - } - } - - // We didn't find an engine. Create one. - lbe = loadEngineFor(c); - if (lbe != NULL) { - fEngines->push((void *)lbe, status); - } - return lbe; -} - -const LanguageBreakEngine * -ICULanguageBreakFactory::loadEngineFor(UChar32 c) { - UErrorCode status = U_ZERO_ERROR; - UScriptCode code = uscript_getScript(c, &status); - if (U_SUCCESS(status)) { - DictionaryMatcher *m = loadDictionaryMatcherFor(code); - if (m != NULL) { - const LanguageBreakEngine *engine = NULL; - switch(code) { - case USCRIPT_THAI: - engine = new ThaiBreakEngine(m, status); - break; - case USCRIPT_LAO: - engine = new LaoBreakEngine(m, status); - break; - case USCRIPT_MYANMAR: - engine = new BurmeseBreakEngine(m, status); - break; - case USCRIPT_KHMER: - engine = new KhmerBreakEngine(m, status); - break; - -#if !UCONFIG_NO_NORMALIZATION - // CJK not available w/o normalization - case USCRIPT_HANGUL: - engine = new CjkBreakEngine(m, kKorean, status); - break; - - // use same BreakEngine and dictionary for both Chinese and Japanese - case USCRIPT_HIRAGANA: - case USCRIPT_KATAKANA: - case USCRIPT_HAN: - engine = new CjkBreakEngine(m, kChineseJapanese, status); - break; -#if 0 - // TODO: Have to get some characters with script=common handled - // by CjkBreakEngine (e.g. U+309B). Simply subjecting - // them to CjkBreakEngine does not work. The engine has to - // special-case them. 
- case USCRIPT_COMMON: - { - UBlockCode block = ublock_getCode(code); - if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA) - engine = new CjkBreakEngine(dict, kChineseJapanese, status); - break; - } -#endif -#endif - - default: - break; - } - if (engine == NULL) { - delete m; - } - else if (U_FAILURE(status)) { - delete engine; - engine = NULL; - } - return engine; - } - } - return NULL; -} - -DictionaryMatcher * -ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) { - UErrorCode status = U_ZERO_ERROR; - // open root from brkitr tree. - UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); - b = ures_getByKeyWithFallback(b, "dictionaries", b, &status); - int32_t dictnlength = 0; - const UChar *dictfname = - ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status); - if (U_FAILURE(status)) { - ures_close(b); - return NULL; - } - CharString dictnbuf; - CharString ext; - const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength); // last dot - if (extStart != NULL) { - int32_t len = (int32_t)(extStart - dictfname); - ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status); - dictnlength = len; - } - dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status); - ures_close(b); - - UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status); - if (U_SUCCESS(status)) { - // build trie - const uint8_t *data = (const uint8_t *)udata_getMemory(file); - const int32_t *indexes = (const int32_t *)data; - const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET]; - const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK; - DictionaryMatcher *m = NULL; - if (trieType == DictionaryData::TRIE_TYPE_BYTES) { - const int32_t transform = indexes[DictionaryData::IX_TRANSFORM]; - const char *characters = (const char *)(data + offset); - m = new BytesDictionaryMatcher(characters, 
transform, file); - } - else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) { - const UChar *characters = (const UChar *)(data + offset); - m = new UCharsDictionaryMatcher(characters, file); - } - if (m == NULL) { - // no matcher exists to take ownership - either we are an invalid - // type or memory allocation failed - udata_close(file); - } - return m; - } else if (dictfname != NULL) { - // we don't have a dictionary matcher. - // returning NULL here will cause us to fail to find a dictionary break engine, as expected - status = U_ZERO_ERROR; - return NULL; - } - return NULL; -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/node/deps/icu-small/source/common/brkeng.h b/deps/node/deps/icu-small/source/common/brkeng.h deleted file mode 100644 index e40fce13..00000000 --- a/deps/node/deps/icu-small/source/common/brkeng.h +++ /dev/null @@ -1,271 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** - ************************************************************************************ - * Copyright (C) 2006-2012, International Business Machines Corporation and others. * - * All Rights Reserved. * - ************************************************************************************ - */ - -#ifndef BRKENG_H -#define BRKENG_H - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/utext.h" -#include "unicode/uscript.h" - -U_NAMESPACE_BEGIN - -class UnicodeSet; -class UStack; -class UVector32; -class DictionaryMatcher; - -/******************************************************************* - * LanguageBreakEngine - */ - -/** - *

LanguageBreakEngines implement language-specific knowledge for - * finding text boundaries within a run of characters belonging to a - * specific set. The boundaries will be of a specific kind, e.g. word, - * line, etc.

- * - *

LanguageBreakEngines should normally be implemented so as to - * be shared between threads without locking.

- */ -class LanguageBreakEngine : public UMemory { - public: - - /** - *

Default constructor.

- * - */ - LanguageBreakEngine(); - - /** - *

Virtual destructor.

- */ - virtual ~LanguageBreakEngine(); - - /** - *

Indicate whether this engine handles a particular character for - * a particular kind of break.

- * - * @param c A character which begins a run that the engine might handle - * @return TRUE if this engine handles the particular character and break - * type. - */ - virtual UBool handles(UChar32 c) const = 0; - - /** - *

Find any breaks within a run in the supplied text.

- * - * @param text A UText representing the text. The - * iterator is left at the end of the run of characters which the engine - * is capable of handling. - * @param startPos The start of the run within the supplied text. - * @param endPos The end of the run within the supplied text. - * @param foundBreaks A Vector of int32_t to receive the breaks. - * @return The number of breaks found. - */ - virtual int32_t findBreaks( UText *text, - int32_t startPos, - int32_t endPos, - UVector32 &foundBreaks ) const = 0; - -}; - -/******************************************************************* - * LanguageBreakFactory - */ - -/** - *

LanguageBreakFactorys find and return a LanguageBreakEngine - * that can determine breaks for characters in a specific set, if - * such an object can be found.

- * - *

If a LanguageBreakFactory is to be shared between threads, - * appropriate synchronization must be used; there is none internal - * to the factory.

- * - *

A LanguageBreakEngine returned by a LanguageBreakFactory can - * normally be shared between threads without synchronization, unless - * the specific subclass of LanguageBreakFactory indicates otherwise.

- * - *

A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine - * it returns when it itself is deleted, unless the specific subclass of - * LanguageBreakFactory indicates otherwise. Naturally, the factory should - * not be deleted until the LanguageBreakEngines it has returned are no - * longer needed.

- */ -class LanguageBreakFactory : public UMemory { - public: - - /** - *

Default constructor.

- * - */ - LanguageBreakFactory(); - - /** - *

Virtual destructor.

- */ - virtual ~LanguageBreakFactory(); - - /** - *

Find and return a LanguageBreakEngine that can find the desired - * kind of break for the set of characters to which the supplied - * character belongs. It is up to the set of available engines to - * determine what the sets of characters are.

- * - * @param c A character that begins a run for which a LanguageBreakEngine is - * sought. - * @return A LanguageBreakEngine with the desired characteristics, or 0. - */ - virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0; - -}; - -/******************************************************************* - * UnhandledEngine - */ - -/** - *

UnhandledEngine is a special subclass of LanguageBreakEngine that - * handles characters that no other LanguageBreakEngine is available to - * handle. It is told the character and the type of break; at its - * discretion it may handle more than the specified character (e.g., - * the entire script to which that character belongs.

- * - *

UnhandledEngines may not be shared between threads without - * external synchronization.

- */ - -class UnhandledEngine : public LanguageBreakEngine { - private: - - /** - * The sets of characters handled. - * @internal - */ - - UnicodeSet *fHandled; - - public: - - /** - *

Default constructor.

- * - */ - UnhandledEngine(UErrorCode &status); - - /** - *

Virtual destructor.

- */ - virtual ~UnhandledEngine(); - - /** - *

Indicate whether this engine handles a particular character for - * a particular kind of break.

- * - * @param c A character which begins a run that the engine might handle - * @return TRUE if this engine handles the particular character and break - * type. - */ - virtual UBool handles(UChar32 c) const; - - /** - *

Find any breaks within a run in the supplied text.

- * - * @param text A UText representing the text (TODO: UText). The - * iterator is left at the end of the run of characters which the engine - * is capable of handling. - * @param startPos The start of the run within the supplied text. - * @param endPos The end of the run within the supplied text. - * @param foundBreaks An allocated C array of the breaks found, if any - * @return The number of breaks found. - */ - virtual int32_t findBreaks( UText *text, - int32_t startPos, - int32_t endPos, - UVector32 &foundBreaks ) const; - - /** - *

Tell the engine to handle a particular character and break type.

- * - * @param c A character which the engine should handle - */ - virtual void handleCharacter(UChar32 c); - -}; - -/******************************************************************* - * ICULanguageBreakFactory - */ - -/** - *

ICULanguageBreakFactory is the default LanguageBreakFactory for - * ICU. It creates dictionary-based LanguageBreakEngines from dictionary - * data in the ICU data file.

- */ -class ICULanguageBreakFactory : public LanguageBreakFactory { - private: - - /** - * The stack of break engines created by this factory - * @internal - */ - - UStack *fEngines; - - public: - - /** - *

Standard constructor.

- * - */ - ICULanguageBreakFactory(UErrorCode &status); - - /** - *

Virtual destructor.

- */ - virtual ~ICULanguageBreakFactory(); - - /** - *

Find and return a LanguageBreakEngine that can find the desired - * kind of break for the set of characters to which the supplied - * character belongs. It is up to the set of available engines to - * determine what the sets of characters are.

- * - * @param c A character that begins a run for which a LanguageBreakEngine is - * sought. - * @return A LanguageBreakEngine with the desired characteristics, or 0. - */ - virtual const LanguageBreakEngine *getEngineFor(UChar32 c); - -protected: - /** - *

Create a LanguageBreakEngine for the set of characters to which - * the supplied character belongs, for the specified break type.

- * - * @param c A character that begins a run for which a LanguageBreakEngine is - * sought. - * @return A LanguageBreakEngine with the desired characteristics, or 0. - */ - virtual const LanguageBreakEngine *loadEngineFor(UChar32 c); - - /** - *

Create a DictionaryMatcher for the specified script and break type.

- * @param script An ISO 15924 script code that identifies the dictionary to be - * created. - * @return A DictionaryMatcher with the desired characteristics, or NULL. - */ - virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script); -}; - -U_NAMESPACE_END - - /* BRKENG_H */ -#endif diff --git a/deps/node/deps/icu-small/source/common/brkiter.cpp b/deps/node/deps/icu-small/source/common/brkiter.cpp deleted file mode 100644 index 23e0cc3c..00000000 --- a/deps/node/deps/icu-small/source/common/brkiter.cpp +++ /dev/null @@ -1,508 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 1997-2015, International Business Machines Corporation and -* others. All Rights Reserved. -******************************************************************************* -* -* File brkiter.cpp -* -* Modification History: -* -* Date Name Description -* 02/18/97 aliu Converted from OpenClass. Added DONE. -* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. 
-***************************************************************************************** -*/ - -// ***************************************************************************** -// This file was generated from the java source file BreakIterator.java -// ***************************************************************************** - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/rbbi.h" -#include "unicode/brkiter.h" -#include "unicode/udata.h" -#include "unicode/ures.h" -#include "unicode/ustring.h" -#include "unicode/filteredbrk.h" -#include "ucln_cmn.h" -#include "cstring.h" -#include "umutex.h" -#include "servloc.h" -#include "locbased.h" -#include "uresimp.h" -#include "uassert.h" -#include "ubrkimpl.h" -#include "charstr.h" - -// ***************************************************************************** -// class BreakIterator -// This class implements methods for finding the location of boundaries in text. -// Instances of BreakIterator maintain a current position and scan over text -// returning the index of characters where boundaries occur. -// ***************************************************************************** - -U_NAMESPACE_BEGIN - -// ------------------------------------- - -BreakIterator* -BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status) -{ - char fnbuff[256]; - char ext[4]={'\0'}; - CharString actualLocale; - int32_t size; - const UChar* brkfname = NULL; - UResourceBundle brkRulesStack; - UResourceBundle brkNameStack; - UResourceBundle *brkRules = &brkRulesStack; - UResourceBundle *brkName = &brkNameStack; - RuleBasedBreakIterator *result = NULL; - - if (U_FAILURE(status)) - return NULL; - - ures_initStackObject(brkRules); - ures_initStackObject(brkName); - - // Get the locale - UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status); - - // Get the "boundaries" array. 
- if (U_SUCCESS(status)) { - brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); - // Get the string object naming the rules file - brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); - // Get the actual string - brkfname = ures_getString(brkName, &size, &status); - U_ASSERT((size_t)size=sizeof(fnbuff)) { - size=0; - if (U_SUCCESS(status)) { - status = U_BUFFER_OVERFLOW_ERROR; - } - } - - // Use the string if we found it - if (U_SUCCESS(status) && brkfname) { - actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status); - - UChar* extStart=u_strchr(brkfname, 0x002e); - int len = 0; - if(extStart!=NULL){ - len = (int)(extStart-brkfname); - u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff - u_UCharsToChars(brkfname, fnbuff, len); - } - fnbuff[len]=0; // nul terminate - } - } - - ures_close(brkRules); - ures_close(brkName); - - UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); - if (U_FAILURE(status)) { - ures_close(b); - return NULL; - } - - // Create a RuleBasedBreakIterator - result = new RuleBasedBreakIterator(file, status); - - // If there is a result, set the valid locale and actual locale, and the kind - if (U_SUCCESS(status) && result != NULL) { - U_LOCALE_BASED(locBased, *(BreakIterator*)result); - locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), - actualLocale.data()); - } - - ures_close(b); - - if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple - delete result; - return NULL; - } - - if (result == NULL) { - udata_close(file); - if (U_SUCCESS(status)) { - status = U_MEMORY_ALLOCATION_ERROR; - } - } - - return result; -} - -// Creates a break iterator for word breaks. 
-BreakIterator* U_EXPORT2 -BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) -{ - return createInstance(key, UBRK_WORD, status); -} - -// ------------------------------------- - -// Creates a break iterator for line breaks. -BreakIterator* U_EXPORT2 -BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) -{ - return createInstance(key, UBRK_LINE, status); -} - -// ------------------------------------- - -// Creates a break iterator for character breaks. -BreakIterator* U_EXPORT2 -BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) -{ - return createInstance(key, UBRK_CHARACTER, status); -} - -// ------------------------------------- - -// Creates a break iterator for sentence breaks. -BreakIterator* U_EXPORT2 -BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) -{ - return createInstance(key, UBRK_SENTENCE, status); -} - -// ------------------------------------- - -// Creates a break iterator for title casing breaks. -BreakIterator* U_EXPORT2 -BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) -{ - return createInstance(key, UBRK_TITLE, status); -} - -// ------------------------------------- - -// Gets all the available locales that has localized text boundary data. 
-const Locale* U_EXPORT2 -BreakIterator::getAvailableLocales(int32_t& count) -{ - return Locale::getAvailableLocales(count); -} - -// ------------------------------------------ -// -// Constructors, destructor and assignment operator -// -//------------------------------------------- - -BreakIterator::BreakIterator() -{ - *validLocale = *actualLocale = 0; -} - -BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) { - uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); - uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); -} - -BreakIterator &BreakIterator::operator =(const BreakIterator &other) { - if (this != &other) { - uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); - uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); - } - return *this; -} - -BreakIterator::~BreakIterator() -{ -} - -// ------------------------------------------ -// -// Registration -// -//------------------------------------------- -#if !UCONFIG_NO_SERVICE - -// ------------------------------------- - -class ICUBreakIteratorFactory : public ICUResourceBundleFactory { -public: - virtual ~ICUBreakIteratorFactory(); -protected: - virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { - return BreakIterator::makeInstance(loc, kind, status); - } -}; - -ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} - -// ------------------------------------- - -class ICUBreakIteratorService : public ICULocaleService { -public: - ICUBreakIteratorService() - : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) - { - UErrorCode status = U_ZERO_ERROR; - registerFactory(new ICUBreakIteratorFactory(), status); - } - - virtual ~ICUBreakIteratorService(); - - virtual UObject* cloneInstance(UObject* instance) const { - return ((BreakIterator*)instance)->clone(); - } - - virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, 
UErrorCode& status) const { - LocaleKey& lkey = (LocaleKey&)key; - int32_t kind = lkey.kind(); - Locale loc; - lkey.currentLocale(loc); - return BreakIterator::makeInstance(loc, kind, status); - } - - virtual UBool isDefault() const { - return countFactories() == 1; - } -}; - -ICUBreakIteratorService::~ICUBreakIteratorService() {} - -// ------------------------------------- - -// defined in ucln_cmn.h -U_NAMESPACE_END - -static icu::UInitOnce gInitOnceBrkiter; -static icu::ICULocaleService* gService = NULL; - - - -/** - * Release all static memory held by breakiterator. - */ -U_CDECL_BEGIN -static UBool U_CALLCONV breakiterator_cleanup(void) { -#if !UCONFIG_NO_SERVICE - if (gService) { - delete gService; - gService = NULL; - } - gInitOnceBrkiter.reset(); -#endif - return TRUE; -} -U_CDECL_END -U_NAMESPACE_BEGIN - -static void U_CALLCONV -initService(void) { - gService = new ICUBreakIteratorService(); - ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); -} - -static ICULocaleService* -getService(void) -{ - umtx_initOnce(gInitOnceBrkiter, &initService); - return gService; -} - - -// ------------------------------------- - -static inline UBool -hasService(void) -{ - return !gInitOnceBrkiter.isReset() && getService() != NULL; -} - -// ------------------------------------- - -URegistryKey U_EXPORT2 -BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) -{ - ICULocaleService *service = getService(); - if (service == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - return service->registerInstance(toAdopt, locale, kind, status); -} - -// ------------------------------------- - -UBool U_EXPORT2 -BreakIterator::unregister(URegistryKey key, UErrorCode& status) -{ - if (U_SUCCESS(status)) { - if (hasService()) { - return gService->unregister(key, status); - } - status = U_MEMORY_ALLOCATION_ERROR; - } - return FALSE; -} - -// ------------------------------------- 
- -StringEnumeration* U_EXPORT2 -BreakIterator::getAvailableLocales(void) -{ - ICULocaleService *service = getService(); - if (service == NULL) { - return NULL; - } - return service->getAvailableLocales(); -} -#endif /* UCONFIG_NO_SERVICE */ - -// ------------------------------------- - -BreakIterator* -BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) -{ - if (U_FAILURE(status)) { - return NULL; - } - -#if !UCONFIG_NO_SERVICE - if (hasService()) { - Locale actualLoc(""); - BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); - // TODO: The way the service code works in ICU 2.8 is that if - // there is a real registered break iterator, the actualLoc - // will be populated, but if the handleDefault path is taken - // (because nothing is registered that can handle the - // requested locale) then the actualLoc comes back empty. In - // that case, the returned object already has its actual/valid - // locale data populated (by makeInstance, which is what - // handleDefault calls), so we don't touch it. YES, A COMMENT - // THIS LONG is a sign of bad code -- so the action item is to - // revisit this in ICU 3.0 and clean it up/fix it/remove it. 
- if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { - U_LOCALE_BASED(locBased, *result); - locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); - } - return result; - } - else -#endif - { - return makeInstance(loc, kind, status); - } -} - -// ------------------------------------- -enum { kKeyValueLenMax = 32 }; - -BreakIterator* -BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) -{ - - if (U_FAILURE(status)) { - return NULL; - } - char lbType[kKeyValueLenMax]; - - BreakIterator *result = NULL; - switch (kind) { - case UBRK_CHARACTER: - result = BreakIterator::buildInstance(loc, "grapheme", status); - break; - case UBRK_WORD: - result = BreakIterator::buildInstance(loc, "word", status); - break; - case UBRK_LINE: - uprv_strcpy(lbType, "line"); - { - char lbKeyValue[kKeyValueLenMax] = {0}; - UErrorCode kvStatus = U_ZERO_ERROR; - int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus); - if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) { - uprv_strcat(lbType, "_"); - uprv_strcat(lbType, lbKeyValue); - } - } - result = BreakIterator::buildInstance(loc, lbType, status); - break; - case UBRK_SENTENCE: - result = BreakIterator::buildInstance(loc, "sentence", status); -#if !UCONFIG_NO_FILTERED_BREAK_ITERATION - { - char ssKeyValue[kKeyValueLenMax] = {0}; - UErrorCode kvStatus = U_ZERO_ERROR; - int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus); - if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) { - FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus); - if (U_SUCCESS(kvStatus)) { - result = fbiBuilder->build(result, status); - delete fbiBuilder; - } - } - } -#endif - break; - case UBRK_TITLE: - result = BreakIterator::buildInstance(loc, "title", status); - break; - default: 
- status = U_ILLEGAL_ARGUMENT_ERROR; - } - - if (U_FAILURE(status)) { - return NULL; - } - - return result; -} - -Locale -BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { - U_LOCALE_BASED(locBased, *this); - return locBased.getLocale(type, status); -} - -const char * -BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { - U_LOCALE_BASED(locBased, *this); - return locBased.getLocaleID(type, status); -} - - -// This implementation of getRuleStatus is a do-nothing stub, here to -// provide a default implementation for any derived BreakIterator classes that -// do not implement it themselves. -int32_t BreakIterator::getRuleStatus() const { - return 0; -} - -// This implementation of getRuleStatusVec is a do-nothing stub, here to -// provide a default implementation for any derived BreakIterator classes that -// do not implement it themselves. -int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) { - if (U_FAILURE(status)) { - return 0; - } - if (capacity < 1) { - status = U_BUFFER_OVERFLOW_ERROR; - return 1; - } - *fillInVec = 0; - return 1; -} - -BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { - U_LOCALE_BASED(locBased, (*this)); - locBased.setLocaleIDs(valid, actual); -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ - -//eof diff --git a/deps/node/deps/icu-small/source/common/bytesinkutil.cpp b/deps/node/deps/icu-small/source/common/bytesinkutil.cpp deleted file mode 100644 index c64a845f..00000000 --- a/deps/node/deps/icu-small/source/common/bytesinkutil.cpp +++ /dev/null @@ -1,161 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// bytesinkutil.cpp -// created: 2017sep14 Markus W. 
Scherer - -#include "unicode/utypes.h" -#include "unicode/bytestream.h" -#include "unicode/edits.h" -#include "unicode/stringoptions.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" -#include "bytesinkutil.h" -#include "charstr.h" -#include "cmemory.h" -#include "uassert.h" - -U_NAMESPACE_BEGIN - -UBool -ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length, - ByteSink &sink, Edits *edits, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return FALSE; } - char scratch[200]; - int32_t s8Length = 0; - for (int32_t i = 0; i < s16Length;) { - int32_t capacity; - int32_t desiredCapacity = s16Length - i; - if (desiredCapacity < (INT32_MAX / 3)) { - desiredCapacity *= 3; // max 3 UTF-8 bytes per UTF-16 code unit - } else if (desiredCapacity < (INT32_MAX / 2)) { - desiredCapacity *= 2; - } else { - desiredCapacity = INT32_MAX; - } - char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity, - scratch, UPRV_LENGTHOF(scratch), &capacity); - capacity -= U8_MAX_LENGTH - 1; - int32_t j = 0; - for (; i < s16Length && j < capacity;) { - UChar32 c; - U16_NEXT_UNSAFE(s16, i, c); - U8_APPEND_UNSAFE(buffer, j, c); - } - if (j > (INT32_MAX - s8Length)) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return FALSE; - } - sink.Append(buffer, j); - s8Length += j; - } - if (edits != nullptr) { - edits->addReplace(length, s8Length); - } - return TRUE; -} - -UBool -ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit, - const char16_t *s16, int32_t s16Length, - ByteSink &sink, Edits *edits, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return FALSE; } - if ((limit - s) > INT32_MAX) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return FALSE; - } - return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode); -} - -void -ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) { - char s8[U8_MAX_LENGTH]; - int32_t s8Length = 0; - U8_APPEND_UNSAFE(s8, s8Length, c); - if 
(edits != nullptr) { - edits->addReplace(length, s8Length); - } - sink.Append(s8, s8Length); -} - -namespace { - -// See unicode/utf8.h U8_APPEND_UNSAFE(). -inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); } -inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } - -} // namespace - -void -ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) { - U_ASSERT(0x80 <= c && c <= 0x7ff); // 2-byte UTF-8 - char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) }; - sink.Append(s8, 2); -} - -void -ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length, - ByteSink &sink, uint32_t options, Edits *edits) { - U_ASSERT(length > 0); - if (edits != nullptr) { - edits->addUnchanged(length); - } - if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { - sink.Append(reinterpret_cast(s), length); - } -} - -UBool -ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit, - ByteSink &sink, uint32_t options, Edits *edits, - UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return FALSE; } - if ((limit - s) > INT32_MAX) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return FALSE; - } - int32_t length = (int32_t)(limit - s); - if (length > 0) { - appendNonEmptyUnchanged(s, length, sink, options, edits); - } - return TRUE; -} - -CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) { -} - -CharStringByteSink::~CharStringByteSink() = default; - -void -CharStringByteSink::Append(const char* bytes, int32_t n) { - UErrorCode status = U_ZERO_ERROR; - dest_.append(bytes, n, status); - // Any errors are silently ignored. 
-} - -char* -CharStringByteSink::GetAppendBuffer(int32_t min_capacity, - int32_t desired_capacity_hint, - char* scratch, - int32_t scratch_capacity, - int32_t* result_capacity) { - if (min_capacity < 1 || scratch_capacity < min_capacity) { - *result_capacity = 0; - return nullptr; - } - - UErrorCode status = U_ZERO_ERROR; - char* result = dest_.getAppendBuffer( - min_capacity, - desired_capacity_hint, - *result_capacity, - status); - if (U_SUCCESS(status)) { - return result; - } - - *result_capacity = scratch_capacity; - return scratch; -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/bytesinkutil.h b/deps/node/deps/icu-small/source/common/bytesinkutil.h deleted file mode 100644 index 69e4cbcd..00000000 --- a/deps/node/deps/icu-small/source/common/bytesinkutil.h +++ /dev/null @@ -1,83 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// bytesinkutil.h -// created: 2017sep14 Markus W. Scherer - -#include "unicode/utypes.h" -#include "unicode/bytestream.h" -#include "unicode/edits.h" -#include "cmemory.h" -#include "uassert.h" - -U_NAMESPACE_BEGIN - -class ByteSink; -class CharString; -class Edits; - -class U_COMMON_API ByteSinkUtil { -public: - ByteSinkUtil() = delete; // all static - - /** (length) bytes were mapped to valid (s16, s16Length). */ - static UBool appendChange(int32_t length, - const char16_t *s16, int32_t s16Length, - ByteSink &sink, Edits *edits, UErrorCode &errorCode); - - /** The bytes at [s, limit[ were mapped to valid (s16, s16Length). */ - static UBool appendChange(const uint8_t *s, const uint8_t *limit, - const char16_t *s16, int32_t s16Length, - ByteSink &sink, Edits *edits, UErrorCode &errorCode); - - /** (length) bytes were mapped/changed to valid code point c. */ - static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr); - - /** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. 
*/ - static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c, - ByteSink &sink, Edits *edits = nullptr) { - appendCodePoint((int32_t)(nextSrc - src), c, sink, edits); - } - - /** Append the two-byte character (U+0080..U+07FF). */ - static void appendTwoBytes(UChar32 c, ByteSink &sink); - - static UBool appendUnchanged(const uint8_t *s, int32_t length, - ByteSink &sink, uint32_t options, Edits *edits, - UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return FALSE; } - if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); } - return TRUE; - } - - static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit, - ByteSink &sink, uint32_t options, Edits *edits, - UErrorCode &errorCode); - -private: - static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length, - ByteSink &sink, uint32_t options, Edits *edits); -}; - -class CharStringByteSink : public ByteSink { -public: - CharStringByteSink(CharString* dest); - ~CharStringByteSink() override; - - CharStringByteSink() = delete; - CharStringByteSink(const CharStringByteSink&) = delete; - CharStringByteSink& operator=(const CharStringByteSink&) = delete; - - void Append(const char* bytes, int32_t n) override; - - char* GetAppendBuffer(int32_t min_capacity, - int32_t desired_capacity_hint, - char* scratch, - int32_t scratch_capacity, - int32_t* result_capacity) override; - -private: - CharString& dest_; -}; - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/bytestream.cpp b/deps/node/deps/icu-small/source/common/bytestream.cpp deleted file mode 100644 index 0d0e4dda..00000000 --- a/deps/node/deps/icu-small/source/common/bytestream.cpp +++ /dev/null @@ -1,85 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// Copyright (C) 2009-2011, International Business Machines -// Corporation and others. All Rights Reserved. -// -// Copyright 2007 Google Inc. 
All Rights Reserved. -// Author: sanjay@google.com (Sanjay Ghemawat) - -#include "unicode/utypes.h" -#include "unicode/bytestream.h" -#include "cmemory.h" - -U_NAMESPACE_BEGIN - -ByteSink::~ByteSink() {} - -char* ByteSink::GetAppendBuffer(int32_t min_capacity, - int32_t /*desired_capacity_hint*/, - char* scratch, int32_t scratch_capacity, - int32_t* result_capacity) { - if (min_capacity < 1 || scratch_capacity < min_capacity) { - *result_capacity = 0; - return NULL; - } - *result_capacity = scratch_capacity; - return scratch; -} - -void ByteSink::Flush() {} - -CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity) - : outbuf_(outbuf), capacity_(capacity < 0 ? 0 : capacity), - size_(0), appended_(0), overflowed_(FALSE) { -} - -CheckedArrayByteSink::~CheckedArrayByteSink() {} - -CheckedArrayByteSink& CheckedArrayByteSink::Reset() { - size_ = appended_ = 0; - overflowed_ = FALSE; - return *this; -} - -void CheckedArrayByteSink::Append(const char* bytes, int32_t n) { - if (n <= 0) { - return; - } - if (n > (INT32_MAX - appended_)) { - // TODO: Report as integer overflow, not merely buffer overflow. 
- appended_ = INT32_MAX; - overflowed_ = TRUE; - return; - } - appended_ += n; - int32_t available = capacity_ - size_; - if (n > available) { - n = available; - overflowed_ = TRUE; - } - if (n > 0 && bytes != (outbuf_ + size_)) { - uprv_memcpy(outbuf_ + size_, bytes, n); - } - size_ += n; -} - -char* CheckedArrayByteSink::GetAppendBuffer(int32_t min_capacity, - int32_t /*desired_capacity_hint*/, - char* scratch, - int32_t scratch_capacity, - int32_t* result_capacity) { - if (min_capacity < 1 || scratch_capacity < min_capacity) { - *result_capacity = 0; - return NULL; - } - int32_t available = capacity_ - size_; - if (available >= min_capacity) { - *result_capacity = available; - return outbuf_ + size_; - } else { - *result_capacity = scratch_capacity; - return scratch; - } -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/bytestrie.cpp b/deps/node/deps/icu-small/source/common/bytestrie.cpp deleted file mode 100644 index c4d498c4..00000000 --- a/deps/node/deps/icu-small/source/common/bytestrie.cpp +++ /dev/null @@ -1,441 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2011, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: bytestrie.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010sep25 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/bytestream.h" -#include "unicode/bytestrie.h" -#include "unicode/uobject.h" -#include "cmemory.h" -#include "uassert.h" - -U_NAMESPACE_BEGIN - -BytesTrie::~BytesTrie() { - uprv_free(ownedArray_); -} - -// lead byte already shifted right by 1. 
-int32_t -BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) { - int32_t value; - if(leadByte=kMinValueLead) ? - valueResult(node) : USTRINGTRIE_NO_VALUE; - } -} - -UStringTrieResult -BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) { - // Branch according to the current byte. - if(length==0) { - length=*pos++; - } - ++length; - // The length of the branch is the number of bytes to select from. - // The data structure encodes a binary search. - while(length>kMaxBranchLinearSubNodeLength) { - if(inByte<*pos++) { - length>>=1; - pos=jumpByDelta(pos); - } else { - length=length-(length>>1); - pos=skipDelta(pos); - } - } - // Drop down to linear search for the last few bytes. - // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3 - // and divides length by 2. - do { - if(inByte==*pos++) { - UStringTrieResult result; - int32_t node=*pos; - U_ASSERT(node>=kMinValueLead); - if(node&kValueIsFinal) { - // Leave the final value for getValue() to read. - result=USTRINGTRIE_FINAL_VALUE; - } else { - // Use the non-final value as the jump delta. - ++pos; - // int32_t delta=readValue(pos, node>>1); - node>>=1; - int32_t delta; - if(node=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; - } - pos_=pos; - return result; - } - --length; - pos=skipValue(pos); - } while(length>1); - if(inByte==*pos++) { - pos_=pos; - int32_t node=*pos; - return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; - } else { - stop(); - return USTRINGTRIE_NO_MATCH; - } -} - -UStringTrieResult -BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) { - for(;;) { - int32_t node=*pos++; - if(node=kMinValueLead) ? - valueResult(node) : USTRINGTRIE_NO_VALUE; - } else { - // No match. - break; - } - } else if(node&kValueIsFinal) { - // No further matching bytes. - break; - } else { - // Skip intermediate value. - pos=skipValue(pos, node); - // The next node must not also be a value node. 
- U_ASSERT(*pos=0) { - // Remaining part of a linear-match node. - if(inByte==*pos++) { - remainingMatchLength_=--length; - pos_=pos; - int32_t node; - return (length<0 && (node=*pos)>=kMinValueLead) ? - valueResult(node) : USTRINGTRIE_NO_VALUE; - } else { - stop(); - return USTRINGTRIE_NO_MATCH; - } - } - return nextImpl(pos, inByte); -} - -UStringTrieResult -BytesTrie::next(const char *s, int32_t sLength) { - if(sLength<0 ? *s==0 : sLength==0) { - // Empty input. - return current(); - } - const uint8_t *pos=pos_; - if(pos==NULL) { - return USTRINGTRIE_NO_MATCH; - } - int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. - for(;;) { - // Fetch the next input byte, if there is one. - // Continue a linear-match node without rechecking sLength<0. - int32_t inByte; - if(sLength<0) { - for(;;) { - if((inByte=*s++)==0) { - remainingMatchLength_=length; - pos_=pos; - int32_t node; - return (length<0 && (node=*pos)>=kMinValueLead) ? - valueResult(node) : USTRINGTRIE_NO_VALUE; - } - if(length<0) { - remainingMatchLength_=length; - break; - } - if(inByte!=*pos) { - stop(); - return USTRINGTRIE_NO_MATCH; - } - ++pos; - --length; - } - } else { - for(;;) { - if(sLength==0) { - remainingMatchLength_=length; - pos_=pos; - int32_t node; - return (length<0 && (node=*pos)>=kMinValueLead) ? 
- valueResult(node) : USTRINGTRIE_NO_VALUE; - } - inByte=*s++; - --sLength; - if(length<0) { - remainingMatchLength_=length; - break; - } - if(inByte!=*pos) { - stop(); - return USTRINGTRIE_NO_MATCH; - } - ++pos; - --length; - } - } - for(;;) { - int32_t node=*pos++; - if(nodekMaxBranchLinearSubNodeLength) { - ++pos; // ignore the comparison byte - if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) { - return NULL; - } - length=length-(length>>1); - pos=skipDelta(pos); - } - do { - ++pos; // ignore a comparison byte - // handle its value - int32_t node=*pos++; - UBool isFinal=(UBool)(node&kValueIsFinal); - int32_t value=readValue(pos, node>>1); - pos=skipValue(pos, node); - if(isFinal) { - if(haveUniqueValue) { - if(value!=uniqueValue) { - return NULL; - } - } else { - uniqueValue=value; - haveUniqueValue=TRUE; - } - } else { - if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) { - return NULL; - } - haveUniqueValue=TRUE; - } - } while(--length>1); - return pos+1; // ignore the last comparison byte -} - -UBool -BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) { - for(;;) { - int32_t node=*pos++; - if(node>1); - if(haveUniqueValue) { - if(value!=uniqueValue) { - return FALSE; - } - } else { - uniqueValue=value; - haveUniqueValue=TRUE; - } - if(isFinal) { - return TRUE; - } - pos=skipValue(pos, node); - } - } -} - -int32_t -BytesTrie::getNextBytes(ByteSink &out) const { - const uint8_t *pos=pos_; - if(pos==NULL) { - return 0; - } - if(remainingMatchLength_>=0) { - append(out, *pos); // Next byte of a pending linear-match node. 
- return 1; - } - int32_t node=*pos++; - if(node>=kMinValueLead) { - if(node&kValueIsFinal) { - return 0; - } else { - pos=skipValue(pos, node); - node=*pos++; - U_ASSERT(nodekMaxBranchLinearSubNodeLength) { - ++pos; // ignore the comparison byte - getNextBranchBytes(jumpByDelta(pos), length>>1, out); - length=length-(length>>1); - pos=skipDelta(pos); - } - do { - append(out, *pos++); - pos=skipValue(pos); - } while(--length>1); - append(out, *pos); -} - -void -BytesTrie::append(ByteSink &out, int c) { - char ch=(char)c; - out.Append(&ch, 1); -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/bytestriebuilder.cpp b/deps/node/deps/icu-small/source/common/bytestriebuilder.cpp deleted file mode 100644 index ec1ab7d8..00000000 --- a/deps/node/deps/icu-small/source/common/bytestriebuilder.cpp +++ /dev/null @@ -1,504 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2012, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: bytestriebuilder.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010sep25 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/bytestrie.h" -#include "unicode/bytestriebuilder.h" -#include "unicode/stringpiece.h" -#include "charstr.h" -#include "cmemory.h" -#include "uhash.h" -#include "uarrsort.h" -#include "uassert.h" -#include "ustr_imp.h" - -U_NAMESPACE_BEGIN - -/* - * Note: This builder implementation stores (bytes, value) pairs with full copies - * of the byte sequences, until the BytesTrie is built. - * It might(!) take less memory if we collected the data in a temporary, dynamic trie. 
- */ - -class BytesTrieElement : public UMemory { -public: - // Use compiler's default constructor, initializes nothing. - - void setTo(StringPiece s, int32_t val, CharString &strings, UErrorCode &errorCode); - - StringPiece getString(const CharString &strings) const { - int32_t offset=stringOffset; - int32_t length; - if(offset>=0) { - length=(uint8_t)strings[offset++]; - } else { - offset=~offset; - length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1]; - offset+=2; - } - return StringPiece(strings.data()+offset, length); - } - int32_t getStringLength(const CharString &strings) const { - int32_t offset=stringOffset; - if(offset>=0) { - return (uint8_t)strings[offset]; - } else { - offset=~offset; - return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1]; - } - } - - char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; } - - int32_t getValue() const { return value; } - - int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const; - -private: - const char *data(const CharString &strings) const { - int32_t offset=stringOffset; - if(offset>=0) { - ++offset; - } else { - offset=~offset+2; - } - return strings.data()+offset; - } - - // If the stringOffset is non-negative, then the first strings byte contains - // the string length. - // If the stringOffset is negative, then the first two strings bytes contain - // the string length (big-endian), and the offset needs to be bit-inverted. - // (Compared with a stringLength field here, this saves 3 bytes per string for most strings.) - int32_t stringOffset; - int32_t value; -}; - -void -BytesTrieElement::setTo(StringPiece s, int32_t val, - CharString &strings, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - int32_t length=s.length(); - if(length>0xffff) { - // Too long: We store the length in 1 or 2 bytes. 
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - int32_t offset=strings.length(); - if(length>0xff) { - offset=~offset; - strings.append((char)(length>>8), errorCode); - } - strings.append((char)length, errorCode); - stringOffset=offset; - value=val; - strings.append(s, errorCode); -} - -int32_t -BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const { - // TODO: add StringPiece::compare(), see ticket #8187 - StringPiece thisString=getString(strings); - StringPiece otherString=other.getString(strings); - int32_t lengthDiff=thisString.length()-otherString.length(); - int32_t commonLength; - if(lengthDiff<=0) { - commonLength=thisString.length(); - } else { - commonLength=otherString.length(); - } - int32_t diff=uprv_memcmp(thisString.data(), otherString.data(), commonLength); - return diff!=0 ? diff : lengthDiff; -} - -BytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode) - : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0), - bytes(NULL), bytesCapacity(0), bytesLength(0) { - if(U_FAILURE(errorCode)) { - return; - } - strings=new CharString(); - if(strings==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } -} - -BytesTrieBuilder::~BytesTrieBuilder() { - delete strings; - delete[] elements; - uprv_free(bytes); -} - -BytesTrieBuilder & -BytesTrieBuilder::add(StringPiece s, int32_t value, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return *this; - } - if(bytesLength>0) { - // Cannot add elements after building. 
- errorCode=U_NO_WRITE_PERMISSION; - return *this; - } - if(elementsLength==elementsCapacity) { - int32_t newCapacity; - if(elementsCapacity==0) { - newCapacity=1024; - } else { - newCapacity=4*elementsCapacity; - } - BytesTrieElement *newElements=new BytesTrieElement[newCapacity]; - if(newElements==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return *this; // error instead of dereferencing null - } - if(elementsLength>0) { - uprv_memcpy(newElements, elements, (size_t)elementsLength*sizeof(BytesTrieElement)); - } - delete[] elements; - elements=newElements; - elementsCapacity=newCapacity; - } - elements[elementsLength++].setTo(s, value, *strings, errorCode); - return *this; -} - -U_CDECL_BEGIN - -static int32_t U_CALLCONV -compareElementStrings(const void *context, const void *left, const void *right) { - const CharString *strings=static_cast(context); - const BytesTrieElement *leftElement=static_cast(left); - const BytesTrieElement *rightElement=static_cast(right); - return leftElement->compareStringTo(*rightElement, *strings); -} - -U_CDECL_END - -BytesTrie * -BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { - buildBytes(buildOption, errorCode); - BytesTrie *newTrie=NULL; - if(U_SUCCESS(errorCode)) { - newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength)); - if(newTrie==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } else { - bytes=NULL; // The new trie now owns the array. - bytesCapacity=0; - } - } - return newTrie; -} - -StringPiece -BytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { - buildBytes(buildOption, errorCode); - StringPiece result; - if(U_SUCCESS(errorCode)) { - result.set(bytes+(bytesCapacity-bytesLength), bytesLength); - } - return result; -} - -void -BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - if(bytes!=NULL && bytesLength>0) { - // Already built. 
- return; - } - if(bytesLength==0) { - if(elementsLength==0) { - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement), - compareElementStrings, strings, - FALSE, // need not be a stable sort - &errorCode); - if(U_FAILURE(errorCode)) { - return; - } - // Duplicate strings are not allowed. - StringPiece prev=elements[0].getString(*strings); - for(int32_t i=1; ilength(); - if(capacity<1024) { - capacity=1024; - } - if(bytesCapacity(uprv_malloc(capacity)); - if(bytes==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - bytesCapacity=0; - return; - } - bytesCapacity=capacity; - } - StringTrieBuilder::build(buildOption, elementsLength, errorCode); - if(bytes==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } -} - -BytesTrieBuilder & -BytesTrieBuilder::clear() { - strings->clear(); - elementsLength=0; - bytesLength=0; - return *this; -} - -int32_t -BytesTrieBuilder::getElementStringLength(int32_t i) const { - return elements[i].getStringLength(*strings); -} - -UChar -BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const { - return (uint8_t)elements[i].charAt(byteIndex, *strings); -} - -int32_t -BytesTrieBuilder::getElementValue(int32_t i) const { - return elements[i].getValue(); -} - -int32_t -BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const { - const BytesTrieElement &firstElement=elements[first]; - const BytesTrieElement &lastElement=elements[last]; - int32_t minStringLength=firstElement.getStringLength(*strings); - while(++byteIndex0); - return i; -} - -int32_t -BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const { - char b=(char)byte; - while(b==elements[i].charAt(byteIndex, *strings)) { - ++i; - } - return i; -} - -BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode) - : LinearMatchNode(len, nextNode), s(bytes) { - hash=static_cast( - 
static_cast(hash)*37u + static_cast(ustr_hashCharsN(bytes, len))); -} - -UBool -BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!LinearMatchNode::operator==(other)) { - return FALSE; - } - const BTLinearMatchNode &o=(const BTLinearMatchNode &)other; - return 0==uprv_memcmp(s, o.s, length); -} - -void -BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) { - BytesTrieBuilder &b=(BytesTrieBuilder &)builder; - next->write(builder); - b.write(s, length); - offset=b.write(b.getMinLinearMatch()+length-1); -} - -StringTrieBuilder::Node * -BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, - Node *nextNode) const { - return new BTLinearMatchNode( - elements[i].getString(*strings).data()+byteIndex, - length, - nextNode); -} - -UBool -BytesTrieBuilder::ensureCapacity(int32_t length) { - if(bytes==NULL) { - return FALSE; // previous memory allocation had failed - } - if(length>bytesCapacity) { - int32_t newCapacity=bytesCapacity; - do { - newCapacity*=2; - } while(newCapacity<=length); - char *newBytes=static_cast(uprv_malloc(newCapacity)); - if(newBytes==NULL) { - // unable to allocate memory - uprv_free(bytes); - bytes=NULL; - bytesCapacity=0; - return FALSE; - } - uprv_memcpy(newBytes+(newCapacity-bytesLength), - bytes+(bytesCapacity-bytesLength), bytesLength); - uprv_free(bytes); - bytes=newBytes; - bytesCapacity=newCapacity; - } - return TRUE; -} - -int32_t -BytesTrieBuilder::write(int32_t byte) { - int32_t newLength=bytesLength+1; - if(ensureCapacity(newLength)) { - bytesLength=newLength; - bytes[bytesCapacity-bytesLength]=(char)byte; - } - return bytesLength; -} - -int32_t -BytesTrieBuilder::write(const char *b, int32_t length) { - int32_t newLength=bytesLength+length; - if(ensureCapacity(newLength)) { - bytesLength=newLength; - uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length); - } - return bytesLength; -} - -int32_t 
-BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) { - return write(elements[i].getString(*strings).data()+byteIndex, length); -} - -int32_t -BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) { - if(0<=i && i<=BytesTrie::kMaxOneByteValue) { - return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal); - } - char intBytes[5]; - int32_t length=1; - if(i<0 || i>0xffffff) { - intBytes[0]=(char)BytesTrie::kFiveByteValueLead; - intBytes[1]=(char)((uint32_t)i>>24); - intBytes[2]=(char)((uint32_t)i>>16); - intBytes[3]=(char)((uint32_t)i>>8); - intBytes[4]=(char)i; - length=5; - // } else if(i<=BytesTrie::kMaxOneByteValue) { - // intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i); - } else { - if(i<=BytesTrie::kMaxTwoByteValue) { - intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8)); - } else { - if(i<=BytesTrie::kMaxThreeByteValue) { - intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16)); - } else { - intBytes[0]=(char)BytesTrie::kFourByteValueLead; - intBytes[1]=(char)(i>>16); - length=2; - } - intBytes[length++]=(char)(i>>8); - } - intBytes[length++]=(char)i; - } - intBytes[0]=(char)((intBytes[0]<<1)|isFinal); - return write(intBytes, length); -} - -int32_t -BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) { - int32_t offset=write(node); - if(hasValue) { - offset=writeValueAndFinal(value, FALSE); - } - return offset; -} - -int32_t -BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) { - int32_t i=bytesLength-jumpTarget; - U_ASSERT(i>=0); - if(i<=BytesTrie::kMaxOneByteDelta) { - return write(i); - } - char intBytes[5]; - int32_t length; - if(i<=BytesTrie::kMaxTwoByteDelta) { - intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8)); - length=1; - } else { - if(i<=BytesTrie::kMaxThreeByteDelta) { - intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16)); - length=2; - } else { - if(i<=0xffffff) { - intBytes[0]=(char)BytesTrie::kFourByteDeltaLead; - length=3; - } else 
{ - intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead; - intBytes[1]=(char)(i>>24); - length=4; - } - intBytes[1]=(char)(i>>16); - } - intBytes[1]=(char)(i>>8); - } - intBytes[length++]=(char)i; - return write(intBytes, length); -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/bytestrieiterator.cpp b/deps/node/deps/icu-small/source/common/bytestrieiterator.cpp deleted file mode 100644 index e64961a1..00000000 --- a/deps/node/deps/icu-small/source/common/bytestrieiterator.cpp +++ /dev/null @@ -1,214 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2012, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: bytestrieiterator.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010nov03 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/bytestrie.h" -#include "unicode/stringpiece.h" -#include "charstr.h" -#include "uvectr32.h" - -U_NAMESPACE_BEGIN - -BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength, - UErrorCode &errorCode) - : bytes_(static_cast(trieBytes)), - pos_(bytes_), initialPos_(bytes_), - remainingMatchLength_(-1), initialRemainingMatchLength_(-1), - str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { - if(U_FAILURE(errorCode)) { - return; - } - // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into - // a public API header for which we would want it to depend only on - // other public headers. - // Unlike BytesTrie itself, its Iterator performs memory allocations anyway - // via the CharString and UVector32 implementations, so this additional - // cost is minimal. 
- str_=new CharString(); - stack_=new UVector32(errorCode); - if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } -} - -BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength, - UErrorCode &errorCode) - : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), - remainingMatchLength_(trie.remainingMatchLength_), - initialRemainingMatchLength_(trie.remainingMatchLength_), - str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { - if(U_FAILURE(errorCode)) { - return; - } - str_=new CharString(); - stack_=new UVector32(errorCode); - if(U_FAILURE(errorCode)) { - return; - } - if(str_==NULL || stack_==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. - if(length>=0) { - // Pending linear-match node, append remaining bytes to str_. - ++length; - if(maxLength_>0 && length>maxLength_) { - length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. - } - str_->append(reinterpret_cast(pos_), length, errorCode); - pos_+=length; - remainingMatchLength_-=length; - } -} - -BytesTrie::Iterator::~Iterator() { - delete str_; - delete stack_; -} - -BytesTrie::Iterator & -BytesTrie::Iterator::reset() { - pos_=initialPos_; - remainingMatchLength_=initialRemainingMatchLength_; - int32_t length=remainingMatchLength_+1; // Remaining match length. 
- if(maxLength_>0 && length>maxLength_) { - length=maxLength_; - } - str_->truncate(length); - pos_+=length; - remainingMatchLength_-=length; - stack_->setSize(0); - return *this; -} - -UBool -BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } - -UBool -BytesTrie::Iterator::next(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return FALSE; - } - const uint8_t *pos=pos_; - if(pos==NULL) { - if(stack_->isEmpty()) { - return FALSE; - } - // Pop the state off the stack and continue with the next outbound edge of - // the branch node. - int32_t stackSize=stack_->size(); - int32_t length=stack_->elementAti(stackSize-1); - pos=bytes_+stack_->elementAti(stackSize-2); - stack_->setSize(stackSize-2); - str_->truncate(length&0xffff); - length=(int32_t)((uint32_t)length>>16); - if(length>1) { - pos=branchNext(pos, length, errorCode); - if(pos==NULL) { - return TRUE; // Reached a final value. - } - } else { - str_->append((char)*pos++, errorCode); - } - } - if(remainingMatchLength_>=0) { - // We only get here if we started in a pending linear-match node - // with more than maxLength remaining bytes. - return truncateAndStop(); - } - for(;;) { - int32_t node=*pos++; - if(node>=kMinValueLead) { - // Deliver value for the byte sequence so far. - UBool isFinal=(UBool)(node&kValueIsFinal); - value_=readValue(pos, node>>1); - if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) { - pos_=NULL; - } else { - pos_=skipValue(pos, node); - } - return TRUE; - } - if(maxLength_>0 && str_->length()==maxLength_) { - return truncateAndStop(); - } - if(node0 && str_->length()+length>maxLength_) { - str_->append(reinterpret_cast(pos), - maxLength_-str_->length(), errorCode); - return truncateAndStop(); - } - str_->append(reinterpret_cast(pos), length, errorCode); - pos+=length; - } - } -} - -StringPiece -BytesTrie::Iterator::getString() const { - return str_ == NULL ? 
StringPiece() : str_->toStringPiece(); -} - -UBool -BytesTrie::Iterator::truncateAndStop() { - pos_=NULL; - value_=-1; // no real value for str - return TRUE; -} - -// Branch node, needs to take the first outbound edge and push state for the rest. -const uint8_t * -BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { - while(length>kMaxBranchLinearSubNodeLength) { - ++pos; // ignore the comparison byte - // Push state for the greater-or-equal edge. - stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); - stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); - // Follow the less-than edge. - length>>=1; - pos=jumpByDelta(pos); - } - // List of key-value pairs where values are either final values or jump deltas. - // Read the first (key, value) pair. - uint8_t trieByte=*pos++; - int32_t node=*pos++; - UBool isFinal=(UBool)(node&kValueIsFinal); - int32_t value=readValue(pos, node>>1); - pos=skipValue(pos, node); - stack_->addElement((int32_t)(pos-bytes_), errorCode); - stack_->addElement(((length-1)<<16)|str_->length(), errorCode); - str_->append((char)trieByte, errorCode); - if(isFinal) { - pos_=NULL; - value_=value; - return NULL; - } else { - return pos+value; - } -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/caniter.cpp b/deps/node/deps/icu-small/source/common/caniter.cpp deleted file mode 100644 index d57c6424..00000000 --- a/deps/node/deps/icu-small/source/common/caniter.cpp +++ /dev/null @@ -1,586 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ***************************************************************************** - * Copyright (C) 1996-2015, International Business Machines Corporation and - * others. All Rights Reserved. 
- ***************************************************************************** - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/caniter.h" -#include "unicode/normalizer2.h" -#include "unicode/uchar.h" -#include "unicode/uniset.h" -#include "unicode/usetiter.h" -#include "unicode/ustring.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "hash.h" -#include "normalizer2impl.h" - -/** - * This class allows one to iterate through all the strings that are canonically equivalent to a given - * string. For example, here are some sample results: -Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} -1: \u0041\u030A\u0064\u0307\u0327 - = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} -2: \u0041\u030A\u0064\u0327\u0307 - = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} -3: \u0041\u030A\u1E0B\u0327 - = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} -4: \u0041\u030A\u1E11\u0307 - = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} -5: \u00C5\u0064\u0307\u0327 - = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} -6: \u00C5\u0064\u0327\u0307 - = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} -7: \u00C5\u1E0B\u0327 - = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} -8: \u00C5\u1E11\u0307 - = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} -9: \u212B\u0064\u0307\u0327 - = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} -10: \u212B\u0064\u0327\u0307 - = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING 
CEDILLA}{COMBINING DOT ABOVE} -11: \u212B\u1E0B\u0327 - = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} -12: \u212B\u1E11\u0307 - = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} - *
Note: the code is intended for use with small strings, and is not suitable for larger ones, - * since it has not been optimized for that situation. - *@author M. Davis - *@draft - */ - -// public - -U_NAMESPACE_BEGIN - -// TODO: add boilerplate methods. - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator) - -/** - *@param source string to get results for - */ -CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) : - pieces(NULL), - pieces_length(0), - pieces_lengths(NULL), - current(NULL), - current_length(0), - nfd(*Normalizer2::getNFDInstance(status)), - nfcImpl(*Normalizer2Factory::getNFCImpl(status)) -{ - if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) { - setSource(sourceStr, status); - } -} - -CanonicalIterator::~CanonicalIterator() { - cleanPieces(); -} - -void CanonicalIterator::cleanPieces() { - int32_t i = 0; - if(pieces != NULL) { - for(i = 0; i < pieces_length; i++) { - if(pieces[i] != NULL) { - delete[] pieces[i]; - } - } - uprv_free(pieces); - pieces = NULL; - pieces_length = 0; - } - if(pieces_lengths != NULL) { - uprv_free(pieces_lengths); - pieces_lengths = NULL; - } - if(current != NULL) { - uprv_free(current); - current = NULL; - current_length = 0; - } -} - -/** - *@return gets the source: NOTE: it is the NFD form of source - */ -UnicodeString CanonicalIterator::getSource() { - return source; -} - -/** - * Resets the iterator so that one can start again from the beginning. - */ -void CanonicalIterator::reset() { - done = FALSE; - for (int i = 0; i < current_length; ++i) { - current[i] = 0; - } -} - -/** - *@return the next string that is canonically equivalent. The value null is returned when - * the iteration is done. 
- */ -UnicodeString CanonicalIterator::next() { - int32_t i = 0; - - if (done) { - buffer.setToBogus(); - return buffer; - } - - // delete old contents - buffer.remove(); - - // construct return value - - for (i = 0; i < pieces_length; ++i) { - buffer.append(pieces[i][current[i]]); - } - //String result = buffer.toString(); // not needed - - // find next value for next time - - for (i = current_length - 1; ; --i) { - if (i < 0) { - done = TRUE; - break; - } - current[i]++; - if (current[i] < pieces_lengths[i]) break; // got sequence - current[i] = 0; - } - return buffer; -} - -/** - *@param set the source string to iterate against. This allows the same iterator to be used - * while changing the source string, saving object creation. - */ -void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) { - int32_t list_length = 0; - UChar32 cp = 0; - int32_t start = 0; - int32_t i = 0; - UnicodeString *list = NULL; - - nfd.normalize(newSource, source, status); - if(U_FAILURE(status)) { - return; - } - done = FALSE; - - cleanPieces(); - - // catch degenerate case - if (newSource.length() == 0) { - pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *)); - pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t)); - pieces_length = 1; - current = (int32_t*)uprv_malloc(1 * sizeof(int32_t)); - current_length = 1; - if (pieces == NULL || pieces_lengths == NULL || current == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - goto CleanPartialInitialization; - } - current[0] = 0; - pieces[0] = new UnicodeString[1]; - pieces_lengths[0] = 1; - if (pieces[0] == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - goto CleanPartialInitialization; - } - return; - } - - - list = new UnicodeString[source.length()]; - if (list == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - goto CleanPartialInitialization; - } - - // i should initialy be the number of code units at the - // start of the string - i = U16_LENGTH(source.char32At(0)); - //int32_t i = 1; - // find 
the segments - // This code iterates through the source string and - // extracts segments that end up on a codepoint that - // doesn't start any decompositions. (Analysis is done - // on the NFD form - see above). - for (; i < source.length(); i += U16_LENGTH(cp)) { - cp = source.char32At(i); - if (nfcImpl.isCanonSegmentStarter(cp)) { - source.extract(start, i-start, list[list_length++]); // add up to i - start = i; - } - } - source.extract(start, i-start, list[list_length++]); // add last one - - - // allocate the arrays, and find the strings that are CE to each segment - pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *)); - pieces_length = list_length; - pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t)); - current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t)); - current_length = list_length; - if (pieces == NULL || pieces_lengths == NULL || current == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - goto CleanPartialInitialization; - } - - for (i = 0; i < current_length; i++) { - current[i] = 0; - } - // for each segment, get all the combinations that can produce - // it after NFD normalization - for (i = 0; i < pieces_length; ++i) { - //if (PROGRESS) printf("SEGMENT\n"); - pieces[i] = getEquivalents(list[i], pieces_lengths[i], status); - } - - delete[] list; - return; -// Common section to cleanup all local variables and reset object variables. -CleanPartialInitialization: - if (list != NULL) { - delete[] list; - } - cleanPieces(); -} - -/** - * Dumb recursive implementation of permutation. - * TODO: optimize - * @param source the string to find permutations for - * @return the results in a set. 
- */ -void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) { - if(U_FAILURE(status)) { - return; - } - //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source))); - int32_t i = 0; - - // optimization: - // if zero or one character, just return a set with it - // we check for length < 2 to keep from counting code points all the time - if (source.length() <= 2 && source.countChar32() <= 1) { - UnicodeString *toPut = new UnicodeString(source); - /* test for NULL */ - if (toPut == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - result->put(source, toPut, status); - return; - } - - // otherwise iterate through the string, and recursively permute all the other characters - UChar32 cp; - Hashtable subpermute(status); - if(U_FAILURE(status)) { - return; - } - subpermute.setValueDeleter(uprv_deleteUObject); - - for (i = 0; i < source.length(); i += U16_LENGTH(cp)) { - cp = source.char32At(i); - const UHashElement *ne = NULL; - int32_t el = UHASH_FIRST; - UnicodeString subPermuteString = source; - - // optimization: - // if the character is canonical combining class zero, - // don't permute it - if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) { - //System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i))); - continue; - } - - subpermute.removeAll(); - - // see what the permutations of the characters before and after this one are - //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp))); - permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status); - /* Test for buffer overflows */ - if(U_FAILURE(status)) { - return; - } - // The upper remove is destructive. The question is do we have to make a copy, or we don't care about the contents - // of source at this point. 
- - // prefix this character to all of them - ne = subpermute.nextElement(el); - while (ne != NULL) { - UnicodeString *permRes = (UnicodeString *)(ne->value.pointer); - UnicodeString *chStr = new UnicodeString(cp); - //test for NULL - if (chStr == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer)); - //if (PROGRESS) printf(" Piece: %s\n", UToS(*chStr)); - result->put(*chStr, chStr, status); - ne = subpermute.nextElement(el); - } - } - //return result; -} - -// privates - -// we have a segment, in NFD. Find all the strings that are canonically equivalent to it. -UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) { - Hashtable result(status); - Hashtable permutations(status); - Hashtable basic(status); - if (U_FAILURE(status)) { - return 0; - } - result.setValueDeleter(uprv_deleteUObject); - permutations.setValueDeleter(uprv_deleteUObject); - basic.setValueDeleter(uprv_deleteUObject); - - UChar USeg[256]; - int32_t segLen = segment.extract(USeg, 256, status); - getEquivalents2(&basic, USeg, segLen, status); - - // now get all the permutations - // add only the ones that are canonically equivalent - // TODO: optimize by not permuting any class zero. 
- - const UHashElement *ne = NULL; - int32_t el = UHASH_FIRST; - //Iterator it = basic.iterator(); - ne = basic.nextElement(el); - //while (it.hasNext()) - while (ne != NULL) { - //String item = (String) it.next(); - UnicodeString item = *((UnicodeString *)(ne->value.pointer)); - - permutations.removeAll(); - permute(item, CANITER_SKIP_ZEROES, &permutations, status); - const UHashElement *ne2 = NULL; - int32_t el2 = UHASH_FIRST; - //Iterator it2 = permutations.iterator(); - ne2 = permutations.nextElement(el2); - //while (it2.hasNext()) - while (ne2 != NULL) { - //String possible = (String) it2.next(); - //UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer))); - UnicodeString possible(*((UnicodeString *)(ne2->value.pointer))); - UnicodeString attempt; - nfd.normalize(possible, attempt, status); - - // TODO: check if operator == is semanticaly the same as attempt.equals(segment) - if (attempt==segment) { - //if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible))); - // TODO: use the hashtable just to catch duplicates - store strings directly (somehow). 
- result.put(possible, new UnicodeString(possible), status); //add(possible); - } else { - //if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible))); - } - - ne2 = permutations.nextElement(el2); - } - ne = basic.nextElement(el); - } - - /* Test for buffer overflows */ - if(U_FAILURE(status)) { - return 0; - } - // convert into a String[] to clean up storage - //String[] finalResult = new String[result.size()]; - UnicodeString *finalResult = NULL; - int32_t resultCount; - if((resultCount = result.count()) != 0) { - finalResult = new UnicodeString[resultCount]; - if (finalResult == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - } - else { - status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - //result.toArray(finalResult); - result_len = 0; - el = UHASH_FIRST; - ne = result.nextElement(el); - while(ne != NULL) { - finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer)); - ne = result.nextElement(el); - } - - - return finalResult; -} - -Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) { - - if (U_FAILURE(status)) { - return NULL; - } - - //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment))); - - UnicodeString toPut(segment, segLen); - - fillinResult->put(toPut, new UnicodeString(toPut), status); - - UnicodeSet starts; - - // cycle through all the characters - UChar32 cp; - for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) { - // see if any character is at the start of some decomposition - U16_GET(segment, 0, i, segLen, cp); - if (!nfcImpl.getCanonStartSet(cp, starts)) { - continue; - } - // if so, see which decompositions match - UnicodeSetIterator iter(starts); - while (iter.next()) { - UChar32 cp2 = iter.getCodepoint(); - Hashtable remainder(status); - remainder.setValueDeleter(uprv_deleteUObject); - if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) { - continue; - } - - // there were some matches, so add all the 
possibilities to the set. - UnicodeString prefix(segment, i); - prefix += cp2; - - int32_t el = UHASH_FIRST; - const UHashElement *ne = remainder.nextElement(el); - while (ne != NULL) { - UnicodeString item = *((UnicodeString *)(ne->value.pointer)); - UnicodeString *toAdd = new UnicodeString(prefix); - /* test for NULL */ - if (toAdd == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - *toAdd += item; - fillinResult->put(*toAdd, toAdd, status); - - //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd))); - - ne = remainder.nextElement(el); - } - } - } - - /* Test for buffer overflows */ - if(U_FAILURE(status)) { - return NULL; - } - return fillinResult; -} - -/** - * See if the decomposition of cp2 is at segment starting at segmentPos - * (with canonical rearrangment!) - * If so, take the remainder, and return the equivalents - */ -Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) { -//Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) { - //if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp)))); - //if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos); - - if (U_FAILURE(status)) { - return NULL; - } - - UnicodeString temp(comp); - int32_t inputLen=temp.length(); - UnicodeString decompString; - nfd.normalize(temp, decompString, status); - if (U_FAILURE(status)) { - return NULL; - } - if (decompString.isBogus()) { - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - const UChar *decomp=decompString.getBuffer(); - int32_t decompLen=decompString.length(); - - // See if it matches the start of segment (at segmentPos) - UBool ok = FALSE; - UChar32 cp; - int32_t decompPos = 0; - UChar32 decompCp; - U16_NEXT(decomp, decompPos, decompLen, decompCp); - - int32_t i = segmentPos; - while(i < segLen) { - U16_NEXT(segment, i, segLen, cp); - - 
if (cp == decompCp) { // if equal, eat another cp from decomp - - //if (PROGRESS) printf(" matches: %s\n", UToS(Tr(UnicodeString(cp)))); - - if (decompPos == decompLen) { // done, have all decomp characters! - temp.append(segment+i, segLen-i); - ok = TRUE; - break; - } - U16_NEXT(decomp, decompPos, decompLen, decompCp); - } else { - //if (PROGRESS) printf(" buffer: %s\n", UToS(Tr(UnicodeString(cp)))); - - // brute force approach - temp.append(cp); - - /* TODO: optimize - // since we know that the classes are monotonically increasing, after zero - // e.g. 0 5 7 9 0 3 - // we can do an optimization - // there are only a few cases that work: zero, less, same, greater - // if both classes are the same, we fail - // if the decomp class < the segment class, we fail - - segClass = getClass(cp); - if (decompClass <= segClass) return null; - */ - } - } - if (!ok) - return NULL; // we failed, characters left over - - //if (PROGRESS) printf("Matches\n"); - - if (inputLen == temp.length()) { - fillinResult->put(UnicodeString(), new UnicodeString(), status); - return fillinResult; // succeed, but no remainder - } - - // brute force approach - // check to make sure result is canonically equivalent - UnicodeString trial; - nfd.normalize(temp, trial, status); - if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) { - return NULL; - } - - return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status); -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/deps/node/deps/icu-small/source/common/characterproperties.cpp b/deps/node/deps/icu-small/source/common/characterproperties.cpp deleted file mode 100644 index 3aff85b3..00000000 --- a/deps/node/deps/icu-small/source/common/characterproperties.cpp +++ /dev/null @@ -1,336 +0,0 @@ -// © 2018 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html - -// characterproperties.cpp -// created: 2018sep03 Markus W. Scherer - -#include "unicode/utypes.h" -#include "unicode/localpointer.h" -#include "unicode/uchar.h" -#include "unicode/ucpmap.h" -#include "unicode/ucptrie.h" -#include "unicode/umutablecptrie.h" -#include "unicode/uniset.h" -#include "unicode/uscript.h" -#include "unicode/uset.h" -#include "cmemory.h" -#include "mutex.h" -#include "normalizer2impl.h" -#include "uassert.h" -#include "ubidi_props.h" -#include "ucase.h" -#include "ucln_cmn.h" -#include "umutex.h" -#include "uprops.h" - -using icu::UInitOnce; -using icu::UnicodeSet; - -namespace { - -UBool U_CALLCONV characterproperties_cleanup(); - -struct Inclusion { - UnicodeSet *fSet; - UInitOnce fInitOnce; -}; -Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions() - -UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {}; - -UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {}; - -UMutex cpMutex = U_MUTEX_INITIALIZER; - -//---------------------------------------------------------------- -// Inclusions list -//---------------------------------------------------------------- - -// USetAdder implementation -// Does not use uset.h to reduce code dependencies -void U_CALLCONV -_set_add(USet *set, UChar32 c) { - ((UnicodeSet *)set)->add(c); -} - -void U_CALLCONV -_set_addRange(USet *set, UChar32 start, UChar32 end) { - ((UnicodeSet *)set)->add(start, end); -} - -void U_CALLCONV -_set_addString(USet *set, const UChar *str, int32_t length) { - ((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length)); -} - -UBool U_CALLCONV characterproperties_cleanup() { - for (Inclusion &in: gInclusions) { - delete in.fSet; - in.fSet = nullptr; - in.fInitOnce.reset(); - } - for (int32_t i = 0; i < UPRV_LENGTHOF(sets); ++i) { - delete sets[i]; - sets[i] = nullptr; - } - for (int32_t i = 0; i < UPRV_LENGTHOF(maps); ++i) { - ucptrie_close(reinterpret_cast(maps[i])); - maps[i] = nullptr; - } 
- return TRUE; -} - -} // namespace - -U_NAMESPACE_BEGIN - -/* -Reduce excessive reallocation, and make it easier to detect initialization problems. -Usually you don't see smaller sets than this for Unicode 5.0. -*/ -constexpr int32_t DEFAULT_INCLUSION_CAPACITY = 3072; - -void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCode &errorCode) { - // This function is invoked only via umtx_initOnce(). - // This function is a friend of class UnicodeSet. - - U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT); - if (src == UPROPS_SRC_NONE) { - errorCode = U_INTERNAL_PROGRAM_ERROR; - return; - } - UnicodeSet * &incl = gInclusions[src].fSet; - U_ASSERT(incl == nullptr); - - incl = new UnicodeSet(); - if (incl == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - USetAdder sa = { - (USet *)incl, - _set_add, - _set_addRange, - _set_addString, - nullptr, // don't need remove() - nullptr // don't need removeRange() - }; - - incl->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, errorCode); - switch(src) { - case UPROPS_SRC_CHAR: - uchar_addPropertyStarts(&sa, &errorCode); - break; - case UPROPS_SRC_PROPSVEC: - upropsvec_addPropertyStarts(&sa, &errorCode); - break; - case UPROPS_SRC_CHAR_AND_PROPSVEC: - uchar_addPropertyStarts(&sa, &errorCode); - upropsvec_addPropertyStarts(&sa, &errorCode); - break; -#if !UCONFIG_NO_NORMALIZATION - case UPROPS_SRC_CASE_AND_NORM: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); - if(U_SUCCESS(errorCode)) { - impl->addPropertyStarts(&sa, errorCode); - } - ucase_addPropertyStarts(&sa, &errorCode); - break; - } - case UPROPS_SRC_NFC: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); - if(U_SUCCESS(errorCode)) { - impl->addPropertyStarts(&sa, errorCode); - } - break; - } - case UPROPS_SRC_NFKC: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(errorCode); - if(U_SUCCESS(errorCode)) { - impl->addPropertyStarts(&sa, errorCode); - } - break; - } - case 
UPROPS_SRC_NFKC_CF: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(errorCode); - if(U_SUCCESS(errorCode)) { - impl->addPropertyStarts(&sa, errorCode); - } - break; - } - case UPROPS_SRC_NFC_CANON_ITER: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); - if(U_SUCCESS(errorCode)) { - impl->addCanonIterPropertyStarts(&sa, errorCode); - } - break; - } -#endif - case UPROPS_SRC_CASE: - ucase_addPropertyStarts(&sa, &errorCode); - break; - case UPROPS_SRC_BIDI: - ubidi_addPropertyStarts(&sa, &errorCode); - break; - case UPROPS_SRC_INPC: - case UPROPS_SRC_INSC: - case UPROPS_SRC_VO: - uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode); - break; - default: - errorCode = U_INTERNAL_PROGRAM_ERROR; - break; - } - - if (U_FAILURE(errorCode)) { - delete incl; - incl = nullptr; - return; - } - // Compact for caching - incl->compact(); - ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup); -} - -const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return nullptr; } - if (src < 0 || UPROPS_SRC_COUNT <= src) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - Inclusion &i = gInclusions[src]; - umtx_initOnce(i.fInitOnce, &CharacterProperties::initInclusion, src, errorCode); - return i.fSet; -} - -const UnicodeSet *CharacterProperties::getInclusionsForProperty( - UProperty prop, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return nullptr; } - UPropertySource src = uprops_getSource(prop); - return getInclusionsForSource(src, errorCode); -} - -U_NAMESPACE_END - -namespace { - -UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return nullptr; } - icu::LocalPointer set(new UnicodeSet()); - if (set.isNull()) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - const UnicodeSet *inclusions = - 
icu::CharacterProperties::getInclusionsForProperty(property, errorCode); - if (U_FAILURE(errorCode)) { return nullptr; } - int32_t numRanges = inclusions->getRangeCount(); - UChar32 startHasProperty = -1; - - for (int32_t i = 0; i < numRanges; ++i) { - UChar32 rangeEnd = inclusions->getRangeEnd(i); - for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) { - // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch. - if (u_hasBinaryProperty(c, property)) { - if (startHasProperty < 0) { - // Transition from false to true. - startHasProperty = c; - } - } else if (startHasProperty >= 0) { - // Transition from true to false. - set->add(startHasProperty, c - 1); - startHasProperty = -1; - } - } - } - if (startHasProperty >= 0) { - set->add(startHasProperty, 0x10FFFF); - } - set->freeze(); - return set.orphan(); -} - -UCPMap *makeMap(UProperty property, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return nullptr; } - uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0; - icu::LocalUMutableCPTriePointer mutableTrie( - umutablecptrie_open(nullValue, nullValue, &errorCode)); - const UnicodeSet *inclusions = - icu::CharacterProperties::getInclusionsForProperty(property, errorCode); - if (U_FAILURE(errorCode)) { return nullptr; } - int32_t numRanges = inclusions->getRangeCount(); - UChar32 start = 0; - uint32_t value = nullValue; - - for (int32_t i = 0; i < numRanges; ++i) { - UChar32 rangeEnd = inclusions->getRangeEnd(i); - for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) { - // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch. 
- uint32_t nextValue = u_getIntPropertyValue(c, property); - if (value != nextValue) { - if (value != nullValue) { - umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode); - } - start = c; - value = nextValue; - } - } - } - if (value != 0) { - umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode); - } - - UCPTrieType type; - if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) { - type = UCPTRIE_TYPE_FAST; - } else { - type = UCPTRIE_TYPE_SMALL; - } - UCPTrieValueWidth valueWidth; - // TODO: UCharacterProperty.IntProperty - int32_t max = u_getIntPropertyMaxValue(property); - if (max <= 0xff) { - valueWidth = UCPTRIE_VALUE_BITS_8; - } else if (max <= 0xffff) { - valueWidth = UCPTRIE_VALUE_BITS_16; - } else { - valueWidth = UCPTRIE_VALUE_BITS_32; - } - return reinterpret_cast( - umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode)); -} - -} // namespace - -U_NAMESPACE_USE - -U_CAPI const USet * U_EXPORT2 -u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { return nullptr; } - if (property < 0 || UCHAR_BINARY_LIMIT <= property) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - Mutex m(&cpMutex); - UnicodeSet *set = sets[property]; - if (set == nullptr) { - sets[property] = set = makeSet(property, *pErrorCode); - } - if (U_FAILURE(*pErrorCode)) { return nullptr; } - return set->toUSet(); -} - -U_CAPI const UCPMap * U_EXPORT2 -u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { return nullptr; } - if (property < UCHAR_INT_START || UCHAR_INT_LIMIT <= property) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - Mutex m(&cpMutex); - UCPMap *map = maps[property - UCHAR_INT_START]; - if (map == nullptr) { - maps[property - UCHAR_INT_START] = map = makeMap(property, *pErrorCode); - } - return map; -} diff --git 
a/deps/node/deps/icu-small/source/common/chariter.cpp b/deps/node/deps/icu-small/source/common/chariter.cpp deleted file mode 100644 index 887119a0..00000000 --- a/deps/node/deps/icu-small/source/common/chariter.cpp +++ /dev/null @@ -1,100 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ - -#include "unicode/chariter.h" - -U_NAMESPACE_BEGIN - -ForwardCharacterIterator::~ForwardCharacterIterator() {} -ForwardCharacterIterator::ForwardCharacterIterator() -: UObject() -{} -ForwardCharacterIterator::ForwardCharacterIterator(const ForwardCharacterIterator &other) -: UObject(other) -{} - - -CharacterIterator::CharacterIterator() -: textLength(0), pos(0), begin(0), end(0) { -} - -CharacterIterator::CharacterIterator(int32_t length) -: textLength(length), pos(0), begin(0), end(length) { - if(textLength < 0) { - textLength = end = 0; - } -} - -CharacterIterator::CharacterIterator(int32_t length, int32_t position) -: textLength(length), pos(position), begin(0), end(length) { - if(textLength < 0) { - textLength = end = 0; - } - if(pos < 0) { - pos = 0; - } else if(pos > end) { - pos = end; - } -} - -CharacterIterator::CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position) -: textLength(length), pos(position), begin(textBegin), end(textEnd) { - if(textLength < 0) { - textLength = 0; - } - if(begin < 0) { - begin = 0; - } else if(begin > textLength) { - begin = textLength; - } - if(end < begin) { - end = begin; - } else if(end > textLength) { - end = textLength; - } - if(pos < begin) { - pos = begin; - } else if(pos > end) { - pos = end; - } -} - -CharacterIterator::~CharacterIterator() {} - 
-CharacterIterator::CharacterIterator(const CharacterIterator &that) : -ForwardCharacterIterator(that), -textLength(that.textLength), pos(that.pos), begin(that.begin), end(that.end) -{ -} - -CharacterIterator & -CharacterIterator::operator=(const CharacterIterator &that) { - ForwardCharacterIterator::operator=(that); - textLength = that.textLength; - pos = that.pos; - begin = that.begin; - end = that.end; - return *this; -} - -// implementing first[32]PostInc() directly in a subclass should be faster -// but these implementations make subclassing a little easier -UChar -CharacterIterator::firstPostInc(void) { - setToStart(); - return nextPostInc(); -} - -UChar32 -CharacterIterator::first32PostInc(void) { - setToStart(); - return next32PostInc(); -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/charstr.cpp b/deps/node/deps/icu-small/source/common/charstr.cpp deleted file mode 100644 index 852cc539..00000000 --- a/deps/node/deps/icu-small/source/common/charstr.cpp +++ /dev/null @@ -1,192 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2015, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: charstr.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010may19 -* created by: Markus W. 
Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "charstr.h" -#include "cmemory.h" -#include "cstring.h" -#include "uinvchar.h" - -U_NAMESPACE_BEGIN - -CharString::CharString(CharString&& src) U_NOEXCEPT - : buffer(std::move(src.buffer)), len(src.len) { - src.len = 0; // not strictly necessary because we make no guarantees on the source string -} - -CharString& CharString::operator=(CharString&& src) U_NOEXCEPT { - buffer = std::move(src.buffer); - len = src.len; - src.len = 0; // not strictly necessary because we make no guarantees on the source string - return *this; -} - -CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) { - if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) { - len=s.len; - uprv_memcpy(buffer.getAlias(), s.buffer.getAlias(), len+1); - } - return *this; -} - -int32_t CharString::lastIndexOf(char c) const { - for(int32_t i=len; i>0;) { - if(buffer[--i]==c) { - return i; - } - } - return -1; -} - -CharString &CharString::truncate(int32_t newLength) { - if(newLength<0) { - newLength=0; - } - if(newLength(uprv_strlen(s)); - } - if(sLength>0) { - if(s==(buffer.getAlias()+len)) { - // The caller wrote into the getAppendBuffer(). - if(sLength>=(buffer.getCapacity()-len)) { - // The caller wrote too much. - errorCode=U_INTERNAL_PROGRAM_ERROR; - } else { - buffer[len+=sLength]=0; - } - } else if(buffer.getAlias()<=s && s<(buffer.getAlias()+len) && - sLength>=(buffer.getCapacity()-len) - ) { - // (Part of) this string is appended to itself which requires reallocation, - // so we have to make a copy of the substring and append that. 
- return append(CharString(s, sLength, errorCode), errorCode); - } else if(ensureCapacity(len+sLength+1, 0, errorCode)) { - uprv_memcpy(buffer.getAlias()+len, s, sLength); - buffer[len+=sLength]=0; - } - } - return *this; -} - -char *CharString::getAppendBuffer(int32_t minCapacity, - int32_t desiredCapacityHint, - int32_t &resultCapacity, - UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - resultCapacity=0; - return NULL; - } - int32_t appendCapacity=buffer.getCapacity()-len-1; // -1 for NUL - if(appendCapacity>=minCapacity) { - resultCapacity=appendCapacity; - return buffer.getAlias()+len; - } - if(ensureCapacity(len+minCapacity+1, len+desiredCapacityHint+1, errorCode)) { - resultCapacity=buffer.getCapacity()-len-1; - return buffer.getAlias()+len; - } - resultCapacity=0; - return NULL; -} - -CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) { - return appendInvariantChars(s.getBuffer(), s.length(), errorCode); -} - -CharString &CharString::appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return *this; - } - if (!uprv_isInvariantUString(uchars, ucharsLen)) { - errorCode = U_INVARIANT_CONVERSION_ERROR; - return *this; - } - if(ensureCapacity(len+ucharsLen+1, 0, errorCode)) { - u_UCharsToChars(uchars, buffer.getAlias()+len, ucharsLen); - len += ucharsLen; - buffer[len] = 0; - } - return *this; -} - -UBool CharString::ensureCapacity(int32_t capacity, - int32_t desiredCapacityHint, - UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return FALSE; - } - if(capacity>buffer.getCapacity()) { - if(desiredCapacityHint==0) { - desiredCapacityHint=capacity+buffer.getCapacity(); - } - if( (desiredCapacityHint<=capacity || buffer.resize(desiredCapacityHint, len+1)==NULL) && - buffer.resize(capacity, len+1)==NULL - ) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - } - return TRUE; -} - -CharString &CharString::appendPathPart(StringPiece s, 
UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return *this; - } - if(s.length()==0) { - return *this; - } - char c; - if(len>0 && (c=buffer[len-1])!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) { - append(U_FILE_SEP_CHAR, errorCode); - } - append(s, errorCode); - return *this; -} - -CharString &CharString::ensureEndsWithFileSeparator(UErrorCode &errorCode) { - char c; - if(U_SUCCESS(errorCode) && len>0 && - (c=buffer[len-1])!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) { - append(U_FILE_SEP_CHAR, errorCode); - } - return *this; -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/charstr.h b/deps/node/deps/icu-small/source/common/charstr.h deleted file mode 100644 index 1a97e019..00000000 --- a/deps/node/deps/icu-small/source/common/charstr.h +++ /dev/null @@ -1,154 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2001-2015, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 11/19/2001 aliu Creation. -* 05/19/2010 markus Rewritten from scratch -********************************************************************** -*/ - -#ifndef CHARSTRING_H -#define CHARSTRING_H - -#include "unicode/utypes.h" -#include "unicode/unistr.h" -#include "unicode/uobject.h" -#include "cmemory.h" - -U_NAMESPACE_BEGIN - -// Windows needs us to DLL-export the MaybeStackArray template specialization, -// but MacOS X cannot handle it. Same as in digitlst.h. -#if !U_PLATFORM_IS_DARWIN_BASED -template class U_COMMON_API MaybeStackArray; -#endif - -/** - * ICU-internal char * string class. - * This class does not assume or enforce any particular character encoding. - * Raw bytes can be stored. The string object owns its characters. 
- * A terminating NUL is stored, but the class does not prevent embedded NUL characters. - * - * This class wants to be convenient but is also deliberately minimalist. - * Please do not add methods if they only add minor convenience. - * For example: - * cs.data()[5]='a'; // no need for setCharAt(5, 'a') - */ -class U_COMMON_API CharString : public UMemory { -public: - CharString() : len(0) { buffer[0]=0; } - CharString(StringPiece s, UErrorCode &errorCode) : len(0) { - buffer[0]=0; - append(s, errorCode); - } - CharString(const CharString &s, UErrorCode &errorCode) : len(0) { - buffer[0]=0; - append(s, errorCode); - } - CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) { - buffer[0]=0; - append(s, sLength, errorCode); - } - ~CharString() {} - - /** - * Move constructor; might leave src in an undefined state. - * This string will have the same contents and state that the source string had. - */ - CharString(CharString &&src) U_NOEXCEPT; - /** - * Move assignment operator; might leave src in an undefined state. - * This string will have the same contents and state that the source string had. - * The behavior is undefined if *this and src are the same object. - */ - CharString &operator=(CharString &&src) U_NOEXCEPT; - - /** - * Replaces this string's contents with the other string's contents. - * CharString does not support the standard copy constructor nor - * the assignment operator, to make copies explicit and to - * use a UErrorCode where memory allocations might be needed. 
- */ - CharString ©From(const CharString &other, UErrorCode &errorCode); - - UBool isEmpty() const { return len==0; } - int32_t length() const { return len; } - char operator[](int32_t index) const { return buffer[index]; } - StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); } - - const char *data() const { return buffer.getAlias(); } - char *data() { return buffer.getAlias(); } - - /** @return last index of c, or -1 if c is not in this string */ - int32_t lastIndexOf(char c) const; - - CharString &clear() { len=0; buffer[0]=0; return *this; } - CharString &truncate(int32_t newLength); - - CharString &append(char c, UErrorCode &errorCode); - CharString &append(StringPiece s, UErrorCode &errorCode) { - return append(s.data(), s.length(), errorCode); - } - CharString &append(const CharString &s, UErrorCode &errorCode) { - return append(s.data(), s.length(), errorCode); - } - CharString &append(const char *s, int32_t sLength, UErrorCode &status); - /** - * Returns a writable buffer for appending and writes the buffer's capacity to - * resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS(). - * There will additionally be space for a terminating NUL right at resultCapacity. - * (This function is similar to ByteSink.GetAppendBuffer().) - * - * The returned buffer is only valid until the next write operation - * on this string. - * - * After writing at most resultCapacity bytes, call append() with the - * pointer returned from this function and the number of bytes written. 
- * - * @param minCapacity required minimum capacity of the returned buffer; - * must be non-negative - * @param desiredCapacityHint desired capacity of the returned buffer; - * must be non-negative - * @param resultCapacity will be set to the capacity of the returned buffer - * @param errorCode in/out error code - * @return a buffer with resultCapacity>=min_capacity - */ - char *getAppendBuffer(int32_t minCapacity, - int32_t desiredCapacityHint, - int32_t &resultCapacity, - UErrorCode &errorCode); - - CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode); - CharString &appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode& errorCode); - - /** - * Appends a filename/path part, e.g., a directory name. - * First appends a U_FILE_SEP_CHAR if necessary. - * Does nothing if s is empty. - */ - CharString &appendPathPart(StringPiece s, UErrorCode &errorCode); - - /** - * Appends a U_FILE_SEP_CHAR if this string is not empty - * and does not already end with a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR. - */ - CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode); - -private: - MaybeStackArray buffer; - int32_t len; - - UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode); - - CharString(const CharString &other); // forbid copying of this class - CharString &operator=(const CharString &other); // forbid copying of this class -}; - -U_NAMESPACE_END - -#endif -//eof diff --git a/deps/node/deps/icu-small/source/common/cmemory.cpp b/deps/node/deps/icu-small/source/common/cmemory.cpp deleted file mode 100644 index 0b7e432c..00000000 --- a/deps/node/deps/icu-small/source/common/cmemory.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2002-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File cmemory.c ICU Heap allocation. -* All ICU heap allocation, both for C and C++ new of ICU -* class types, comes through these functions. -* -* If you have a need to replace ICU allocation, this is the -* place to do it. -* -* Note that uprv_malloc(0) returns a non-NULL pointer, and -* that a subsequent free of that pointer value is a NOP. -* -****************************************************************************** -*/ -#include "unicode/uclean.h" -#include "cmemory.h" -#include "putilimp.h" -#include "uassert.h" -#include - -/* uprv_malloc(0) returns a pointer to this read-only data. */ -static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0}; - -/* Function Pointers for user-supplied heap functions */ -static const void *pContext; -static UMemAllocFn *pAlloc; -static UMemReallocFn *pRealloc; -static UMemFreeFn *pFree; - -#if U_DEBUG && defined(UPRV_MALLOC_COUNT) -#include -static int n=0; -static long b=0; -#endif - -U_CAPI void * U_EXPORT2 -uprv_malloc(size_t s) { -#if U_DEBUG && defined(UPRV_MALLOC_COUNT) -#if 1 - putchar('>'); - fflush(stdout); -#else - fprintf(stderr,"MALLOC\t#%d\t%ul bytes\t%ul total\n", ++n,s,(b+=s)); fflush(stderr); -#endif -#endif - if (s > 0) { - if (pAlloc) { - return (*pAlloc)(pContext, s); - } else { - return uprv_default_malloc(s); - } - } else { - return (void *)zeroMem; - } -} - -U_CAPI void * U_EXPORT2 -uprv_realloc(void * buffer, size_t size) { -#if U_DEBUG && defined(UPRV_MALLOC_COUNT) - putchar('~'); - fflush(stdout); -#endif - if (buffer == zeroMem) { - return uprv_malloc(size); - } else if (size == 0) { - if (pFree) { - (*pFree)(pContext, buffer); - } else { - 
uprv_default_free(buffer); - } - return (void *)zeroMem; - } else { - if (pRealloc) { - return (*pRealloc)(pContext, buffer, size); - } else { - return uprv_default_realloc(buffer, size); - } - } -} - -U_CAPI void U_EXPORT2 -uprv_free(void *buffer) { -#if U_DEBUG && defined(UPRV_MALLOC_COUNT) - putchar('<'); - fflush(stdout); -#endif - if (buffer != zeroMem) { - if (pFree) { - (*pFree)(pContext, buffer); - } else { - uprv_default_free(buffer); - } - } -} - -U_CAPI void * U_EXPORT2 -uprv_calloc(size_t num, size_t size) { - void *mem = NULL; - size *= num; - mem = uprv_malloc(size); - if (mem) { - uprv_memset(mem, 0, size); - } - return mem; -} - -U_CAPI void U_EXPORT2 -u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status) -{ - if (U_FAILURE(*status)) { - return; - } - if (a==NULL || r==NULL || f==NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - pContext = context; - pAlloc = a; - pRealloc = r; - pFree = f; -} - - -U_CFUNC UBool cmemory_cleanup(void) { - pContext = NULL; - pAlloc = NULL; - pRealloc = NULL; - pFree = NULL; - return TRUE; -} diff --git a/deps/node/deps/icu-small/source/common/cmemory.h b/deps/node/deps/icu-small/source/common/cmemory.h deleted file mode 100644 index a6dd209d..00000000 --- a/deps/node/deps/icu-small/source/common/cmemory.h +++ /dev/null @@ -1,681 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File CMEMORY.H -* -* Contains stdlib.h/string.h memory functions -* -* @author Bertrand A. Damiba -* -* Modification History: -* -* Date Name Description -* 6/20/98 Bertrand Created. 
-* 05/03/99 stephen Changed from functions to macros. -* -****************************************************************************** -*/ - -#ifndef CMEMORY_H -#define CMEMORY_H - -#include "unicode/utypes.h" - -#include -#include -#include "unicode/localpointer.h" - -#if U_DEBUG && defined(UPRV_MALLOC_COUNT) -#include -#endif - - -#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size) -#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size) - -/** - * \def UPRV_LENGTHOF - * Convenience macro to determine the length of a fixed array at compile-time. - * @param array A fixed length array - * @return The length of the array, in elements - * @internal - */ -#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) -#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size) -#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size) - -U_CAPI void * U_EXPORT2 -uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1); - -U_CAPI void * U_EXPORT2 -uprv_realloc(void *mem, size_t size) U_ALLOC_SIZE_ATTR(2); - -U_CAPI void U_EXPORT2 -uprv_free(void *mem); - -U_CAPI void * U_EXPORT2 -uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2); - -/** - * This should align the memory properly on any machine. - * This is very useful for the safeClone functions. - */ -typedef union { - long t1; - double t2; - void *t3; -} UAlignedMemory; - -/** - * Get the least significant bits of a pointer (a memory address). - * For example, with a mask of 3, the macro gets the 2 least significant bits, - * which will be 0 if the pointer is 32-bit (4-byte) aligned. - * - * ptrdiff_t is the most appropriate integer type to cast to. - * size_t should work too, since on most (or all?) platforms it has the same - * width as ptrdiff_t. 
- */ -#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask)) - -/** - * Get the amount of bytes that a pointer is off by from - * the previous UAlignedMemory-aligned pointer. - */ -#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1) - -/** - * Get the amount of bytes to add to a pointer - * in order to get the next UAlignedMemory-aligned address. - */ -#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr)) - -/** - * Heap clean up function, called from u_cleanup() - * Clears any user heap functions from u_setMemoryFunctions() - * Does NOT deallocate any remaining allocated memory. - */ -U_CFUNC UBool -cmemory_cleanup(void); - -/** - * A function called by uhash_remove, - * uhash_close, or uhash_put to delete - * an existing key or value. - * @param obj A key or value stored in a hashtable - * @see uprv_deleteUObject - */ -typedef void U_CALLCONV UObjectDeleter(void* obj); - -/** - * Deleter for UObject instances. - * Works for all subclasses of UObject because it has a virtual destructor. - */ -U_CAPI void U_EXPORT2 -uprv_deleteUObject(void *obj); - -#ifdef __cplusplus - -U_NAMESPACE_BEGIN - -/** - * "Smart pointer" class, deletes memory via uprv_free(). - * For most methods see the LocalPointerBase base class. - * Adds operator[] for array item access. - * - * @see LocalPointerBase - */ -template -class LocalMemory : public LocalPointerBase { -public: - using LocalPointerBase::operator*; - using LocalPointerBase::operator->; - /** - * Constructor takes ownership. - * @param p simple pointer to an array of T items that is adopted - */ - explicit LocalMemory(T *p=NULL) : LocalPointerBase(p) {} - /** - * Move constructor, leaves src with isNull(). - * @param src source smart pointer - */ - LocalMemory(LocalMemory &&src) U_NOEXCEPT : LocalPointerBase(src.ptr) { - src.ptr=NULL; - } - /** - * Destructor deletes the memory it owns. 
- */ - ~LocalMemory() { - uprv_free(LocalPointerBase::ptr); - } - /** - * Move assignment operator, leaves src with isNull(). - * The behavior is undefined if *this and src are the same object. - * @param src source smart pointer - * @return *this - */ - LocalMemory &operator=(LocalMemory &&src) U_NOEXCEPT { - return moveFrom(src); - } - /** - * Move assignment, leaves src with isNull(). - * The behavior is undefined if *this and src are the same object. - * - * Can be called explicitly, does not need C++11 support. - * @param src source smart pointer - * @return *this - */ - LocalMemory &moveFrom(LocalMemory &src) U_NOEXCEPT { - uprv_free(LocalPointerBase::ptr); - LocalPointerBase::ptr=src.ptr; - src.ptr=NULL; - return *this; - } - /** - * Swap pointers. - * @param other other smart pointer - */ - void swap(LocalMemory &other) U_NOEXCEPT { - T *temp=LocalPointerBase::ptr; - LocalPointerBase::ptr=other.ptr; - other.ptr=temp; - } - /** - * Non-member LocalMemory swap function. - * @param p1 will get p2's pointer - * @param p2 will get p1's pointer - */ - friend inline void swap(LocalMemory &p1, LocalMemory &p2) U_NOEXCEPT { - p1.swap(p2); - } - /** - * Deletes the array it owns, - * and adopts (takes ownership of) the one passed in. - * @param p simple pointer to an array of T items that is adopted - */ - void adoptInstead(T *p) { - uprv_free(LocalPointerBase::ptr); - LocalPointerBase::ptr=p; - } - /** - * Deletes the array it owns, allocates a new one and reset its bytes to 0. - * Returns the new array pointer. - * If the allocation fails, then the current array is unchanged and - * this method returns NULL. - * @param newCapacity must be >0 - * @return the allocated array pointer, or NULL if the allocation failed - */ - inline T *allocateInsteadAndReset(int32_t newCapacity=1); - /** - * Deletes the array it owns and allocates a new one, copying length T items. - * Returns the new array pointer. 
- * If the allocation fails, then the current array is unchanged and - * this method returns NULL. - * @param newCapacity must be >0 - * @param length number of T items to be copied from the old array to the new one; - * must be no more than the capacity of the old array, - * which the caller must track because the LocalMemory does not track it - * @return the allocated array pointer, or NULL if the allocation failed - */ - inline T *allocateInsteadAndCopy(int32_t newCapacity=1, int32_t length=0); - /** - * Array item access (writable). - * No index bounds check. - * @param i array index - * @return reference to the array item - */ - T &operator[](ptrdiff_t i) const { return LocalPointerBase::ptr[i]; } -}; - -template -inline T *LocalMemory::allocateInsteadAndReset(int32_t newCapacity) { - if(newCapacity>0) { - T *p=(T *)uprv_malloc(newCapacity*sizeof(T)); - if(p!=NULL) { - uprv_memset(p, 0, newCapacity*sizeof(T)); - uprv_free(LocalPointerBase::ptr); - LocalPointerBase::ptr=p; - } - return p; - } else { - return NULL; - } -} - - -template -inline T *LocalMemory::allocateInsteadAndCopy(int32_t newCapacity, int32_t length) { - if(newCapacity>0) { - T *p=(T *)uprv_malloc(newCapacity*sizeof(T)); - if(p!=NULL) { - if(length>0) { - if(length>newCapacity) { - length=newCapacity; - } - uprv_memcpy(p, LocalPointerBase::ptr, (size_t)length*sizeof(T)); - } - uprv_free(LocalPointerBase::ptr); - LocalPointerBase::ptr=p; - } - return p; - } else { - return NULL; - } -} - -/** - * Simple array/buffer management class using uprv_malloc() and uprv_free(). - * Provides an internal array with fixed capacity. Can alias another array - * or allocate one. - * - * The array address is properly aligned for type T. It might not be properly - * aligned for types larger than T (or larger than the largest subtype of T). - * - * Unlike LocalMemory and LocalArray, this class never adopts - * (takes ownership of) another array. 
- * - * WARNING: MaybeStackArray only works with primitive (plain-old data) types. - * It does NOT know how to call a destructor! If you work with classes with - * destructors, consider LocalArray in localpointer.h. - */ -template -class MaybeStackArray { -public: - /** - * Default constructor initializes with internal T[stackCapacity] buffer. - */ - MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(FALSE) {} - /** - * Automatically allocates the heap array if the argument is larger than the stack capacity. - * Intended for use when an approximate capacity is known at compile time but the true - * capacity is not known until runtime. - */ - MaybeStackArray(int32_t newCapacity) : MaybeStackArray() { - if (capacity < newCapacity) { resize(newCapacity); } - }; - /** - * Destructor deletes the array (if owned). - */ - ~MaybeStackArray() { releaseArray(); } - /** - * Move constructor: transfers ownership or copies the stack array. - */ - MaybeStackArray(MaybeStackArray &&src) U_NOEXCEPT; - /** - * Move assignment: transfers ownership or copies the stack array. - */ - MaybeStackArray &operator=(MaybeStackArray &&src) U_NOEXCEPT; - /** - * Returns the array capacity (number of T items). - * @return array capacity - */ - int32_t getCapacity() const { return capacity; } - /** - * Access without ownership change. - * @return the array pointer - */ - T *getAlias() const { return ptr; } - /** - * Returns the array limit. Simple convenience method. - * @return getAlias()+getCapacity() - */ - T *getArrayLimit() const { return getAlias()+capacity; } - // No "operator T *() const" because that can make - // expressions like mbs[index] ambiguous for some compilers. - /** - * Array item access (const). - * No index bounds check. - * @param i array index - * @return reference to the array item - */ - const T &operator[](ptrdiff_t i) const { return ptr[i]; } - /** - * Array item access (writable). - * No index bounds check. 
- * @param i array index - * @return reference to the array item - */ - T &operator[](ptrdiff_t i) { return ptr[i]; } - /** - * Deletes the array (if owned) and aliases another one, no transfer of ownership. - * If the arguments are illegal, then the current array is unchanged. - * @param otherArray must not be NULL - * @param otherCapacity must be >0 - */ - void aliasInstead(T *otherArray, int32_t otherCapacity) { - if(otherArray!=NULL && otherCapacity>0) { - releaseArray(); - ptr=otherArray; - capacity=otherCapacity; - needToRelease=FALSE; - } - } - /** - * Deletes the array (if owned) and allocates a new one, copying length T items. - * Returns the new array pointer. - * If the allocation fails, then the current array is unchanged and - * this method returns NULL. - * @param newCapacity can be less than or greater than the current capacity; - * must be >0 - * @param length number of T items to be copied from the old array to the new one - * @return the allocated array pointer, or NULL if the allocation failed - */ - inline T *resize(int32_t newCapacity, int32_t length=0); - /** - * Gives up ownership of the array if owned, or else clones it, - * copying length T items; resets itself to the internal stack array. - * Returns NULL if the allocation failed. - * @param length number of T items to copy when cloning, - * and capacity of the clone when cloning - * @param resultCapacity will be set to the returned array's capacity (output-only) - * @return the array pointer; - * caller becomes responsible for deleting the array - */ - inline T *orphanOrClone(int32_t length, int32_t &resultCapacity); -private: - T *ptr; - int32_t capacity; - UBool needToRelease; - T stackArray[stackCapacity]; - void releaseArray() { - if(needToRelease) { - uprv_free(ptr); - } - } - void resetToStackArray() { - ptr=stackArray; - capacity=stackCapacity; - needToRelease=FALSE; - } - /* No comparison operators with other MaybeStackArray's. 
*/ - bool operator==(const MaybeStackArray & /*other*/) {return FALSE;} - bool operator!=(const MaybeStackArray & /*other*/) {return TRUE;} - /* No ownership transfer: No copy constructor, no assignment operator. */ - MaybeStackArray(const MaybeStackArray & /*other*/) {} - void operator=(const MaybeStackArray & /*other*/) {} - - // No heap allocation. Use only on the stack. - // (Declaring these functions private triggers a cascade of problems: - // MSVC insists on exporting an instantiation of MaybeStackArray, which - // requires that all functions be defined. - // An empty implementation of new() is rejected, it must return a value. - // Returning NULL is rejected by gcc for operator new. - // The expedient thing is just not to override operator new. - // While relatively pointless, heap allocated instances will function. - // static void * U_EXPORT2 operator new(size_t size); - // static void * U_EXPORT2 operator new[](size_t size); -#if U_HAVE_PLACEMENT_NEW - // static void * U_EXPORT2 operator new(size_t, void *ptr); -#endif -}; - -template -icu::MaybeStackArray::MaybeStackArray( - MaybeStackArray && src) U_NOEXCEPT - : ptr(src.ptr), capacity(src.capacity), needToRelease(src.needToRelease) { - if (src.ptr == src.stackArray) { - ptr = stackArray; - uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity); - } else { - src.resetToStackArray(); // take ownership away from src - } -} - -template -inline MaybeStackArray & -MaybeStackArray::operator=(MaybeStackArray && src) U_NOEXCEPT { - releaseArray(); // in case this instance had its own memory allocated - capacity = src.capacity; - needToRelease = src.needToRelease; - if (src.ptr == src.stackArray) { - ptr = stackArray; - uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity); - } else { - ptr = src.ptr; - src.resetToStackArray(); // take ownership away from src - } - return *this; -} - -template -inline T *MaybeStackArray::resize(int32_t newCapacity, int32_t length) { - if(newCapacity>0) { 
-#if U_DEBUG && defined(UPRV_MALLOC_COUNT) - ::fprintf(::stderr,"MaybeStacArray (resize) alloc %d * %lu\n", newCapacity,sizeof(T)); -#endif - T *p=(T *)uprv_malloc(newCapacity*sizeof(T)); - if(p!=NULL) { - if(length>0) { - if(length>capacity) { - length=capacity; - } - if(length>newCapacity) { - length=newCapacity; - } - uprv_memcpy(p, ptr, (size_t)length*sizeof(T)); - } - releaseArray(); - ptr=p; - capacity=newCapacity; - needToRelease=TRUE; - } - return p; - } else { - return NULL; - } -} - -template -inline T *MaybeStackArray::orphanOrClone(int32_t length, int32_t &resultCapacity) { - T *p; - if(needToRelease) { - p=ptr; - } else if(length<=0) { - return NULL; - } else { - if(length>capacity) { - length=capacity; - } - p=(T *)uprv_malloc(length*sizeof(T)); -#if U_DEBUG && defined(UPRV_MALLOC_COUNT) - ::fprintf(::stderr,"MaybeStacArray (orphan) alloc %d * %lu\n", length,sizeof(T)); -#endif - if(p==NULL) { - return NULL; - } - uprv_memcpy(p, ptr, (size_t)length*sizeof(T)); - } - resultCapacity=length; - resetToStackArray(); - return p; -} - -/** - * Variant of MaybeStackArray that allocates a header struct and an array - * in one contiguous memory block, using uprv_malloc() and uprv_free(). - * Provides internal memory with fixed array capacity. Can alias another memory - * block or allocate one. - * The stackCapacity is the number of T items in the internal memory, - * not counting the H header. - * Unlike LocalMemory and LocalArray, this class never adopts - * (takes ownership of) another memory block. - */ -template -class MaybeStackHeaderAndArray { -public: - /** - * Default constructor initializes with internal H+T[stackCapacity] buffer. - */ - MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(FALSE) {} - /** - * Destructor deletes the memory (if owned). - */ - ~MaybeStackHeaderAndArray() { releaseMemory(); } - /** - * Returns the array capacity (number of T items). 
- * @return array capacity - */ - int32_t getCapacity() const { return capacity; } - /** - * Access without ownership change. - * @return the header pointer - */ - H *getAlias() const { return ptr; } - /** - * Returns the array start. - * @return array start, same address as getAlias()+1 - */ - T *getArrayStart() const { return reinterpret_cast(getAlias()+1); } - /** - * Returns the array limit. - * @return array limit - */ - T *getArrayLimit() const { return getArrayStart()+capacity; } - /** - * Access without ownership change. Same as getAlias(). - * A class instance can be used directly in expressions that take a T *. - * @return the header pointer - */ - operator H *() const { return ptr; } - /** - * Array item access (writable). - * No index bounds check. - * @param i array index - * @return reference to the array item - */ - T &operator[](ptrdiff_t i) { return getArrayStart()[i]; } - /** - * Deletes the memory block (if owned) and aliases another one, no transfer of ownership. - * If the arguments are illegal, then the current memory is unchanged. - * @param otherArray must not be NULL - * @param otherCapacity must be >0 - */ - void aliasInstead(H *otherMemory, int32_t otherCapacity) { - if(otherMemory!=NULL && otherCapacity>0) { - releaseMemory(); - ptr=otherMemory; - capacity=otherCapacity; - needToRelease=FALSE; - } - } - /** - * Deletes the memory block (if owned) and allocates a new one, - * copying the header and length T array items. - * Returns the new header pointer. - * If the allocation fails, then the current memory is unchanged and - * this method returns NULL. 
- * @param newCapacity can be less than or greater than the current capacity; - * must be >0 - * @param length number of T items to be copied from the old array to the new one - * @return the allocated pointer, or NULL if the allocation failed - */ - inline H *resize(int32_t newCapacity, int32_t length=0); - /** - * Gives up ownership of the memory if owned, or else clones it, - * copying the header and length T array items; resets itself to the internal memory. - * Returns NULL if the allocation failed. - * @param length number of T items to copy when cloning, - * and array capacity of the clone when cloning - * @param resultCapacity will be set to the returned array's capacity (output-only) - * @return the header pointer; - * caller becomes responsible for deleting the array - */ - inline H *orphanOrClone(int32_t length, int32_t &resultCapacity); -private: - H *ptr; - int32_t capacity; - UBool needToRelease; - // stackHeader must precede stackArray immediately. - H stackHeader; - T stackArray[stackCapacity]; - void releaseMemory() { - if(needToRelease) { - uprv_free(ptr); - } - } - /* No comparison operators with other MaybeStackHeaderAndArray's. */ - bool operator==(const MaybeStackHeaderAndArray & /*other*/) {return FALSE;} - bool operator!=(const MaybeStackHeaderAndArray & /*other*/) {return TRUE;} - /* No ownership transfer: No copy constructor, no assignment operator. */ - MaybeStackHeaderAndArray(const MaybeStackHeaderAndArray & /*other*/) {} - void operator=(const MaybeStackHeaderAndArray & /*other*/) {} - - // No heap allocation. Use only on the stack. - // (Declaring these functions private triggers a cascade of problems; - // see the MaybeStackArray class for details.) 
- // static void * U_EXPORT2 operator new(size_t size); - // static void * U_EXPORT2 operator new[](size_t size); -#if U_HAVE_PLACEMENT_NEW - // static void * U_EXPORT2 operator new(size_t, void *ptr); -#endif -}; - -template -inline H *MaybeStackHeaderAndArray::resize(int32_t newCapacity, - int32_t length) { - if(newCapacity>=0) { -#if U_DEBUG && defined(UPRV_MALLOC_COUNT) - ::fprintf(::stderr,"MaybeStackHeaderAndArray alloc %d + %d * %ul\n", sizeof(H),newCapacity,sizeof(T)); -#endif - H *p=(H *)uprv_malloc(sizeof(H)+newCapacity*sizeof(T)); - if(p!=NULL) { - if(length<0) { - length=0; - } else if(length>0) { - if(length>capacity) { - length=capacity; - } - if(length>newCapacity) { - length=newCapacity; - } - } - uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T)); - releaseMemory(); - ptr=p; - capacity=newCapacity; - needToRelease=TRUE; - } - return p; - } else { - return NULL; - } -} - -template -inline H *MaybeStackHeaderAndArray::orphanOrClone(int32_t length, - int32_t &resultCapacity) { - H *p; - if(needToRelease) { - p=ptr; - } else { - if(length<0) { - length=0; - } else if(length>capacity) { - length=capacity; - } -#if U_DEBUG && defined(UPRV_MALLOC_COUNT) - ::fprintf(::stderr,"MaybeStackHeaderAndArray (orphan) alloc %ul + %d * %lu\n", sizeof(H),length,sizeof(T)); -#endif - p=(H *)uprv_malloc(sizeof(H)+length*sizeof(T)); - if(p==NULL) { - return NULL; - } - uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T)); - } - resultCapacity=length; - ptr=&stackHeader; - capacity=stackCapacity; - needToRelease=FALSE; - return p; -} - -U_NAMESPACE_END - -#endif /* __cplusplus */ -#endif /* CMEMORY_H */ diff --git a/deps/node/deps/icu-small/source/common/common.rc b/deps/node/deps/icu-small/source/common/common.rc deleted file mode 100644 index 5c723a27..00000000 --- a/deps/node/deps/icu-small/source/common/common.rc +++ /dev/null @@ -1,109 +0,0 @@ -// Do not edit with Microsoft Developer Studio Resource Editor. 
-// It will permanently substitute version numbers that are intended to be -// picked up by the pre-processor during each build. -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// Copyright (c) 2001-2010 International Business Machines -// Corporation and others. All Rights Reserved. -// -#include "msvcres.h" - -#define APSTUDIO_READONLY_SYMBOLS -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 2 resource. -// -#include -///////////////////////////////////////////////////////////////////////////// -#undef APSTUDIO_READONLY_SYMBOLS - -///////////////////////////////////////////////////////////////////////////// -// - -LANGUAGE LANG_NEUTRAL, SUBLANG_NEUTRAL -#pragma code_page(1252) - -#ifdef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// TEXTINCLUDE -// - -1 TEXTINCLUDE -BEGIN - "msvcres.h\0" -END - -2 TEXTINCLUDE -BEGIN - "#include \0" -END - -3 TEXTINCLUDE -BEGIN - "\r\n" - "\0" -END - -#endif // APSTUDIO_INVOKED - - -///////////////////////////////////////////////////////////////////////////// -// -// Version -// -#define STR(s) #s -#define CommaVersionString(a, b, c, d) STR(a) ", " STR(b) ", " STR(c) ", " STR(d) "\0" - -VS_VERSION_INFO VERSIONINFO - FILEVERSION U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM - PRODUCTVERSION U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM - FILEFLAGSMASK 0x3fL -#ifdef _DEBUG - FILEFLAGS 0x1L -#else - FILEFLAGS 0x0L -#endif - FILEOS VOS__WINDOWS32 - FILETYPE VFT_DLL - FILESUBTYPE 0x0L -BEGIN - BLOCK "StringFileInfo" - BEGIN - BLOCK "00000000" - BEGIN - VALUE "Comments", ICU_WEBSITE "\0" - VALUE "CompanyName", ICU_COMPANY "\0" - VALUE "FileDescription", ICU_PRODUCT_PREFIX " Common DLL\0" - VALUE "FileVersion", 
CommaVersionString(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM) - VALUE "LegalCopyright", U_COPYRIGHT_STRING "\0" -#ifdef _DEBUG - VALUE "OriginalFilename", "icuuc" U_ICU_VERSION_SHORT "d.dll\0" -#else - VALUE "OriginalFilename", "icuuc" U_ICU_VERSION_SHORT ".dll\0" -#endif - VALUE "PrivateBuild", "\0" - VALUE "ProductName", ICU_PRODUCT "\0" - VALUE "ProductVersion", CommaVersionString(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM, U_ICU_VERSION_PATCHLEVEL_NUM, U_ICU_VERSION_BUILDLEVEL_NUM) - VALUE "SpecialBuild", "\0" - END - END - BLOCK "VarFileInfo" - BEGIN - VALUE "Translation", 0x000, 0000 - END -END - -///////////////////////////////////////////////////////////////////////////// - - - -#ifndef APSTUDIO_INVOKED -///////////////////////////////////////////////////////////////////////////// -// -// Generated from the TEXTINCLUDE 3 resource. -// - - -///////////////////////////////////////////////////////////////////////////// -#endif // not APSTUDIO_INVOKED diff --git a/deps/node/deps/icu-small/source/common/cpputils.h b/deps/node/deps/icu-small/source/common/cpputils.h deleted file mode 100644 index 307e5704..00000000 --- a/deps/node/deps/icu-small/source/common/cpputils.h +++ /dev/null @@ -1,97 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2011, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -****************************************************************************** -* file name: cpputils.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -*/ - -#ifndef CPPUTILS_H -#define CPPUTILS_H - -#include "unicode/utypes.h" -#include "unicode/unistr.h" -#include "cmemory.h" - -/*==========================================================================*/ -/* Array copy utility functions */ -/*==========================================================================*/ - -static -inline void uprv_arrayCopy(const double* src, double* dst, int32_t count) -{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); } - -static -inline void uprv_arrayCopy(const double* src, int32_t srcStart, - double* dst, int32_t dstStart, int32_t count) -{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } - -static -inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count) - { uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); } - -static -inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart, - int8_t* dst, int32_t dstStart, int32_t count) -{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } - -static -inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count) -{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); } - -static -inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart, - int16_t* dst, int32_t dstStart, int32_t count) -{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } - -static -inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count) -{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); } - -static -inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart, - int32_t* dst, int32_t dstStart, int32_t count) -{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } - -static -inline void -uprv_arrayCopy(const UChar *src, int32_t srcStart, - UChar *dst, int32_t dstStart, int32_t count) -{ 
uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } - -/** - * Copy an array of UnicodeString OBJECTS (not pointers). - * @internal - */ -static inline void -uprv_arrayCopy(const icu::UnicodeString *src, icu::UnicodeString *dst, int32_t count) -{ while(count-- > 0) *dst++ = *src++; } - -/** - * Copy an array of UnicodeString OBJECTS (not pointers). - * @internal - */ -static inline void -uprv_arrayCopy(const icu::UnicodeString *src, int32_t srcStart, - icu::UnicodeString *dst, int32_t dstStart, int32_t count) -{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); } - -/** - * Checks that the string is readable and writable. - * Sets U_ILLEGAL_ARGUMENT_ERROR if the string isBogus() or has an open getBuffer(). - */ -inline void -uprv_checkCanGetBuffer(const icu::UnicodeString &s, UErrorCode &errorCode) { - if(U_SUCCESS(errorCode) && s.isBogus()) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - } -} - -#endif /* _CPPUTILS */ diff --git a/deps/node/deps/icu-small/source/common/cstr.cpp b/deps/node/deps/icu-small/source/common/cstr.cpp deleted file mode 100644 index 01144343..00000000 --- a/deps/node/deps/icu-small/source/common/cstr.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2015-2016, International Business Machines -* Corporation and others. All Rights Reserved. 
-******************************************************************************* -* file name: charstr.cpp -*/ -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "unicode/unistr.h" - -#include "cstr.h" - -#include "charstr.h" -#include "uinvchar.h" - -U_NAMESPACE_BEGIN - -CStr::CStr(const UnicodeString &in) { - UErrorCode status = U_ZERO_ERROR; -#if !UCONFIG_NO_CONVERSION || U_CHARSET_IS_UTF8 - int32_t length = in.extract(0, in.length(), static_cast(NULL), static_cast(0)); - int32_t resultCapacity = 0; - char *buf = s.getAppendBuffer(length, length, resultCapacity, status); - if (U_SUCCESS(status)) { - in.extract(0, in.length(), buf, resultCapacity); - s.append(buf, length, status); - } -#else - // No conversion available. Convert any invariant characters; substitute '?' for the rest. - // Note: can't just call u_UCharsToChars() or CharString.appendInvariantChars() on the - // whole string because they require that the entire input be invariant. - char buf[2]; - for (int i=0; i -#include -#include "unicode/utypes.h" -#include "cmemory.h" -#include "cstring.h" -#include "uassert.h" - -/* - * We hardcode case conversion for invariant characters to match our expectation - * and the compiler execution charset. - * This prevents problems on systems - * - with non-default casing behavior, like Turkish system locales where - * tolower('I') maps to dotless i and toupper('i') maps to dotted I - * - where there are no lowercase Latin characters at all, or using different - * codes (some old EBCDIC codepages) - * - * This works because the compiler usually runs on a platform where the execution - * charset includes all of the invariant characters at their expected - * code positions, so that the char * string literals in ICU code match - * the char literals here. - * - * Note that the set of lowercase Latin letters is discontiguous in EBCDIC - * and the set of uppercase Latin letters is discontiguous as well. 
- */ - -U_CAPI UBool U_EXPORT2 -uprv_isASCIILetter(char c) { -#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY - return - ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') || - ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z'); -#else - return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); -#endif -} - -U_CAPI char U_EXPORT2 -uprv_toupper(char c) { -#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY - if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) { - c=(char)(c+('A'-'a')); - } -#else - if('a'<=c && c<='z') { - c=(char)(c+('A'-'a')); - } -#endif - return c; -} - - -#if 0 -/* - * Commented out because cstring.h defines uprv_tolower() to be - * the same as either uprv_asciitolower() or uprv_ebcdictolower() - * to reduce the amount of code to cover with tests. - * - * Note that this uprv_tolower() definition is likely to work for most - * charset families, not just ASCII and EBCDIC, because its #else branch - * is written generically. - */ -U_CAPI char U_EXPORT2 -uprv_tolower(char c) { -#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY - if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) { - c=(char)(c+('a'-'A')); - } -#else - if('A'<=c && c<='Z') { - c=(char)(c+('a'-'A')); - } -#endif - return c; -} -#endif - -U_CAPI char U_EXPORT2 -uprv_asciitolower(char c) { - if(0x41<=c && c<=0x5a) { - c=(char)(c+0x20); - } - return c; -} - -U_CAPI char U_EXPORT2 -uprv_ebcdictolower(char c) { - if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) || - (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) || - (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9) - ) { - c=(char)(c-0x40); - } - return c; -} - - -U_CAPI char* U_EXPORT2 -T_CString_toLowerCase(char* str) -{ - char* origPtr = str; - - if (str) { - do - *str = (char)uprv_tolower(*str); - while (*(str++)); - } - - return origPtr; -} - -U_CAPI char* U_EXPORT2 -T_CString_toUpperCase(char* str) -{ - char* origPtr = str; - - if (str) { - do - *str = (char)uprv_toupper(*str); - while (*(str++)); - } - - return origPtr; -} - -/* - * Takes a 
int32_t and fills in a char* string with that number "radix"-based. - * Does not handle negative values (makes an empty string for them). - * Writes at most 12 chars ("-2147483647" plus NUL). - * Returns the length of the string (not including the NUL). - */ -U_CAPI int32_t U_EXPORT2 -T_CString_integerToString(char* buffer, int32_t v, int32_t radix) -{ - char tbuf[30]; - int32_t tbx = sizeof(tbuf); - uint8_t digit; - int32_t length = 0; - uint32_t uval; - - U_ASSERT(radix>=2 && radix<=16); - uval = (uint32_t) v; - if(v<0 && radix == 10) { - /* Only in base 10 do we conside numbers to be signed. */ - uval = (uint32_t)(-v); - buffer[length++] = '-'; - } - - tbx = sizeof(tbuf)-1; - tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ - do { - digit = (uint8_t)(uval % radix); - tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); - uval = uval / radix; - } while (uval != 0); - - /* copy converted number into user buffer */ - uprv_strcpy(buffer+length, tbuf+tbx); - length += sizeof(tbuf) - tbx -1; - return length; -} - - - -/* - * Takes a int64_t and fills in a char* string with that number "radix"-based. - * Writes at most 21: chars ("-9223372036854775807" plus NUL). - * Returns the length of the string, not including the terminating NULL. - */ -U_CAPI int32_t U_EXPORT2 -T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix) -{ - char tbuf[30]; - int32_t tbx = sizeof(tbuf); - uint8_t digit; - int32_t length = 0; - uint64_t uval; - - U_ASSERT(radix>=2 && radix<=16); - uval = (uint64_t) v; - if(v<0 && radix == 10) { - /* Only in base 10 do we conside numbers to be signed. */ - uval = (uint64_t)(-v); - buffer[length++] = '-'; - } - - tbx = sizeof(tbuf)-1; - tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. 
*/ - do { - digit = (uint8_t)(uval % radix); - tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); - uval = uval / radix; - } while (uval != 0); - - /* copy converted number into user buffer */ - uprv_strcpy(buffer+length, tbuf+tbx); - length += sizeof(tbuf) - tbx -1; - return length; -} - - -U_CAPI int32_t U_EXPORT2 -T_CString_stringToInteger(const char *integerString, int32_t radix) -{ - char *end; - return uprv_strtoul(integerString, &end, radix); - -} - -U_CAPI int U_EXPORT2 -uprv_stricmp(const char *str1, const char *str2) { - if(str1==NULL) { - if(str2==NULL) { - return 0; - } else { - return -1; - } - } else if(str2==NULL) { - return 1; - } else { - /* compare non-NULL strings lexically with lowercase */ - int rc; - unsigned char c1, c2; - - for(;;) { - c1=(unsigned char)*str1; - c2=(unsigned char)*str2; - if(c1==0) { - if(c2==0) { - return 0; - } else { - return -1; - } - } else if(c2==0) { - return 1; - } else { - /* compare non-zero characters with lowercase */ - rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); - if(rc!=0) { - return rc; - } - } - ++str1; - ++str2; - } - } -} - -U_CAPI int U_EXPORT2 -uprv_strnicmp(const char *str1, const char *str2, uint32_t n) { - if(str1==NULL) { - if(str2==NULL) { - return 0; - } else { - return -1; - } - } else if(str2==NULL) { - return 1; - } else { - /* compare non-NULL strings lexically with lowercase */ - int rc; - unsigned char c1, c2; - - for(; n--;) { - c1=(unsigned char)*str1; - c2=(unsigned char)*str2; - if(c1==0) { - if(c2==0) { - return 0; - } else { - return -1; - } - } else if(c2==0) { - return 1; - } else { - /* compare non-zero characters with lowercase */ - rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); - if(rc!=0) { - return rc; - } - } - ++str1; - ++str2; - } - } - - return 0; -} - -U_CAPI char* U_EXPORT2 -uprv_strdup(const char *src) { - size_t len = uprv_strlen(src) + 1; - char *dup = (char *) uprv_malloc(len); - - if (dup) { - 
uprv_memcpy(dup, src, len); - } - - return dup; -} - -U_CAPI char* U_EXPORT2 -uprv_strndup(const char *src, int32_t n) { - char *dup; - - if(n < 0) { - dup = uprv_strdup(src); - } else { - dup = (char*)uprv_malloc(n+1); - if (dup) { - uprv_memcpy(dup, src, n); - dup[n] = 0; - } - } - - return dup; -} diff --git a/deps/node/deps/icu-small/source/common/cstring.h b/deps/node/deps/icu-small/source/common/cstring.h deleted file mode 100644 index ed0b1a7c..00000000 --- a/deps/node/deps/icu-small/source/common/cstring.h +++ /dev/null @@ -1,124 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File CSTRING.H -* -* Contains CString interface -* -* @author Helena Shih -* -* Modification History: -* -* Date Name Description -* 6/17/98 hshih Created. -* 05/03/99 stephen Changed from functions to macros. 
-* 06/14/99 stephen Added icu_strncat, icu_strncmp, icu_tolower -* -****************************************************************************** -*/ - -#ifndef CSTRING_H -#define CSTRING_H 1 - -#include "unicode/utypes.h" -#include "cmemory.h" -#include -#include -#include - -#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE strcpy(dst, src) -#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str) -#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2) -#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src) -#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c) -#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c) -#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c) -#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size) -#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n) -#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n) - -/** - * Is c an ASCII-repertoire letter a-z or A-Z? - * Note: The implementation is specific to whether ICU is compiled for - * an ASCII-based or EBCDIC-based machine. There just does not seem to be a better name for this. 
- */ -U_CAPI UBool U_EXPORT2 -uprv_isASCIILetter(char c); - -U_CAPI char U_EXPORT2 -uprv_toupper(char c); - - -U_CAPI char U_EXPORT2 -uprv_asciitolower(char c); - -U_CAPI char U_EXPORT2 -uprv_ebcdictolower(char c); - -#if U_CHARSET_FAMILY==U_ASCII_FAMILY -# define uprv_tolower uprv_asciitolower -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY -# define uprv_tolower uprv_ebcdictolower -#else -# error U_CHARSET_FAMILY is not valid -#endif - -#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end) -#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base) -#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base) - -/* Conversion from a digit to the character with radix base from 2-19 */ -/* May need to use U_UPPER_ORDINAL*/ -#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10)) - -U_CAPI char* U_EXPORT2 -uprv_strdup(const char *src); - -/** - * uprv_malloc n+1 bytes, and copy n bytes from src into the new string. - * Terminate with a null at offset n. If n is -1, works like uprv_strdup - * @param src - * @param n length of the input string, not including null. - * @return new string (owned by caller, use uprv_free to free). - * @internal - */ -U_CAPI char* U_EXPORT2 -uprv_strndup(const char *src, int32_t n); - -U_CAPI char* U_EXPORT2 -T_CString_toLowerCase(char* str); - -U_CAPI char* U_EXPORT2 -T_CString_toUpperCase(char* str); - -U_CAPI int32_t U_EXPORT2 -T_CString_integerToString(char *buffer, int32_t n, int32_t radix); - -U_CAPI int32_t U_EXPORT2 -T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix); - -U_CAPI int32_t U_EXPORT2 -T_CString_stringToInteger(const char *integerString, int32_t radix); - -/** - * Case-insensitive, language-independent string comparison - * limited to the ASCII character repertoire. 
- */ -U_CAPI int U_EXPORT2 -uprv_stricmp(const char *str1, const char *str2); - -/** - * Case-insensitive, language-independent string comparison - * limited to the ASCII character repertoire. - */ -U_CAPI int U_EXPORT2 -uprv_strnicmp(const char *str1, const char *str2, uint32_t n); - -#endif /* ! CSTRING_H */ diff --git a/deps/node/deps/icu-small/source/common/cwchar.cpp b/deps/node/deps/icu-small/source/common/cwchar.cpp deleted file mode 100644 index 4fd53111..00000000 --- a/deps/node/deps/icu-small/source/common/cwchar.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2001, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: cwchar.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001may25 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" - -#if !U_HAVE_WCSCPY - -#include "cwchar.h" - -U_CAPI wchar_t *uprv_wcscat(wchar_t *dst, const wchar_t *src) { - wchar_t *start=dst; - while(*dst!=0) { - ++dst; - } - while((*dst=*src)!=0) { - ++dst; - ++src; - } - return start; -} - -U_CAPI wchar_t *uprv_wcscpy(wchar_t *dst, const wchar_t *src) { - wchar_t *start=dst; - while((*dst=*src)!=0) { - ++dst; - ++src; - } - return start; -} - -U_CAPI size_t uprv_wcslen(const wchar_t *src) { - const wchar_t *start=src; - while(*src!=0) { - ++src; - } - return src-start; -} - -#endif diff --git a/deps/node/deps/icu-small/source/common/cwchar.h b/deps/node/deps/icu-small/source/common/cwchar.h deleted file mode 100644 index 939eb599..00000000 --- a/deps/node/deps/icu-small/source/common/cwchar.h +++ /dev/null @@ -1,58 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2001, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: cwchar.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001may25 -* created by: Markus W. Scherer -* -* This file contains ICU-internal definitions of wchar_t operations. -* These definitions were moved here from cstring.h so that fewer -* ICU implementation files include wchar.h. -*/ - -#ifndef __CWCHAR_H__ -#define __CWCHAR_H__ - -#include -#include -#include "unicode/utypes.h" - -/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */ -#if U_HAVE_WCHAR_H -# include -#endif - -/*===========================================================================*/ -/* Wide-character functions */ -/*===========================================================================*/ - -/* The following are not available on all systems, defined in wchar.h or string.h. */ -#if U_HAVE_WCSCPY -# define uprv_wcscpy wcscpy -# define uprv_wcscat wcscat -# define uprv_wcslen wcslen -#else -U_CAPI wchar_t* U_EXPORT2 -uprv_wcscpy(wchar_t *dst, const wchar_t *src); -U_CAPI wchar_t* U_EXPORT2 -uprv_wcscat(wchar_t *dst, const wchar_t *src); -U_CAPI size_t U_EXPORT2 -uprv_wcslen(const wchar_t *src); -#endif - -/* The following are part of the ANSI C standard, defined in stdlib.h . 
*/ -#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count) -#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count) - - -#endif diff --git a/deps/node/deps/icu-small/source/common/dictbe.cpp b/deps/node/deps/icu-small/source/common/dictbe.cpp deleted file mode 100644 index 0e4d0850..00000000 --- a/deps/node/deps/icu-small/source/common/dictbe.cpp +++ /dev/null @@ -1,1391 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** - ******************************************************************************* - * Copyright (C) 2006-2016, International Business Machines Corporation - * and others. All Rights Reserved. - ******************************************************************************* - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "brkeng.h" -#include "dictbe.h" -#include "unicode/uniset.h" -#include "unicode/chariter.h" -#include "unicode/ubrk.h" -#include "uvectr32.h" -#include "uvector.h" -#include "uassert.h" -#include "unicode/normlzr.h" -#include "cmemory.h" -#include "dictionarydata.h" - -U_NAMESPACE_BEGIN - -/* - ****************************************************************** - */ - -DictionaryBreakEngine::DictionaryBreakEngine() { -} - -DictionaryBreakEngine::~DictionaryBreakEngine() { -} - -UBool -DictionaryBreakEngine::handles(UChar32 c) const { - return fSet.contains(c); -} - -int32_t -DictionaryBreakEngine::findBreaks( UText *text, - int32_t startPos, - int32_t endPos, - UVector32 &foundBreaks ) const { - (void)startPos; // TODO: remove this param? - int32_t result = 0; - - // Find the span of characters included in the set. - // The span to break begins at the current position in the text, and - // extends towards the start or end of the text, depending on 'reverse'. 
- - int32_t start = (int32_t)utext_getNativeIndex(text); - int32_t current; - int32_t rangeStart; - int32_t rangeEnd; - UChar32 c = utext_current32(text); - while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) { - utext_next32(text); // TODO: recast loop for postincrement - c = utext_current32(text); - } - rangeStart = start; - rangeEnd = current; - result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); - utext_setNativeIndex(text, current); - - return result; -} - -void -DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) { - fSet = set; - // Compact for caching - fSet.compact(); -} - -/* - ****************************************************************** - * PossibleWord - */ - -// Helper class for improving readability of the Thai/Lao/Khmer word break -// algorithm. The implementation is completely inline. - -// List size, limited by the maximum number of words in the dictionary -// that form a nested sequence. -static const int32_t POSSIBLE_WORD_LIST_MAX = 20; - -class PossibleWord { -private: - // list of word candidate lengths, in increasing length order - // TODO: bytes would be sufficient for word lengths. - int32_t count; // Count of candidates - int32_t prefix; // The longest match with a dictionary word - int32_t offset; // Offset in the text of these candidates - int32_t mark; // The preferred candidate's offset - int32_t current; // The candidate we're currently looking at - int32_t cuLengths[POSSIBLE_WORD_LIST_MAX]; // Word Lengths, in code units. - int32_t cpLengths[POSSIBLE_WORD_LIST_MAX]; // Word Lengths, in code points. 
- -public: - PossibleWord() : count(0), prefix(0), offset(-1), mark(0), current(0) {}; - ~PossibleWord() {}; - - // Fill the list of candidates if needed, select the longest, and return the number found - int32_t candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd ); - - // Select the currently marked candidate, point after it in the text, and invalidate self - int32_t acceptMarked( UText *text ); - - // Back up from the current candidate to the next shorter one; return TRUE if that exists - // and point the text after it - UBool backUp( UText *text ); - - // Return the longest prefix this candidate location shares with a dictionary word - // Return value is in code points. - int32_t longestPrefix() { return prefix; }; - - // Mark the current candidate as the one we like - void markCurrent() { mark = current; }; - - // Get length in code points of the marked word. - int32_t markedCPLength() { return cpLengths[mark]; }; -}; - - -int32_t PossibleWord::candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd ) { - // TODO: If getIndex is too slow, use offset < 0 and add discardAll() - int32_t start = (int32_t)utext_getNativeIndex(text); - if (start != offset) { - offset = start; - count = dict->matches(text, rangeEnd-start, UPRV_LENGTHOF(cuLengths), cuLengths, cpLengths, NULL, &prefix); - // Dictionary leaves text after longest prefix, not longest word. Back up. 
- if (count <= 0) { - utext_setNativeIndex(text, start); - } - } - if (count > 0) { - utext_setNativeIndex(text, start+cuLengths[count-1]); - } - current = count-1; - mark = current; - return count; -} - -int32_t -PossibleWord::acceptMarked( UText *text ) { - utext_setNativeIndex(text, offset + cuLengths[mark]); - return cuLengths[mark]; -} - - -UBool -PossibleWord::backUp( UText *text ) { - if (current > 0) { - utext_setNativeIndex(text, offset + cuLengths[--current]); - return TRUE; - } - return FALSE; -} - -/* - ****************************************************************** - * ThaiBreakEngine - */ - -// How many words in a row are "good enough"? -static const int32_t THAI_LOOKAHEAD = 3; - -// Will not combine a non-word with a preceding dictionary word longer than this -static const int32_t THAI_ROOT_COMBINE_THRESHOLD = 3; - -// Will not combine a non-word that shares at least this much prefix with a -// dictionary word, with a preceding word -static const int32_t THAI_PREFIX_COMBINE_THRESHOLD = 3; - -// Ellision character -static const int32_t THAI_PAIYANNOI = 0x0E2F; - -// Repeat character -static const int32_t THAI_MAIYAMOK = 0x0E46; - -// Minimum word size -static const int32_t THAI_MIN_WORD = 2; - -// Minimum number of characters for two words -static const int32_t THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2; - -ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine(), - fDictionary(adoptDictionary) -{ - fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status); - if (U_SUCCESS(status)) { - setCharacters(fThaiWordSet); - } - fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status); - fMarkSet.add(0x0020); - fEndWordSet = fThaiWordSet; - fEndWordSet.remove(0x0E31); // MAI HAN-AKAT - fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI - fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK - 
fBeginWordSet.add(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI - fSuffixSet.add(THAI_PAIYANNOI); - fSuffixSet.add(THAI_MAIYAMOK); - - // Compact for caching. - fMarkSet.compact(); - fEndWordSet.compact(); - fBeginWordSet.compact(); - fSuffixSet.compact(); -} - -ThaiBreakEngine::~ThaiBreakEngine() { - delete fDictionary; -} - -int32_t -ThaiBreakEngine::divideUpDictionaryRange( UText *text, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const { - utext_setNativeIndex(text, rangeStart); - utext_moveIndex32(text, THAI_MIN_WORD_SPAN); - if (utext_getNativeIndex(text) >= rangeEnd) { - return 0; // Not enough characters for two words - } - utext_setNativeIndex(text, rangeStart); - - - uint32_t wordsFound = 0; - int32_t cpWordLength = 0; // Word Length in Code Points. - int32_t cuWordLength = 0; // Word length in code units (UText native indexing) - int32_t current; - UErrorCode status = U_ZERO_ERROR; - PossibleWord words[THAI_LOOKAHEAD]; - - utext_setNativeIndex(text, rangeStart); - - while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) { - cpWordLength = 0; - cuWordLength = 0; - - // Look for candidate words at the current position - int32_t candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); - - // If we found exactly one, use that - if (candidates == 1) { - cuWordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text); - cpWordLength = words[wordsFound % THAI_LOOKAHEAD].markedCPLength(); - wordsFound += 1; - } - // If there was more than one, see which one can take us forward the most words - else if (candidates > 1) { - // If we're already at the end of the range, we're done - if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { - goto foundBest; - } - do { - int32_t wordsMatched = 1; - if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { - if (wordsMatched < 2) { - // Followed by another dictionary word; mark first word as 
a good candidate - words[wordsFound%THAI_LOOKAHEAD].markCurrent(); - wordsMatched = 2; - } - - // If we're already at the end of the range, we're done - if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { - goto foundBest; - } - - // See if any of the possible second words is followed by a third word - do { - // If we find a third word, stop right away - if (words[(wordsFound + 2) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) { - words[wordsFound % THAI_LOOKAHEAD].markCurrent(); - goto foundBest; - } - } - while (words[(wordsFound + 1) % THAI_LOOKAHEAD].backUp(text)); - } - } - while (words[wordsFound % THAI_LOOKAHEAD].backUp(text)); -foundBest: - // Set UText position to after the accepted word. - cuWordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text); - cpWordLength = words[wordsFound % THAI_LOOKAHEAD].markedCPLength(); - wordsFound += 1; - } - - // We come here after having either found a word or not. We look ahead to the - // next word. If it's not a dictionary word, we will combine it with the word we - // just found (if there is one), but only if the preceding word does not exceed - // the threshold. - // The text iterator should now be positioned at the end of the word we found. - - UChar32 uc = 0; - if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < THAI_ROOT_COMBINE_THRESHOLD) { - // if it is a dictionary word, do nothing. 
If it isn't, then if there is - // no preceding word, or the non-word shares less than the minimum threshold - // of characters with a dictionary word, then scan to resynchronize - if (words[wordsFound % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 - && (cuWordLength == 0 - || words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) { - // Look for a plausible word boundary - int32_t remaining = rangeEnd - (current+cuWordLength); - UChar32 pc; - int32_t chars = 0; - for (;;) { - int32_t pcIndex = (int32_t)utext_getNativeIndex(text); - pc = utext_next32(text); - int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex; - chars += pcSize; - remaining -= pcSize; - if (remaining <= 0) { - break; - } - uc = utext_current32(text); - if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { - // Maybe. See if it's in the dictionary. - // NOTE: In the original Apple code, checked that the next - // two characters after uc were not 0x0E4C THANTHAKHAT before - // checking the dictionary. That is just a performance filter, - // but it's not clear it's faster than checking the trie. - int32_t num_candidates = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); - utext_setNativeIndex(text, current + cuWordLength + chars); - if (num_candidates > 0) { - break; - } - } - } - - // Bump the word count if there wasn't already one - if (cuWordLength <= 0) { - wordsFound += 1; - } - - // Update the length with the passed-over characters - cuWordLength += chars; - } - else { - // Back up to where we were for next iteration - utext_setNativeIndex(text, current+cuWordLength); - } - } - - // Never stop before a combining mark. 
- int32_t currPos; - while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) { - utext_next32(text); - cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos; - } - - // Look ahead for possible suffixes if a dictionary word does not follow. - // We do this in code rather than using a rule so that the heuristic - // resynch continues to function. For example, one of the suffix characters - // could be a typo in the middle of a word. - if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cuWordLength > 0) { - if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 - && fSuffixSet.contains(uc = utext_current32(text))) { - if (uc == THAI_PAIYANNOI) { - if (!fSuffixSet.contains(utext_previous32(text))) { - // Skip over previous end and PAIYANNOI - utext_next32(text); - int32_t paiyannoiIndex = (int32_t)utext_getNativeIndex(text); - utext_next32(text); - cuWordLength += (int32_t)utext_getNativeIndex(text) - paiyannoiIndex; // Add PAIYANNOI to word - uc = utext_current32(text); // Fetch next character - } - else { - // Restore prior position - utext_next32(text); - } - } - if (uc == THAI_MAIYAMOK) { - if (utext_previous32(text) != THAI_MAIYAMOK) { - // Skip over previous end and MAIYAMOK - utext_next32(text); - int32_t maiyamokIndex = (int32_t)utext_getNativeIndex(text); - utext_next32(text); - cuWordLength += (int32_t)utext_getNativeIndex(text) - maiyamokIndex; // Add MAIYAMOK to word - } - else { - // Restore prior position - utext_next32(text); - } - } - } - else { - utext_setNativeIndex(text, current+cuWordLength); - } - } - - // Did we find a word on this iteration? If so, push it on the break stack - if (cuWordLength > 0) { - foundBreaks.push((current+cuWordLength), status); - } - } - - // Don't return a break for the end of the dictionary range if there is one there. 
- if (foundBreaks.peeki() >= rangeEnd) { - (void) foundBreaks.popi(); - wordsFound -= 1; - } - - return wordsFound; -} - -/* - ****************************************************************** - * LaoBreakEngine - */ - -// How many words in a row are "good enough"? -static const int32_t LAO_LOOKAHEAD = 3; - -// Will not combine a non-word with a preceding dictionary word longer than this -static const int32_t LAO_ROOT_COMBINE_THRESHOLD = 3; - -// Will not combine a non-word that shares at least this much prefix with a -// dictionary word, with a preceding word -static const int32_t LAO_PREFIX_COMBINE_THRESHOLD = 3; - -// Minimum word size -static const int32_t LAO_MIN_WORD = 2; - -// Minimum number of characters for two words -static const int32_t LAO_MIN_WORD_SPAN = LAO_MIN_WORD * 2; - -LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine(), - fDictionary(adoptDictionary) -{ - fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status); - if (U_SUCCESS(status)) { - setCharacters(fLaoWordSet); - } - fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status); - fMarkSet.add(0x0020); - fEndWordSet = fLaoWordSet; - fEndWordSet.remove(0x0EC0, 0x0EC4); // prefix vowels - fBeginWordSet.add(0x0E81, 0x0EAE); // basic consonants (including holes for corresponding Thai characters) - fBeginWordSet.add(0x0EDC, 0x0EDD); // digraph consonants (no Thai equivalent) - fBeginWordSet.add(0x0EC0, 0x0EC4); // prefix vowels - - // Compact for caching. 
- fMarkSet.compact(); - fEndWordSet.compact(); - fBeginWordSet.compact(); -} - -LaoBreakEngine::~LaoBreakEngine() { - delete fDictionary; -} - -int32_t -LaoBreakEngine::divideUpDictionaryRange( UText *text, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const { - if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) { - return 0; // Not enough characters for two words - } - - uint32_t wordsFound = 0; - int32_t cpWordLength = 0; - int32_t cuWordLength = 0; - int32_t current; - UErrorCode status = U_ZERO_ERROR; - PossibleWord words[LAO_LOOKAHEAD]; - - utext_setNativeIndex(text, rangeStart); - - while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) { - cuWordLength = 0; - cpWordLength = 0; - - // Look for candidate words at the current position - int32_t candidates = words[wordsFound%LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); - - // If we found exactly one, use that - if (candidates == 1) { - cuWordLength = words[wordsFound % LAO_LOOKAHEAD].acceptMarked(text); - cpWordLength = words[wordsFound % LAO_LOOKAHEAD].markedCPLength(); - wordsFound += 1; - } - // If there was more than one, see which one can take us forward the most words - else if (candidates > 1) { - // If we're already at the end of the range, we're done - if (utext_getNativeIndex(text) >= rangeEnd) { - goto foundBest; - } - do { - int32_t wordsMatched = 1; - if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { - if (wordsMatched < 2) { - // Followed by another dictionary word; mark first word as a good candidate - words[wordsFound%LAO_LOOKAHEAD].markCurrent(); - wordsMatched = 2; - } - - // If we're already at the end of the range, we're done - if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { - goto foundBest; - } - - // See if any of the possible second words is followed by a third word - do { - // If we find a third word, stop right away - if (words[(wordsFound + 2) % 
LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) { - words[wordsFound % LAO_LOOKAHEAD].markCurrent(); - goto foundBest; - } - } - while (words[(wordsFound + 1) % LAO_LOOKAHEAD].backUp(text)); - } - } - while (words[wordsFound % LAO_LOOKAHEAD].backUp(text)); -foundBest: - cuWordLength = words[wordsFound % LAO_LOOKAHEAD].acceptMarked(text); - cpWordLength = words[wordsFound % LAO_LOOKAHEAD].markedCPLength(); - wordsFound += 1; - } - - // We come here after having either found a word or not. We look ahead to the - // next word. If it's not a dictionary word, we will combine it withe the word we - // just found (if there is one), but only if the preceding word does not exceed - // the threshold. - // The text iterator should now be positioned at the end of the word we found. - if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < LAO_ROOT_COMBINE_THRESHOLD) { - // if it is a dictionary word, do nothing. If it isn't, then if there is - // no preceding word, or the non-word shares less than the minimum threshold - // of characters with a dictionary word, then scan to resynchronize - if (words[wordsFound % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 - && (cuWordLength == 0 - || words[wordsFound%LAO_LOOKAHEAD].longestPrefix() < LAO_PREFIX_COMBINE_THRESHOLD)) { - // Look for a plausible word boundary - int32_t remaining = rangeEnd - (current + cuWordLength); - UChar32 pc; - UChar32 uc; - int32_t chars = 0; - for (;;) { - int32_t pcIndex = (int32_t)utext_getNativeIndex(text); - pc = utext_next32(text); - int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex; - chars += pcSize; - remaining -= pcSize; - if (remaining <= 0) { - break; - } - uc = utext_current32(text); - if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { - // Maybe. See if it's in the dictionary. - // TODO: this looks iffy; compare with old code. 
- int32_t num_candidates = words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); - utext_setNativeIndex(text, current + cuWordLength + chars); - if (num_candidates > 0) { - break; - } - } - } - - // Bump the word count if there wasn't already one - if (cuWordLength <= 0) { - wordsFound += 1; - } - - // Update the length with the passed-over characters - cuWordLength += chars; - } - else { - // Back up to where we were for next iteration - utext_setNativeIndex(text, current + cuWordLength); - } - } - - // Never stop before a combining mark. - int32_t currPos; - while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) { - utext_next32(text); - cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos; - } - - // Look ahead for possible suffixes if a dictionary word does not follow. - // We do this in code rather than using a rule so that the heuristic - // resynch continues to function. For example, one of the suffix characters - // could be a typo in the middle of a word. - // NOT CURRENTLY APPLICABLE TO LAO - - // Did we find a word on this iteration? If so, push it on the break stack - if (cuWordLength > 0) { - foundBreaks.push((current+cuWordLength), status); - } - } - - // Don't return a break for the end of the dictionary range if there is one there. - if (foundBreaks.peeki() >= rangeEnd) { - (void) foundBreaks.popi(); - wordsFound -= 1; - } - - return wordsFound; -} - -/* - ****************************************************************** - * BurmeseBreakEngine - */ - -// How many words in a row are "good enough"? 
-static const int32_t BURMESE_LOOKAHEAD = 3; - -// Will not combine a non-word with a preceding dictionary word longer than this -static const int32_t BURMESE_ROOT_COMBINE_THRESHOLD = 3; - -// Will not combine a non-word that shares at least this much prefix with a -// dictionary word, with a preceding word -static const int32_t BURMESE_PREFIX_COMBINE_THRESHOLD = 3; - -// Minimum word size -static const int32_t BURMESE_MIN_WORD = 2; - -// Minimum number of characters for two words -static const int32_t BURMESE_MIN_WORD_SPAN = BURMESE_MIN_WORD * 2; - -BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine(), - fDictionary(adoptDictionary) -{ - fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status); - if (U_SUCCESS(status)) { - setCharacters(fBurmeseWordSet); - } - fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status); - fMarkSet.add(0x0020); - fEndWordSet = fBurmeseWordSet; - fBeginWordSet.add(0x1000, 0x102A); // basic consonants and independent vowels - - // Compact for caching. 
- fMarkSet.compact(); - fEndWordSet.compact(); - fBeginWordSet.compact(); -} - -BurmeseBreakEngine::~BurmeseBreakEngine() { - delete fDictionary; -} - -int32_t -BurmeseBreakEngine::divideUpDictionaryRange( UText *text, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const { - if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) { - return 0; // Not enough characters for two words - } - - uint32_t wordsFound = 0; - int32_t cpWordLength = 0; - int32_t cuWordLength = 0; - int32_t current; - UErrorCode status = U_ZERO_ERROR; - PossibleWord words[BURMESE_LOOKAHEAD]; - - utext_setNativeIndex(text, rangeStart); - - while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) { - cuWordLength = 0; - cpWordLength = 0; - - // Look for candidate words at the current position - int32_t candidates = words[wordsFound%BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); - - // If we found exactly one, use that - if (candidates == 1) { - cuWordLength = words[wordsFound % BURMESE_LOOKAHEAD].acceptMarked(text); - cpWordLength = words[wordsFound % BURMESE_LOOKAHEAD].markedCPLength(); - wordsFound += 1; - } - // If there was more than one, see which one can take us forward the most words - else if (candidates > 1) { - // If we're already at the end of the range, we're done - if (utext_getNativeIndex(text) >= rangeEnd) { - goto foundBest; - } - do { - int32_t wordsMatched = 1; - if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { - if (wordsMatched < 2) { - // Followed by another dictionary word; mark first word as a good candidate - words[wordsFound%BURMESE_LOOKAHEAD].markCurrent(); - wordsMatched = 2; - } - - // If we're already at the end of the range, we're done - if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { - goto foundBest; - } - - // See if any of the possible second words is followed by a third word - do { - // If we find a third word, stop right away - if 
(words[(wordsFound + 2) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) { - words[wordsFound % BURMESE_LOOKAHEAD].markCurrent(); - goto foundBest; - } - } - while (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].backUp(text)); - } - } - while (words[wordsFound % BURMESE_LOOKAHEAD].backUp(text)); -foundBest: - cuWordLength = words[wordsFound % BURMESE_LOOKAHEAD].acceptMarked(text); - cpWordLength = words[wordsFound % BURMESE_LOOKAHEAD].markedCPLength(); - wordsFound += 1; - } - - // We come here after having either found a word or not. We look ahead to the - // next word. If it's not a dictionary word, we will combine it withe the word we - // just found (if there is one), but only if the preceding word does not exceed - // the threshold. - // The text iterator should now be positioned at the end of the word we found. - if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < BURMESE_ROOT_COMBINE_THRESHOLD) { - // if it is a dictionary word, do nothing. If it isn't, then if there is - // no preceding word, or the non-word shares less than the minimum threshold - // of characters with a dictionary word, then scan to resynchronize - if (words[wordsFound % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 - && (cuWordLength == 0 - || words[wordsFound%BURMESE_LOOKAHEAD].longestPrefix() < BURMESE_PREFIX_COMBINE_THRESHOLD)) { - // Look for a plausible word boundary - int32_t remaining = rangeEnd - (current + cuWordLength); - UChar32 pc; - UChar32 uc; - int32_t chars = 0; - for (;;) { - int32_t pcIndex = (int32_t)utext_getNativeIndex(text); - pc = utext_next32(text); - int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex; - chars += pcSize; - remaining -= pcSize; - if (remaining <= 0) { - break; - } - uc = utext_current32(text); - if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { - // Maybe. See if it's in the dictionary. - // TODO: this looks iffy; compare with old code. 
- int32_t num_candidates = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); - utext_setNativeIndex(text, current + cuWordLength + chars); - if (num_candidates > 0) { - break; - } - } - } - - // Bump the word count if there wasn't already one - if (cuWordLength <= 0) { - wordsFound += 1; - } - - // Update the length with the passed-over characters - cuWordLength += chars; - } - else { - // Back up to where we were for next iteration - utext_setNativeIndex(text, current + cuWordLength); - } - } - - // Never stop before a combining mark. - int32_t currPos; - while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) { - utext_next32(text); - cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos; - } - - // Look ahead for possible suffixes if a dictionary word does not follow. - // We do this in code rather than using a rule so that the heuristic - // resynch continues to function. For example, one of the suffix characters - // could be a typo in the middle of a word. - // NOT CURRENTLY APPLICABLE TO BURMESE - - // Did we find a word on this iteration? If so, push it on the break stack - if (cuWordLength > 0) { - foundBreaks.push((current+cuWordLength), status); - } - } - - // Don't return a break for the end of the dictionary range if there is one there. - if (foundBreaks.peeki() >= rangeEnd) { - (void) foundBreaks.popi(); - wordsFound -= 1; - } - - return wordsFound; -} - -/* - ****************************************************************** - * KhmerBreakEngine - */ - -// How many words in a row are "good enough"? 
-static const int32_t KHMER_LOOKAHEAD = 3; - -// Will not combine a non-word with a preceding dictionary word longer than this -static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 3; - -// Will not combine a non-word that shares at least this much prefix with a -// dictionary word, with a preceding word -static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 3; - -// Minimum word size -static const int32_t KHMER_MIN_WORD = 2; - -// Minimum number of characters for two words -static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2; - -KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) - : DictionaryBreakEngine(), - fDictionary(adoptDictionary) -{ - fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status); - if (U_SUCCESS(status)) { - setCharacters(fKhmerWordSet); - } - fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status); - fMarkSet.add(0x0020); - fEndWordSet = fKhmerWordSet; - fBeginWordSet.add(0x1780, 0x17B3); - //fBeginWordSet.add(0x17A3, 0x17A4); // deprecated vowels - //fEndWordSet.remove(0x17A5, 0x17A9); // Khmer independent vowels that can't end a word - //fEndWordSet.remove(0x17B2); // Khmer independent vowel that can't end a word - fEndWordSet.remove(0x17D2); // KHMER SIGN COENG that combines some following characters - //fEndWordSet.remove(0x17B6, 0x17C5); // Remove dependent vowels -// fEndWordSet.remove(0x0E31); // MAI HAN-AKAT -// fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI -// fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK -// fBeginWordSet.add(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI -// fSuffixSet.add(THAI_PAIYANNOI); -// fSuffixSet.add(THAI_MAIYAMOK); - - // Compact for caching. 
- fMarkSet.compact(); - fEndWordSet.compact(); - fBeginWordSet.compact(); -// fSuffixSet.compact(); -} - -KhmerBreakEngine::~KhmerBreakEngine() { - delete fDictionary; -} - -int32_t -KhmerBreakEngine::divideUpDictionaryRange( UText *text, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const { - if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) { - return 0; // Not enough characters for two words - } - - uint32_t wordsFound = 0; - int32_t cpWordLength = 0; - int32_t cuWordLength = 0; - int32_t current; - UErrorCode status = U_ZERO_ERROR; - PossibleWord words[KHMER_LOOKAHEAD]; - - utext_setNativeIndex(text, rangeStart); - - while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) { - cuWordLength = 0; - cpWordLength = 0; - - // Look for candidate words at the current position - int32_t candidates = words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); - - // If we found exactly one, use that - if (candidates == 1) { - cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text); - cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength(); - wordsFound += 1; - } - - // If there was more than one, see which one can take us forward the most words - else if (candidates > 1) { - // If we're already at the end of the range, we're done - if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { - goto foundBest; - } - do { - int32_t wordsMatched = 1; - if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { - if (wordsMatched < 2) { - // Followed by another dictionary word; mark first word as a good candidate - words[wordsFound % KHMER_LOOKAHEAD].markCurrent(); - wordsMatched = 2; - } - - // If we're already at the end of the range, we're done - if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { - goto foundBest; - } - - // See if any of the possible second words is followed by a third word - do { - // If we find a third word, stop right away - 
if (words[(wordsFound + 2) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) { - words[wordsFound % KHMER_LOOKAHEAD].markCurrent(); - goto foundBest; - } - } - while (words[(wordsFound + 1) % KHMER_LOOKAHEAD].backUp(text)); - } - } - while (words[wordsFound % KHMER_LOOKAHEAD].backUp(text)); -foundBest: - cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text); - cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength(); - wordsFound += 1; - } - - // We come here after having either found a word or not. We look ahead to the - // next word. If it's not a dictionary word, we will combine it with the word we - // just found (if there is one), but only if the preceding word does not exceed - // the threshold. - // The text iterator should now be positioned at the end of the word we found. - if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < KHMER_ROOT_COMBINE_THRESHOLD) { - // if it is a dictionary word, do nothing. If it isn't, then if there is - // no preceding word, or the non-word shares less than the minimum threshold - // of characters with a dictionary word, then scan to resynchronize - if (words[wordsFound % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 - && (cuWordLength == 0 - || words[wordsFound % KHMER_LOOKAHEAD].longestPrefix() < KHMER_PREFIX_COMBINE_THRESHOLD)) { - // Look for a plausible word boundary - int32_t remaining = rangeEnd - (current+cuWordLength); - UChar32 pc; - UChar32 uc; - int32_t chars = 0; - for (;;) { - int32_t pcIndex = (int32_t)utext_getNativeIndex(text); - pc = utext_next32(text); - int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex; - chars += pcSize; - remaining -= pcSize; - if (remaining <= 0) { - break; - } - uc = utext_current32(text); - if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { - // Maybe. See if it's in the dictionary. 
- int32_t num_candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); - utext_setNativeIndex(text, current+cuWordLength+chars); - if (num_candidates > 0) { - break; - } - } - } - - // Bump the word count if there wasn't already one - if (cuWordLength <= 0) { - wordsFound += 1; - } - - // Update the length with the passed-over characters - cuWordLength += chars; - } - else { - // Back up to where we were for next iteration - utext_setNativeIndex(text, current+cuWordLength); - } - } - - // Never stop before a combining mark. - int32_t currPos; - while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) { - utext_next32(text); - cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos; - } - - // Look ahead for possible suffixes if a dictionary word does not follow. - // We do this in code rather than using a rule so that the heuristic - // resynch continues to function. For example, one of the suffix characters - // could be a typo in the middle of a word. 
-// if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) { -// if (words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 -// && fSuffixSet.contains(uc = utext_current32(text))) { -// if (uc == KHMER_PAIYANNOI) { -// if (!fSuffixSet.contains(utext_previous32(text))) { -// // Skip over previous end and PAIYANNOI -// utext_next32(text); -// utext_next32(text); -// wordLength += 1; // Add PAIYANNOI to word -// uc = utext_current32(text); // Fetch next character -// } -// else { -// // Restore prior position -// utext_next32(text); -// } -// } -// if (uc == KHMER_MAIYAMOK) { -// if (utext_previous32(text) != KHMER_MAIYAMOK) { -// // Skip over previous end and MAIYAMOK -// utext_next32(text); -// utext_next32(text); -// wordLength += 1; // Add MAIYAMOK to word -// } -// else { -// // Restore prior position -// utext_next32(text); -// } -// } -// } -// else { -// utext_setNativeIndex(text, current+wordLength); -// } -// } - - // Did we find a word on this iteration? If so, push it on the break stack - if (cuWordLength > 0) { - foundBreaks.push((current+cuWordLength), status); - } - } - - // Don't return a break for the end of the dictionary range if there is one there. 
- if (foundBreaks.peeki() >= rangeEnd) { - (void) foundBreaks.popi(); - wordsFound -= 1; - } - - return wordsFound; -} - -#if !UCONFIG_NO_NORMALIZATION -/* - ****************************************************************** - * CjkBreakEngine - */ -static const uint32_t kuint32max = 0xFFFFFFFF; -CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status) -: DictionaryBreakEngine(), fDictionary(adoptDictionary) { - // Korean dictionary only includes Hangul syllables - fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status); - fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status); - fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status); - fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status); - nfkcNorm2 = Normalizer2::getNFKCInstance(status); - - if (U_SUCCESS(status)) { - // handle Korean and Japanese/Chinese using different dictionaries - if (type == kKorean) { - setCharacters(fHangulWordSet); - } else { //Chinese and Japanese - UnicodeSet cjSet; - cjSet.addAll(fHanWordSet); - cjSet.addAll(fKatakanaWordSet); - cjSet.addAll(fHiraganaWordSet); - cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK - cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK - setCharacters(cjSet); - } - } -} - -CjkBreakEngine::~CjkBreakEngine(){ - delete fDictionary; -} - -// The katakanaCost values below are based on the length frequencies of all -// katakana phrases in the dictionary -static const int32_t kMaxKatakanaLength = 8; -static const int32_t kMaxKatakanaGroupLength = 20; -static const uint32_t maxSnlp = 255; - -static inline uint32_t getKatakanaCost(int32_t wordLength){ - //TODO: fill array with actual values from dictionary! - static const uint32_t katakanaCost[kMaxKatakanaLength + 1] - = {8192, 984, 408, 240, 204, 252, 300, 372, 480}; - return (wordLength > kMaxKatakanaLength) ? 
8192 : katakanaCost[wordLength]; -} - -static inline bool isKatakana(UChar32 value) { - return (value >= 0x30A1 && value <= 0x30FE && value != 0x30FB) || - (value >= 0xFF66 && value <= 0xFF9f); -} - - -// Function for accessing internal utext flags. -// Replicates an internal UText function. - -static inline int32_t utext_i32_flag(int32_t bitIndex) { - return (int32_t)1 << bitIndex; -} - - -/* - * @param text A UText representing the text - * @param rangeStart The start of the range of dictionary characters - * @param rangeEnd The end of the range of dictionary characters - * @param foundBreaks vector to receive the break positions - * @return The number of breaks found - */ -int32_t -CjkBreakEngine::divideUpDictionaryRange( UText *inText, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const { - if (rangeStart >= rangeEnd) { - return 0; - } - - // UnicodeString version of input UText, NFKC normalized if necessary. - UnicodeString inString; - - // inputMap[inStringIndex] = corresponding native index from UText inText. - // If NULL then mapping is 1:1 - LocalPointer inputMap; - - UErrorCode status = U_ZERO_ERROR; - - - // if UText has the input string as one contiguous UTF-16 chunk - if ((inText->providerProperties & utext_i32_flag(UTEXT_PROVIDER_STABLE_CHUNKS)) && - inText->chunkNativeStart <= rangeStart && - inText->chunkNativeLimit >= rangeEnd && - inText->nativeIndexingLimit >= rangeEnd - inText->chunkNativeStart) { - - // Input UText is in one contiguous UTF-16 chunk. - // Use Read-only aliasing UnicodeString. - inString.setTo(FALSE, - inText->chunkContents + rangeStart - inText->chunkNativeStart, - rangeEnd - rangeStart); - } else { - // Copy the text from the original inText (UText) to inString (UnicodeString). - // Create a map from UnicodeString indices -> UText offsets. 
- utext_setNativeIndex(inText, rangeStart); - int32_t limit = rangeEnd; - U_ASSERT(limit <= utext_nativeLength(inText)); - if (limit > utext_nativeLength(inText)) { - limit = (int32_t)utext_nativeLength(inText); - } - inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status); - if (U_FAILURE(status)) { - return 0; - } - while (utext_getNativeIndex(inText) < limit) { - int32_t nativePosition = (int32_t)utext_getNativeIndex(inText); - UChar32 c = utext_next32(inText); - U_ASSERT(c != U_SENTINEL); - inString.append(c); - while (inputMap->size() < inString.length()) { - inputMap->addElement(nativePosition, status); - } - } - inputMap->addElement(limit, status); - } - - - if (!nfkcNorm2->isNormalized(inString, status)) { - UnicodeString normalizedInput; - // normalizedMap[normalizedInput position] == original UText position. - LocalPointer normalizedMap(new UVector32(status), status); - if (U_FAILURE(status)) { - return 0; - } - - UnicodeString fragment; - UnicodeString normalizedFragment; - for (int32_t srcI = 0; srcI < inString.length();) { // Once per normalization chunk - fragment.remove(); - int32_t fragmentStartI = srcI; - UChar32 c = inString.char32At(srcI); - for (;;) { - fragment.append(c); - srcI = inString.moveIndex32(srcI, 1); - if (srcI == inString.length()) { - break; - } - c = inString.char32At(srcI); - if (nfkcNorm2->hasBoundaryBefore(c)) { - break; - } - } - nfkcNorm2->normalize(fragment, normalizedFragment, status); - normalizedInput.append(normalizedFragment); - - // Map every position in the normalized chunk to the start of the chunk - // in the original input. - int32_t fragmentOriginalStart = inputMap.isValid() ? 
- inputMap->elementAti(fragmentStartI) : fragmentStartI+rangeStart; - while (normalizedMap->size() < normalizedInput.length()) { - normalizedMap->addElement(fragmentOriginalStart, status); - if (U_FAILURE(status)) { - break; - } - } - } - U_ASSERT(normalizedMap->size() == normalizedInput.length()); - int32_t nativeEnd = inputMap.isValid() ? - inputMap->elementAti(inString.length()) : inString.length()+rangeStart; - normalizedMap->addElement(nativeEnd, status); - - inputMap.moveFrom(normalizedMap); - inString.moveFrom(normalizedInput); - } - - int32_t numCodePts = inString.countChar32(); - if (numCodePts != inString.length()) { - // There are supplementary characters in the input. - // The dictionary will produce boundary positions in terms of code point indexes, - // not in terms of code unit string indexes. - // Use the inputMap mechanism to take care of this in addition to indexing differences - // from normalization and/or UTF-8 input. - UBool hadExistingMap = inputMap.isValid(); - if (!hadExistingMap) { - inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status); - if (U_FAILURE(status)) { - return 0; - } - } - int32_t cpIdx = 0; - for (int32_t cuIdx = 0; ; cuIdx = inString.moveIndex32(cuIdx, 1)) { - U_ASSERT(cuIdx >= cpIdx); - if (hadExistingMap) { - inputMap->setElementAt(inputMap->elementAti(cuIdx), cpIdx); - } else { - inputMap->addElement(cuIdx+rangeStart, status); - } - cpIdx++; - if (cuIdx == inString.length()) { - break; - } - } - } - - // bestSnlp[i] is the snlp of the best segmentation of the first i - // code points in the range to be matched. - UVector32 bestSnlp(numCodePts + 1, status); - bestSnlp.addElement(0, status); - for(int32_t i = 1; i <= numCodePts; i++) { - bestSnlp.addElement(kuint32max, status); - } - - - // prev[i] is the index of the last CJK code point in the previous word in - // the best segmentation of the first i characters. 
- UVector32 prev(numCodePts + 1, status); - for(int32_t i = 0; i <= numCodePts; i++){ - prev.addElement(-1, status); - } - - const int32_t maxWordSize = 20; - UVector32 values(numCodePts, status); - values.setSize(numCodePts); - UVector32 lengths(numCodePts, status); - lengths.setSize(numCodePts); - - UText fu = UTEXT_INITIALIZER; - utext_openUnicodeString(&fu, &inString, &status); - - // Dynamic programming to find the best segmentation. - - // In outer loop, i is the code point index, - // ix is the corresponding string (code unit) index. - // They differ when the string contains supplementary characters. - int32_t ix = 0; - bool is_prev_katakana = false; - for (int32_t i = 0; i < numCodePts; ++i, ix = inString.moveIndex32(ix, 1)) { - if ((uint32_t)bestSnlp.elementAti(i) == kuint32max) { - continue; - } - - int32_t count; - utext_setNativeIndex(&fu, ix); - count = fDictionary->matches(&fu, maxWordSize, numCodePts, - NULL, lengths.getBuffer(), values.getBuffer(), NULL); - // Note: lengths is filled with code point lengths - // The NULL parameter is the ignored code unit lengths. - - // if there are no single character matches found in the dictionary - // starting with this character, treat character as a 1-character word - // with the highest value possible, i.e. the least likely to occur. - // Exclude Korean characters from this treatment, as they should be left - // together by default. - if ((count == 0 || lengths.elementAti(0) != 1) && - !fHangulWordSet.contains(inString.char32At(ix))) { - values.setElementAt(maxSnlp, count); // 255 - lengths.setElementAt(1, count++); - } - - for (int32_t j = 0; j < count; j++) { - uint32_t newSnlp = (uint32_t)bestSnlp.elementAti(i) + (uint32_t)values.elementAti(j); - int32_t ln_j_i = lengths.elementAti(j) + i; - if (newSnlp < (uint32_t)bestSnlp.elementAti(ln_j_i)) { - bestSnlp.setElementAt(newSnlp, ln_j_i); - prev.setElementAt(i, ln_j_i); - } - } - - // In Japanese, - // Katakana word in single character is pretty rare. 
So we apply - // the following heuristic to Katakana: any continuous run of Katakana - // characters is considered a candidate word with a default cost - // specified in the katakanaCost table according to its length. - - bool is_katakana = isKatakana(inString.char32At(ix)); - int32_t katakanaRunLength = 1; - if (!is_prev_katakana && is_katakana) { - int32_t j = inString.moveIndex32(ix, 1); - // Find the end of the continuous run of Katakana characters - while (j < inString.length() && katakanaRunLength < kMaxKatakanaGroupLength && - isKatakana(inString.char32At(j))) { - j = inString.moveIndex32(j, 1); - katakanaRunLength++; - } - if (katakanaRunLength < kMaxKatakanaGroupLength) { - uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength); - if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) { - bestSnlp.setElementAt(newSnlp, i+katakanaRunLength); - prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i; - } - } - } - is_prev_katakana = is_katakana; - } - utext_close(&fu); - - // Start pushing the optimal offset index into t_boundary (t for tentative). - // prev[numCodePts] is guaranteed to be meaningful. - // We'll first push in the reverse order, i.e., - // t_boundary[0] = numCodePts, and afterwards do a swap. - UVector32 t_boundary(numCodePts+1, status); - - int32_t numBreaks = 0; - // No segmentation found, set boundary to end of range - if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) { - t_boundary.addElement(numCodePts, status); - numBreaks++; - } else { - for (int32_t i = numCodePts; i > 0; i = prev.elementAti(i)) { - t_boundary.addElement(i, status); - numBreaks++; - } - U_ASSERT(prev.elementAti(t_boundary.elementAti(numBreaks - 1)) == 0); - } - - // Add a break for the start of the dictionary range if there is not one - // there already. 
- if (foundBreaks.size() == 0 || foundBreaks.peeki() < rangeStart) { - t_boundary.addElement(0, status); - numBreaks++; - } - - // Now that we're done, convert positions in t_boundary[] (indices in - // the normalized input string) back to indices in the original input UText - // while reversing t_boundary and pushing values to foundBreaks. - int32_t prevCPPos = -1; - int32_t prevUTextPos = -1; - for (int32_t i = numBreaks-1; i >= 0; i--) { - int32_t cpPos = t_boundary.elementAti(i); - U_ASSERT(cpPos > prevCPPos); - int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart; - U_ASSERT(utextPos >= prevUTextPos); - if (utextPos > prevUTextPos) { - // Boundaries are added to foundBreaks output in ascending order. - U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos); - foundBreaks.push(utextPos, status); - } else { - // Normalization expanded the input text, the dictionary found a boundary - // within the expansion, giving two boundaries with the same index in the - // original text. Ignore the second. See ticket #12918. - --numBreaks; - } - prevCPPos = cpPos; - prevUTextPos = utextPos; - } - (void)prevCPPos; // suppress compiler warnings about unused variable - - // inString goes out of scope - // inputMap goes out of scope - return numBreaks; -} -#endif - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/node/deps/icu-small/source/common/dictbe.h b/deps/node/deps/icu-small/source/common/dictbe.h deleted file mode 100644 index 99d176cc..00000000 --- a/deps/node/deps/icu-small/source/common/dictbe.h +++ /dev/null @@ -1,402 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** - ******************************************************************************* - * Copyright (C) 2006-2014, International Business Machines Corporation * - * and others. All Rights Reserved. 
* - ******************************************************************************* - */ - -#ifndef DICTBE_H -#define DICTBE_H - -#include "unicode/utypes.h" -#include "unicode/uniset.h" -#include "unicode/utext.h" - -#include "brkeng.h" -#include "uvectr32.h" - -U_NAMESPACE_BEGIN - -class DictionaryMatcher; -class Normalizer2; - -/******************************************************************* - * DictionaryBreakEngine - */ - -/** - *

DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a - * dictionary to determine language-specific breaks.

- * - *

After it is constructed a DictionaryBreakEngine may be shared between - * threads without synchronization.

- */ -class DictionaryBreakEngine : public LanguageBreakEngine { - private: - /** - * The set of characters handled by this engine - * @internal - */ - - UnicodeSet fSet; - - public: - - /** - *

Constructor

- */ - DictionaryBreakEngine(); - - /** - *

Virtual destructor.

- */ - virtual ~DictionaryBreakEngine(); - - /** - *

Indicate whether this engine handles a particular character for - * a particular kind of break.

- * - * @param c A character which begins a run that the engine might handle - * @return TRUE if this engine handles the particular character and break - * type. - */ - virtual UBool handles(UChar32 c) const; - - /** - *

Find any breaks within a run in the supplied text.

- * - * @param text A UText representing the text. The iterator is left at - * the end of the run of characters which the engine is capable of handling - * that starts from the first character in the range. - * @param startPos The start of the run within the supplied text. - * @param endPos The end of the run within the supplied text. - * @param foundBreaks vector of int32_t to receive the break positions - * @return The number of breaks found. - */ - virtual int32_t findBreaks( UText *text, - int32_t startPos, - int32_t endPos, - UVector32 &foundBreaks ) const; - - protected: - - /** - *

Set the character set handled by this engine.

- * - * @param set A UnicodeSet of the set of characters handled by the engine - */ - virtual void setCharacters( const UnicodeSet &set ); - - /** - *

Divide up a range of known dictionary characters handled by this break engine.

- * - * @param text A UText representing the text - * @param rangeStart The start of the range of dictionary characters - * @param rangeEnd The end of the range of dictionary characters - * @param foundBreaks Output of C array of int32_t break positions, or 0 - * @return The number of breaks found - */ - virtual int32_t divideUpDictionaryRange( UText *text, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const = 0; - -}; - -/******************************************************************* - * ThaiBreakEngine - */ - -/** - *

ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a - * dictionary and heuristics to determine Thai-specific breaks.

- * - *

After it is constructed a ThaiBreakEngine may be shared between - * threads without synchronization.

- */ -class ThaiBreakEngine : public DictionaryBreakEngine { - private: - /** - * The set of characters handled by this engine - * @internal - */ - - UnicodeSet fThaiWordSet; - UnicodeSet fEndWordSet; - UnicodeSet fBeginWordSet; - UnicodeSet fSuffixSet; - UnicodeSet fMarkSet; - DictionaryMatcher *fDictionary; - - public: - - /** - *

Default constructor.

- * - * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the - * engine is deleted. - */ - ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); - - /** - *

Virtual destructor.

- */ - virtual ~ThaiBreakEngine(); - - protected: - /** - *

Divide up a range of known dictionary characters handled by this break engine.

- * - * @param text A UText representing the text - * @param rangeStart The start of the range of dictionary characters - * @param rangeEnd The end of the range of dictionary characters - * @param foundBreaks Output of C array of int32_t break positions, or 0 - * @return The number of breaks found - */ - virtual int32_t divideUpDictionaryRange( UText *text, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const; - -}; - -/******************************************************************* - * LaoBreakEngine - */ - -/** - *

LaoBreakEngine is a kind of DictionaryBreakEngine that uses a - * dictionary and heuristics to determine Lao-specific breaks.

- * - *

After it is constructed a LaoBreakEngine may be shared between - * threads without synchronization.

- */ -class LaoBreakEngine : public DictionaryBreakEngine { - private: - /** - * The set of characters handled by this engine - * @internal - */ - - UnicodeSet fLaoWordSet; - UnicodeSet fEndWordSet; - UnicodeSet fBeginWordSet; - UnicodeSet fMarkSet; - DictionaryMatcher *fDictionary; - - public: - - /** - *

Default constructor.

- * - * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the - * engine is deleted. - */ - LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); - - /** - *

Virtual destructor.

- */ - virtual ~LaoBreakEngine(); - - protected: - /** - *

Divide up a range of known dictionary characters handled by this break engine.

- * - * @param text A UText representing the text - * @param rangeStart The start of the range of dictionary characters - * @param rangeEnd The end of the range of dictionary characters - * @param foundBreaks Output of C array of int32_t break positions, or 0 - * @return The number of breaks found - */ - virtual int32_t divideUpDictionaryRange( UText *text, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const; - -}; - -/******************************************************************* - * BurmeseBreakEngine - */ - -/** - *

BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a - * DictionaryMatcher and heuristics to determine Burmese-specific breaks.

- * - *

After it is constructed a BurmeseBreakEngine may be shared between - * threads without synchronization.

- */ -class BurmeseBreakEngine : public DictionaryBreakEngine { - private: - /** - * The set of characters handled by this engine - * @internal - */ - - UnicodeSet fBurmeseWordSet; - UnicodeSet fEndWordSet; - UnicodeSet fBeginWordSet; - UnicodeSet fMarkSet; - DictionaryMatcher *fDictionary; - - public: - - /** - *

Default constructor.

- * - * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the - * engine is deleted. - */ - BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); - - /** - *

Virtual destructor.

- */ - virtual ~BurmeseBreakEngine(); - - protected: - /** - *

Divide up a range of known dictionary characters.

- * - * @param text A UText representing the text - * @param rangeStart The start of the range of dictionary characters - * @param rangeEnd The end of the range of dictionary characters - * @param foundBreaks Output of C array of int32_t break positions, or 0 - * @return The number of breaks found - */ - virtual int32_t divideUpDictionaryRange( UText *text, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const; - -}; - -/******************************************************************* - * KhmerBreakEngine - */ - -/** - *

KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a - * DictionaryMatcher and heuristics to determine Khmer-specific breaks.

- * - *

After it is constructed a KhmerBreakEngine may be shared between - * threads without synchronization.

- */ -class KhmerBreakEngine : public DictionaryBreakEngine { - private: - /** - * The set of characters handled by this engine - * @internal - */ - - UnicodeSet fKhmerWordSet; - UnicodeSet fEndWordSet; - UnicodeSet fBeginWordSet; - UnicodeSet fMarkSet; - DictionaryMatcher *fDictionary; - - public: - - /** - *

Default constructor.

- * - * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the - * engine is deleted. - */ - KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); - - /** - *

Virtual destructor.

- */ - virtual ~KhmerBreakEngine(); - - protected: - /** - *

Divide up a range of known dictionary characters.

- * - * @param text A UText representing the text - * @param rangeStart The start of the range of dictionary characters - * @param rangeEnd The end of the range of dictionary characters - * @param foundBreaks Output of C array of int32_t break positions, or 0 - * @return The number of breaks found - */ - virtual int32_t divideUpDictionaryRange( UText *text, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const; - -}; - -#if !UCONFIG_NO_NORMALIZATION - -/******************************************************************* - * CjkBreakEngine - */ - -//indicates language/script that the CjkBreakEngine will handle -enum LanguageType { - kKorean, - kChineseJapanese -}; - -/** - *

CjkBreakEngine is a kind of DictionaryBreakEngine that uses a - * dictionary with costs associated with each word and - * Viterbi decoding to determine CJK-specific breaks.

- */ -class CjkBreakEngine : public DictionaryBreakEngine { - protected: - /** - * The set of characters handled by this engine - * @internal - */ - UnicodeSet fHangulWordSet; - UnicodeSet fHanWordSet; - UnicodeSet fKatakanaWordSet; - UnicodeSet fHiraganaWordSet; - - DictionaryMatcher *fDictionary; - const Normalizer2 *nfkcNorm2; - - public: - - /** - *

Default constructor.

- * - * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the - * engine is deleted. The DictionaryMatcher must contain costs for each word - * in order for the dictionary to work properly. - */ - CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status); - - /** - *

Virtual destructor.

- */ - virtual ~CjkBreakEngine(); - - protected: - /** - *

Divide up a range of known dictionary characters handled by this break engine.

- * - * @param text A UText representing the text - * @param rangeStart The start of the range of dictionary characters - * @param rangeEnd The end of the range of dictionary characters - * @param foundBreaks Output of C array of int32_t break positions, or 0 - * @return The number of breaks found - */ - virtual int32_t divideUpDictionaryRange( UText *text, - int32_t rangeStart, - int32_t rangeEnd, - UVector32 &foundBreaks ) const; - -}; - -#endif - -U_NAMESPACE_END - - /* DICTBE_H */ -#endif diff --git a/deps/node/deps/icu-small/source/common/dictionarydata.cpp b/deps/node/deps/icu-small/source/common/dictionarydata.cpp deleted file mode 100644 index 0efa5874..00000000 --- a/deps/node/deps/icu-small/source/common/dictionarydata.cpp +++ /dev/null @@ -1,242 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2014-2016, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* dictionarydata.h -* -* created on: 2012may31 -* created by: Markus W. 
Scherer & Maxime Serrano -*/ - -#include "dictionarydata.h" -#include "unicode/ucharstrie.h" -#include "unicode/bytestrie.h" -#include "unicode/udata.h" -#include "cmemory.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -U_NAMESPACE_BEGIN - -const int32_t DictionaryData::TRIE_TYPE_BYTES = 0; -const int32_t DictionaryData::TRIE_TYPE_UCHARS = 1; -const int32_t DictionaryData::TRIE_TYPE_MASK = 7; -const int32_t DictionaryData::TRIE_HAS_VALUES = 8; - -const int32_t DictionaryData::TRANSFORM_NONE = 0; -const int32_t DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000; -const int32_t DictionaryData::TRANSFORM_TYPE_MASK = 0x7f000000; -const int32_t DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff; - -DictionaryMatcher::~DictionaryMatcher() { -} - -UCharsDictionaryMatcher::~UCharsDictionaryMatcher() { - udata_close(file); -} - -int32_t UCharsDictionaryMatcher::getType() const { - return DictionaryData::TRIE_TYPE_UCHARS; -} - -int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit, - int32_t *lengths, int32_t *cpLengths, int32_t *values, - int32_t *prefix) const { - - UCharsTrie uct(characters); - int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text); - int32_t wordCount = 0; - int32_t codePointsMatched = 0; - - for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) { - UStringTrieResult result = (codePointsMatched == 0) ? 
uct.first(c) : uct.next(c); - int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex; - codePointsMatched += 1; - if (USTRINGTRIE_HAS_VALUE(result)) { - if (wordCount < limit) { - if (values != NULL) { - values[wordCount] = uct.getValue(); - } - if (lengths != NULL) { - lengths[wordCount] = lengthMatched; - } - if (cpLengths != NULL) { - cpLengths[wordCount] = codePointsMatched; - } - ++wordCount; - } - if (result == USTRINGTRIE_FINAL_VALUE) { - break; - } - } - else if (result == USTRINGTRIE_NO_MATCH) { - break; - } - if (lengthMatched >= maxLength) { - break; - } - } - - if (prefix != NULL) { - *prefix = codePointsMatched; - } - return wordCount; -} - -BytesDictionaryMatcher::~BytesDictionaryMatcher() { - udata_close(file); -} - -UChar32 BytesDictionaryMatcher::transform(UChar32 c) const { - if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) { - if (c == 0x200D) { - return 0xFF; - } else if (c == 0x200C) { - return 0xFE; - } - int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK); - if (delta < 0 || 0xFD < delta) { - return U_SENTINEL; - } - return (UChar32)delta; - } - return c; -} - -int32_t BytesDictionaryMatcher::getType() const { - return DictionaryData::TRIE_TYPE_BYTES; -} - -int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit, - int32_t *lengths, int32_t *cpLengths, int32_t *values, - int32_t *prefix) const { - BytesTrie bt(characters); - int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text); - int32_t wordCount = 0; - int32_t codePointsMatched = 0; - - for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) { - UStringTrieResult result = (codePointsMatched == 0) ? 
bt.first(transform(c)) : bt.next(transform(c)); - int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex; - codePointsMatched += 1; - if (USTRINGTRIE_HAS_VALUE(result)) { - if (wordCount < limit) { - if (values != NULL) { - values[wordCount] = bt.getValue(); - } - if (lengths != NULL) { - lengths[wordCount] = lengthMatched; - } - if (cpLengths != NULL) { - cpLengths[wordCount] = codePointsMatched; - } - ++wordCount; - } - if (result == USTRINGTRIE_FINAL_VALUE) { - break; - } - } - else if (result == USTRINGTRIE_NO_MATCH) { - break; - } - if (lengthMatched >= maxLength) { - break; - } - } - - if (prefix != NULL) { - *prefix = codePointsMatched; - } - return wordCount; -} - - -U_NAMESPACE_END - -U_NAMESPACE_USE - -U_CAPI int32_t U_EXPORT2 -udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, - void *outData, UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t headerSize; - const uint8_t *inBytes; - uint8_t *outBytes; - const int32_t *inIndexes; - int32_t indexes[DictionaryData::IX_COUNT]; - int32_t i, offset, size; - - headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0; - pInfo = (const UDataInfo *)((const char *)inData + 4); - if (!(pInfo->dataFormat[0] == 0x44 && - pInfo->dataFormat[1] == 0x69 && - pInfo->dataFormat[2] == 0x63 && - pInfo->dataFormat[3] == 0x74 && - pInfo->formatVersion[0] == 1)) { - udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); - *pErrorCode = U_UNSUPPORTED_ERROR; - return 0; - } - - inBytes = (const uint8_t *)inData + headerSize; - outBytes = (uint8_t *)outData + headerSize; - - inIndexes = (const int32_t *)inBytes; - if (length >= 0) { - length -= headerSize; - if (length < (int32_t)(sizeof(indexes))) { - 
udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length); - *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - - for (i = 0; i < DictionaryData::IX_COUNT; i++) { - indexes[i] = udata_readInt32(ds, inIndexes[i]); - } - - size = indexes[DictionaryData::IX_TOTAL_SIZE]; - - if (length >= 0) { - if (length < size) { - udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length); - *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - if (inBytes != outBytes) { - uprv_memcpy(outBytes, inBytes, size); - } - - offset = 0; - ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode); - offset = (int32_t)sizeof(indexes); - int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK; - int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET]; - - if (trieType == DictionaryData::TRIE_TYPE_UCHARS) { - ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode); - } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) { - // nothing to do - } else { - udata_printError(ds, "udict_swap(): unknown trie type!\n"); - *pErrorCode = U_UNSUPPORTED_ERROR; - return 0; - } - - // these next two sections are empty in the current format, - // but may be used later. - offset = nextOffset; - nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET]; - offset = nextOffset; - nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE]; - offset = nextOffset; - } - return headerSize + size; -} -#endif diff --git a/deps/node/deps/icu-small/source/common/dictionarydata.h b/deps/node/deps/icu-small/source/common/dictionarydata.h deleted file mode 100644 index 5aec8fe0..00000000 --- a/deps/node/deps/icu-small/source/common/dictionarydata.h +++ /dev/null @@ -1,191 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2014, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* dictionarydata.h -* -* created on: 2012may31 -* created by: Markus W. Scherer & Maxime Serrano -*/ - -#ifndef __DICTIONARYDATA_H__ -#define __DICTIONARYDATA_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/utext.h" -#include "unicode/udata.h" -#include "udataswp.h" -#include "unicode/uobject.h" -#include "unicode/ustringtrie.h" - -U_NAMESPACE_BEGIN - -class UCharsTrie; -class BytesTrie; - -class U_COMMON_API DictionaryData : public UMemory { -public: - static const int32_t TRIE_TYPE_BYTES; // = 0; - static const int32_t TRIE_TYPE_UCHARS; // = 1; - static const int32_t TRIE_TYPE_MASK; // = 7; - static const int32_t TRIE_HAS_VALUES; // = 8; - - static const int32_t TRANSFORM_NONE; // = 0; - static const int32_t TRANSFORM_TYPE_OFFSET; // = 0x1000000; - static const int32_t TRANSFORM_TYPE_MASK; // = 0x7f000000; - static const int32_t TRANSFORM_OFFSET_MASK; // = 0x1fffff; - - enum { - // Byte offsets from the start of the data, after the generic header. - IX_STRING_TRIE_OFFSET, - IX_RESERVED1_OFFSET, - IX_RESERVED2_OFFSET, - IX_TOTAL_SIZE, - - // Trie type: TRIE_HAS_VALUES | TRIE_TYPE_BYTES etc. - IX_TRIE_TYPE, - // Transform specification: TRANSFORM_TYPE_OFFSET | 0xe00 etc. - IX_TRANSFORM, - - IX_RESERVED6, - IX_RESERVED7, - IX_COUNT - }; -}; - -/** - * Wrapper class around generic dictionaries, implementing matches(). - * getType() should return a TRIE_TYPE_??? constant from DictionaryData. - * - * All implementations of this interface must be thread-safe if they are to be used inside of the - * dictionary-based break iteration code. 
- */ -class U_COMMON_API DictionaryMatcher : public UMemory { -public: - DictionaryMatcher() {}; - virtual ~DictionaryMatcher(); - // this should emulate CompactTrieDictionary::matches() - /* @param text The text in which to look for matching words. Matching begins - * at the current position of the UText. - * @param maxLength The max length of match to consider. Units are the native indexing - * units of the UText. - * @param limit Capacity of output arrays, which is also the maximum number of - * matching words to be found. - * @param lengths output array, filled with the lengths of the matches, in order, - * from shortest to longest. Lengths are in native indexing units - * of the UText. May be NULL. - * @param cpLengths output array, filled with the lengths of the matches, in order, - * from shortest to longest. Lengths are the number of Unicode code points. - * May be NULL. - * @param values Output array, filled with the values associated with the words found. - * May be NULL. - * @param prefix Output parameter, the code point length of the prefix match, even if that - * prefix didn't lead to a complete word. Will always be >= the cpLength - * of the longest complete word matched. May be NULL. - * @return Number of matching words found. - */ - virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit, - int32_t *lengths, int32_t *cpLengths, int32_t *values, - int32_t *prefix) const = 0; - - /** @return DictionaryData::TRIE_TYPE_XYZ */ - virtual int32_t getType() const = 0; -}; - -// Implementation of the DictionaryMatcher interface for a UCharsTrie dictionary -class U_COMMON_API UCharsDictionaryMatcher : public DictionaryMatcher { -public: - // constructs a new UCharsDictionaryMatcher. - // The UDataMemory * will be closed on this object's destruction. 
- UCharsDictionaryMatcher(const UChar *c, UDataMemory *f) : characters(c), file(f) { } - virtual ~UCharsDictionaryMatcher(); - virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit, - int32_t *lengths, int32_t *cpLengths, int32_t *values, - int32_t *prefix) const; - virtual int32_t getType() const; -private: - const UChar *characters; - UDataMemory *file; -}; - -// Implementation of the DictionaryMatcher interface for a BytesTrie dictionary -class U_COMMON_API BytesDictionaryMatcher : public DictionaryMatcher { -public: - // constructs a new BytesTrieDictionaryMatcher - // the transform constant should be the constant read from the file, not a masked version! - // the UDataMemory * fed in here will be closed on this object's destruction - BytesDictionaryMatcher(const char *c, int32_t t, UDataMemory *f) - : characters(c), transformConstant(t), file(f) { } - virtual ~BytesDictionaryMatcher(); - virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit, - int32_t *lengths, int32_t *cpLengths, int32_t *values, - int32_t *prefix) const; - virtual int32_t getType() const; -private: - UChar32 transform(UChar32 c) const; - - const char *characters; - int32_t transformConstant; - UDataMemory *file; -}; - -U_NAMESPACE_END - -U_CAPI int32_t U_EXPORT2 -udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode); - -/** - * Format of dictionary .dict data files. - * Format version 1.0. - * - * A dictionary .dict data file contains a byte-serialized BytesTrie or - * a UChars-serialized UCharsTrie. - * Such files are used in dictionary-based break iteration (DBBI). - * - * For a BytesTrie, a transformation type is specified for - * transforming Unicode strings into byte sequences. - * - * A .dict file begins with a standard ICU data file header - * (DataHeader, see ucmndata.h and unicode/udata.h). - * The UDataInfo.dataVersion field is currently unused (set to 0.0.0.0). 
- * - * After the header, the file contains the following parts. - * Constants are defined in the DictionaryData class. - * - * For the data structure of BytesTrie & UCharsTrie see - * http://site.icu-project.org/design/struct/tries - * and the bytestrie.h and ucharstrie.h header files. - * - * int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_STRING_TRIE_OFFSET]/4; - * - * The first four indexes are byte offsets in ascending order. - * Each byte offset marks the start of the next part in the data file, - * and the end of the previous one. - * When two consecutive byte offsets are the same, then the corresponding part is empty. - * Byte offsets are offsets from after the header, - * that is, from the beginning of the indexes[]. - * Each part starts at an offset with proper alignment for its data. - * If necessary, the previous part may include padding bytes to achieve this alignment. - * - * trieType=indexes[IX_TRIE_TYPE] defines the trie type. - * transform=indexes[IX_TRANSFORM] defines the Unicode-to-bytes transformation. - * If the transformation type is TRANSFORM_TYPE_OFFSET, - * then the lower 21 bits contain the offset code point. - * Each code point c is mapped to byte b = (c - offset). - * Code points outside the range offset..(offset+0xff) cannot be mapped - * and do not occur in the dictionary. - * - * stringTrie; -- a serialized BytesTrie or UCharsTrie - * - * The dictionary maps strings to specific values (TRIE_HAS_VALUES bit set in trieType), - * or it maps all strings to 0 (TRIE_HAS_VALUES bit not set). - */ - -#endif /* !UCONFIG_NO_BREAK_ITERATION */ -#endif /* __DICTIONARYDATA_H__ */ diff --git a/deps/node/deps/icu-small/source/common/dtintrv.cpp b/deps/node/deps/icu-small/source/common/dtintrv.cpp deleted file mode 100644 index dee637e6..00000000 --- a/deps/node/deps/icu-small/source/common/dtintrv.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/******************************************************************************* -* Copyright (C) 2008, International Business Machines Corporation and -* others. All Rights Reserved. -******************************************************************************* -* -* File DTINTRV.CPP -* -******************************************************************************* -*/ - - - -#include "unicode/dtintrv.h" - - -U_NAMESPACE_BEGIN - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateInterval) - -//DateInterval::DateInterval(){} - - -DateInterval::DateInterval(UDate from, UDate to) -: fromDate(from), - toDate(to) -{} - - -DateInterval::~DateInterval(){} - - -DateInterval::DateInterval(const DateInterval& other) -: UObject(other) { - *this = other; -} - - -DateInterval& -DateInterval::operator=(const DateInterval& other) { - if ( this != &other ) { - fromDate = other.fromDate; - toDate = other.toDate; - } - return *this; -} - - -DateInterval* -DateInterval::clone() const { - return new DateInterval(*this); -} - - -UBool -DateInterval::operator==(const DateInterval& other) const { - return ( fromDate == other.fromDate && toDate == other.toDate ); -} - - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/edits.cpp b/deps/node/deps/icu-small/source/common/edits.cpp deleted file mode 100644 index 00a8d601..00000000 --- a/deps/node/deps/icu-small/source/common/edits.cpp +++ /dev/null @@ -1,803 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// edits.cpp -// created: 2017feb08 Markus W. Scherer - -#include "unicode/edits.h" -#include "unicode/unistr.h" -#include "unicode/utypes.h" -#include "cmemory.h" -#include "uassert.h" -#include "util.h" - -U_NAMESPACE_BEGIN - -namespace { - -// 0000uuuuuuuuuuuu records u+1 unchanged text units. 
-const int32_t MAX_UNCHANGED_LENGTH = 0x1000; -const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1; - -// 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units. -const int32_t MAX_SHORT_CHANGE_OLD_LENGTH = 6; -const int32_t MAX_SHORT_CHANGE_NEW_LENGTH = 7; -const int32_t SHORT_CHANGE_NUM_MASK = 0x1ff; -const int32_t MAX_SHORT_CHANGE = 0x6fff; - -// 0111mmmmmmnnnnnn records a replacement of m text units with n. -// m or n = 61: actual length follows in the next edits array unit. -// m or n = 62..63: actual length follows in the next two edits array units. -// Bit 30 of the actual length is in the head unit. -// Trailing units have bit 15 set. -const int32_t LENGTH_IN_1TRAIL = 61; -const int32_t LENGTH_IN_2TRAIL = 62; - -} // namespace - -void Edits::releaseArray() U_NOEXCEPT { - if (array != stackArray) { - uprv_free(array); - } -} - -Edits &Edits::copyArray(const Edits &other) { - if (U_FAILURE(errorCode_)) { - length = delta = numChanges = 0; - return *this; - } - if (length > capacity) { - uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)length * 2); - if (newArray == nullptr) { - length = delta = numChanges = 0; - errorCode_ = U_MEMORY_ALLOCATION_ERROR; - return *this; - } - releaseArray(); - array = newArray; - capacity = length; - } - if (length > 0) { - uprv_memcpy(array, other.array, (size_t)length * 2); - } - return *this; -} - -Edits &Edits::moveArray(Edits &src) U_NOEXCEPT { - if (U_FAILURE(errorCode_)) { - length = delta = numChanges = 0; - return *this; - } - releaseArray(); - if (length > STACK_CAPACITY) { - array = src.array; - capacity = src.capacity; - src.array = src.stackArray; - src.capacity = STACK_CAPACITY; - src.reset(); - return *this; - } - array = stackArray; - capacity = STACK_CAPACITY; - if (length > 0) { - uprv_memcpy(array, src.array, (size_t)length * 2); - } - return *this; -} - -Edits &Edits::operator=(const Edits &other) { - length = other.length; - delta = other.delta; - numChanges = other.numChanges; - 
errorCode_ = other.errorCode_; - return copyArray(other); -} - -Edits &Edits::operator=(Edits &&src) U_NOEXCEPT { - length = src.length; - delta = src.delta; - numChanges = src.numChanges; - errorCode_ = src.errorCode_; - return moveArray(src); -} - -Edits::~Edits() { - releaseArray(); -} - -void Edits::reset() U_NOEXCEPT { - length = delta = numChanges = 0; - errorCode_ = U_ZERO_ERROR; -} - -void Edits::addUnchanged(int32_t unchangedLength) { - if(U_FAILURE(errorCode_) || unchangedLength == 0) { return; } - if(unchangedLength < 0) { - errorCode_ = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - // Merge into previous unchanged-text record, if any. - int32_t last = lastUnit(); - if(last < MAX_UNCHANGED) { - int32_t remaining = MAX_UNCHANGED - last; - if (remaining >= unchangedLength) { - setLastUnit(last + unchangedLength); - return; - } - setLastUnit(MAX_UNCHANGED); - unchangedLength -= remaining; - } - // Split large lengths into multiple units. - while(unchangedLength >= MAX_UNCHANGED_LENGTH) { - append(MAX_UNCHANGED); - unchangedLength -= MAX_UNCHANGED_LENGTH; - } - // Write a small (remaining) length. - if(unchangedLength > 0) { - append(unchangedLength - 1); - } -} - -void Edits::addReplace(int32_t oldLength, int32_t newLength) { - if(U_FAILURE(errorCode_)) { return; } - if(oldLength < 0 || newLength < 0) { - errorCode_ = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if (oldLength == 0 && newLength == 0) { - return; - } - ++numChanges; - int32_t newDelta = newLength - oldLength; - if (newDelta != 0) { - if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) || - (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) { - // Integer overflow or underflow. - errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - delta += newDelta; - } - - if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH && - newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) { - // Merge into previous same-lengths short-replacement record, if any. 
- int32_t u = (oldLength << 12) | (newLength << 9); - int32_t last = lastUnit(); - if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE && - (last & ~SHORT_CHANGE_NUM_MASK) == u && - (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) { - setLastUnit(last + 1); - return; - } - append(u); - return; - } - - int32_t head = 0x7000; - if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) { - head |= oldLength << 6; - head |= newLength; - append(head); - } else if ((capacity - length) >= 5 || growArray()) { - int32_t limit = length + 1; - if(oldLength < LENGTH_IN_1TRAIL) { - head |= oldLength << 6; - } else if(oldLength <= 0x7fff) { - head |= LENGTH_IN_1TRAIL << 6; - array[limit++] = (uint16_t)(0x8000 | oldLength); - } else { - head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6; - array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15)); - array[limit++] = (uint16_t)(0x8000 | oldLength); - } - if(newLength < LENGTH_IN_1TRAIL) { - head |= newLength; - } else if(newLength <= 0x7fff) { - head |= LENGTH_IN_1TRAIL; - array[limit++] = (uint16_t)(0x8000 | newLength); - } else { - head |= LENGTH_IN_2TRAIL + (newLength >> 30); - array[limit++] = (uint16_t)(0x8000 | (newLength >> 15)); - array[limit++] = (uint16_t)(0x8000 | newLength); - } - array[length] = (uint16_t)head; - length = limit; - } -} - -void Edits::append(int32_t r) { - if(length < capacity || growArray()) { - array[length++] = (uint16_t)r; - } -} - -UBool Edits::growArray() { - int32_t newCapacity; - if (array == stackArray) { - newCapacity = 2000; - } else if (capacity == INT32_MAX) { - // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API - // with a result-string-buffer overflow. - errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR; - return FALSE; - } else if (capacity >= (INT32_MAX / 2)) { - newCapacity = INT32_MAX; - } else { - newCapacity = 2 * capacity; - } - // Grow by at least 5 units so that a maximal change record will fit. 
- if ((newCapacity - capacity) < 5) { - errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR; - return FALSE; - } - uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2); - if (newArray == NULL) { - errorCode_ = U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - uprv_memcpy(newArray, array, (size_t)length * 2); - releaseArray(); - array = newArray; - capacity = newCapacity; - return TRUE; -} - -UBool Edits::copyErrorTo(UErrorCode &outErrorCode) { - if (U_FAILURE(outErrorCode)) { return TRUE; } - if (U_SUCCESS(errorCode_)) { return FALSE; } - outErrorCode = errorCode_; - return TRUE; -} - -Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) { - if (copyErrorTo(errorCode)) { return *this; } - // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c. - // Parallel iteration over both Edits. - Iterator abIter = ab.getFineIterator(); - Iterator bcIter = bc.getFineIterator(); - UBool abHasNext = TRUE, bcHasNext = TRUE; - // Copy iterator state into local variables, so that we can modify and subdivide spans. - // ab old & new length, bc old & new length - int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0; - // When we have different-intermediate-length changes, we accumulate a larger change. - int32_t pending_aLength = 0, pending_cLength = 0; - for (;;) { - // At this point, for each of the two iterators: - // Either we are done with the locally cached current edit, - // and its intermediate-string length has been reset, - // or we will continue to work with a truncated remainder of this edit. - // - // If the current edit is done, and the iterator has not yet reached the end, - // then we fetch the next edit. This is true for at least one of the iterators. - // - // Normally it does not matter whether we fetch from ab and then bc or vice versa. - // However, the result is observably different when - // ab deletions meet bc insertions at the same intermediate-string index. 
- // Some users expect the bc insertions to come first, so we fetch from bc first. - if (bc_bLength == 0) { - if (bcHasNext && (bcHasNext = bcIter.next(errorCode)) != 0) { - bc_bLength = bcIter.oldLength(); - cLength = bcIter.newLength(); - if (bc_bLength == 0) { - // insertion - if (ab_bLength == 0 || !abIter.hasChange()) { - addReplace(pending_aLength, pending_cLength + cLength); - pending_aLength = pending_cLength = 0; - } else { - pending_cLength += cLength; - } - continue; - } - } - // else see if the other iterator is done, too. - } - if (ab_bLength == 0) { - if (abHasNext && (abHasNext = abIter.next(errorCode)) != 0) { - aLength = abIter.oldLength(); - ab_bLength = abIter.newLength(); - if (ab_bLength == 0) { - // deletion - if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) { - addReplace(pending_aLength + aLength, pending_cLength); - pending_aLength = pending_cLength = 0; - } else { - pending_aLength += aLength; - } - continue; - } - } else if (bc_bLength == 0) { - // Both iterators are done at the same time: - // The intermediate-string lengths match. - break; - } else { - // The ab output string is shorter than the bc input string. - if (!copyErrorTo(errorCode)) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - } - return *this; - } - } - if (bc_bLength == 0) { - // The bc input string is shorter than the ab output string. - if (!copyErrorTo(errorCode)) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - } - return *this; - } - // Done fetching: ab_bLength > 0 && bc_bLength > 0 - - // The current state has two parts: - // - Past: We accumulate a longer ac edit in the "pending" variables. - // - Current: We have copies of the current ab/bc edits in local variables. - // At least one side is newly fetched. - // One side might be a truncated remainder of an edit we fetched earlier. - - if (!abIter.hasChange() && !bcIter.hasChange()) { - // An unchanged span all the way from string a to string c. 
- if (pending_aLength != 0 || pending_cLength != 0) { - addReplace(pending_aLength, pending_cLength); - pending_aLength = pending_cLength = 0; - } - int32_t unchangedLength = aLength <= cLength ? aLength : cLength; - addUnchanged(unchangedLength); - ab_bLength = aLength -= unchangedLength; - bc_bLength = cLength -= unchangedLength; - // At least one of the unchanged spans is now empty. - continue; - } - if (!abIter.hasChange() && bcIter.hasChange()) { - // Unchanged a->b but changed b->c. - if (ab_bLength >= bc_bLength) { - // Split the longer unchanged span into change + remainder. - addReplace(pending_aLength + bc_bLength, pending_cLength + cLength); - pending_aLength = pending_cLength = 0; - aLength = ab_bLength -= bc_bLength; - bc_bLength = 0; - continue; - } - // Handle the shorter unchanged span below like a change. - } else if (abIter.hasChange() && !bcIter.hasChange()) { - // Changed a->b and then unchanged b->c. - if (ab_bLength <= bc_bLength) { - // Split the longer unchanged span into change + remainder. - addReplace(pending_aLength + aLength, pending_cLength + ab_bLength); - pending_aLength = pending_cLength = 0; - cLength = bc_bLength -= ab_bLength; - ab_bLength = 0; - continue; - } - // Handle the shorter unchanged span below like a change. - } else { // both abIter.hasChange() && bcIter.hasChange() - if (ab_bLength == bc_bLength) { - // Changes on both sides up to the same position. Emit & reset. - addReplace(pending_aLength + aLength, pending_cLength + cLength); - pending_aLength = pending_cLength = 0; - ab_bLength = bc_bLength = 0; - continue; - } - } - // Accumulate the a->c change, reset the shorter side, - // keep a remainder of the longer one. 
- pending_aLength += aLength; - pending_cLength += cLength; - if (ab_bLength < bc_bLength) { - bc_bLength -= ab_bLength; - cLength = ab_bLength = 0; - } else { // ab_bLength > bc_bLength - ab_bLength -= bc_bLength; - aLength = bc_bLength = 0; - } - } - if (pending_aLength != 0 || pending_cLength != 0) { - addReplace(pending_aLength, pending_cLength); - } - copyErrorTo(errorCode); - return *this; -} - -Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) : - array(a), index(0), length(len), remaining(0), - onlyChanges_(oc), coarse(crs), - dir(0), changed(FALSE), oldLength_(0), newLength_(0), - srcIndex(0), replIndex(0), destIndex(0) {} - -int32_t Edits::Iterator::readLength(int32_t head) { - if (head < LENGTH_IN_1TRAIL) { - return head; - } else if (head < LENGTH_IN_2TRAIL) { - U_ASSERT(index < length); - U_ASSERT(array[index] >= 0x8000); - return array[index++] & 0x7fff; - } else { - U_ASSERT((index + 2) <= length); - U_ASSERT(array[index] >= 0x8000); - U_ASSERT(array[index + 1] >= 0x8000); - int32_t len = ((head & 1) << 30) | - ((int32_t)(array[index] & 0x7fff) << 15) | - (array[index + 1] & 0x7fff); - index += 2; - return len; - } -} - -void Edits::Iterator::updateNextIndexes() { - srcIndex += oldLength_; - if (changed) { - replIndex += newLength_; - } - destIndex += newLength_; -} - -void Edits::Iterator::updatePreviousIndexes() { - srcIndex -= oldLength_; - if (changed) { - replIndex -= newLength_; - } - destIndex -= newLength_; -} - -UBool Edits::Iterator::noNext() { - // No change before or beyond the string. - dir = 0; - changed = FALSE; - oldLength_ = newLength_ = 0; - return FALSE; -} - -UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) { - // Forward iteration: Update the string indexes to the limit of the current span, - // and post-increment-read array units to assemble a new span. - // Leaves the array index one after the last unit of that span. 
- if (U_FAILURE(errorCode)) { return FALSE; } - // We have an errorCode in case we need to start guarding against integer overflows. - // It is also convenient for caller loops if we bail out when an error was set elsewhere. - if (dir > 0) { - updateNextIndexes(); - } else { - if (dir < 0) { - // Turn around from previous() to next(). - // Post-increment-read the same span again. - if (remaining > 0) { - // Fine-grained iterator: - // Stay on the current one of a sequence of compressed changes. - ++index; // next() rests on the index after the sequence unit. - dir = 1; - return TRUE; - } - } - dir = 1; - } - if (remaining >= 1) { - // Fine-grained iterator: Continue a sequence of compressed changes. - if (remaining > 1) { - --remaining; - return TRUE; - } - remaining = 0; - } - if (index >= length) { - return noNext(); - } - int32_t u = array[index++]; - if (u <= MAX_UNCHANGED) { - // Combine adjacent unchanged ranges. - changed = FALSE; - oldLength_ = u + 1; - while (index < length && (u = array[index]) <= MAX_UNCHANGED) { - ++index; - oldLength_ += u + 1; - } - newLength_ = oldLength_; - if (onlyChanges) { - updateNextIndexes(); - if (index >= length) { - return noNext(); - } - // already fetched u > MAX_UNCHANGED at index - ++index; - } else { - return TRUE; - } - } - changed = TRUE; - if (u <= MAX_SHORT_CHANGE) { - int32_t oldLen = u >> 12; - int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH; - int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1; - if (coarse) { - oldLength_ = num * oldLen; - newLength_ = num * newLen; - } else { - // Split a sequence of changes that was compressed into one unit. - oldLength_ = oldLen; - newLength_ = newLen; - if (num > 1) { - remaining = num; // This is the first of two or more changes. - } - return TRUE; - } - } else { - U_ASSERT(u <= 0x7fff); - oldLength_ = readLength((u >> 6) & 0x3f); - newLength_ = readLength(u & 0x3f); - if (!coarse) { - return TRUE; - } - } - // Combine adjacent changes. 
- while (index < length && (u = array[index]) > MAX_UNCHANGED) { - ++index; - if (u <= MAX_SHORT_CHANGE) { - int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1; - oldLength_ += (u >> 12) * num; - newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num; - } else { - U_ASSERT(u <= 0x7fff); - oldLength_ += readLength((u >> 6) & 0x3f); - newLength_ += readLength(u & 0x3f); - } - } - return TRUE; -} - -UBool Edits::Iterator::previous(UErrorCode &errorCode) { - // Backward iteration: Pre-decrement-read array units to assemble a new span, - // then update the string indexes to the start of that span. - // Leaves the array index on the head unit of that span. - if (U_FAILURE(errorCode)) { return FALSE; } - // We have an errorCode in case we need to start guarding against integer overflows. - // It is also convenient for caller loops if we bail out when an error was set elsewhere. - if (dir >= 0) { - if (dir > 0) { - // Turn around from next() to previous(). - // Set the string indexes to the span limit and - // pre-decrement-read the same span again. - if (remaining > 0) { - // Fine-grained iterator: - // Stay on the current one of a sequence of compressed changes. - --index; // previous() rests on the sequence unit. - dir = -1; - return TRUE; - } - updateNextIndexes(); - } - dir = -1; - } - if (remaining > 0) { - // Fine-grained iterator: Continue a sequence of compressed changes. - int32_t u = array[index]; - U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE); - if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) { - ++remaining; - updatePreviousIndexes(); - return TRUE; - } - remaining = 0; - } - if (index <= 0) { - return noNext(); - } - int32_t u = array[--index]; - if (u <= MAX_UNCHANGED) { - // Combine adjacent unchanged ranges. 
- changed = FALSE; - oldLength_ = u + 1; - while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) { - --index; - oldLength_ += u + 1; - } - newLength_ = oldLength_; - // No need to handle onlyChanges as long as previous() is called only from findIndex(). - updatePreviousIndexes(); - return TRUE; - } - changed = TRUE; - if (u <= MAX_SHORT_CHANGE) { - int32_t oldLen = u >> 12; - int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH; - int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1; - if (coarse) { - oldLength_ = num * oldLen; - newLength_ = num * newLen; - } else { - // Split a sequence of changes that was compressed into one unit. - oldLength_ = oldLen; - newLength_ = newLen; - if (num > 1) { - remaining = 1; // This is the last of two or more changes. - } - updatePreviousIndexes(); - return TRUE; - } - } else { - if (u <= 0x7fff) { - // The change is encoded in u alone. - oldLength_ = readLength((u >> 6) & 0x3f); - newLength_ = readLength(u & 0x3f); - } else { - // Back up to the head of the change, read the lengths, - // and reset the index to the head again. - U_ASSERT(index > 0); - while ((u = array[--index]) > 0x7fff) {} - U_ASSERT(u > MAX_SHORT_CHANGE); - int32_t headIndex = index++; - oldLength_ = readLength((u >> 6) & 0x3f); - newLength_ = readLength(u & 0x3f); - index = headIndex; - } - if (!coarse) { - updatePreviousIndexes(); - return TRUE; - } - } - // Combine adjacent changes. - while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) { - --index; - if (u <= MAX_SHORT_CHANGE) { - int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1; - oldLength_ += (u >> 12) * num; - newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num; - } else if (u <= 0x7fff) { - // Read the lengths, and reset the index to the head again. 
- int32_t headIndex = index++; - oldLength_ += readLength((u >> 6) & 0x3f); - newLength_ += readLength(u & 0x3f); - index = headIndex; - } - } - updatePreviousIndexes(); - return TRUE; -} - -int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) { - if (U_FAILURE(errorCode) || i < 0) { return -1; } - int32_t spanStart, spanLength; - if (findSource) { // find source index - spanStart = srcIndex; - spanLength = oldLength_; - } else { // find destination index - spanStart = destIndex; - spanLength = newLength_; - } - if (i < spanStart) { - if (i >= (spanStart / 2)) { - // Search backwards. - for (;;) { - UBool hasPrevious = previous(errorCode); - U_ASSERT(hasPrevious); // because i>=0 and the first span starts at 0 - (void)hasPrevious; // avoid unused-variable warning - spanStart = findSource ? srcIndex : destIndex; - if (i >= spanStart) { - // The index is in the current span. - return 0; - } - if (remaining > 0) { - // Is the index in one of the remaining compressed edits? - // spanStart is the start of the current span, first of the remaining ones. - spanLength = findSource ? oldLength_ : newLength_; - int32_t u = array[index]; - U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE); - int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining; - int32_t len = num * spanLength; - if (i >= (spanStart - len)) { - int32_t n = ((spanStart - i - 1) / spanLength) + 1; - // 1 <= n <= num - srcIndex -= n * oldLength_; - replIndex -= n * newLength_; - destIndex -= n * newLength_; - remaining += n; - return 0; - } - // Skip all of these edits at once. - srcIndex -= num * oldLength_; - replIndex -= num * newLength_; - destIndex -= num * newLength_; - remaining = 0; - } - } - } - // Reset the iterator to the start. - dir = 0; - index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0; - } else if (i < (spanStart + spanLength)) { - // The index is in the current span. 
- return 0; - } - while (next(FALSE, errorCode)) { - if (findSource) { - spanStart = srcIndex; - spanLength = oldLength_; - } else { - spanStart = destIndex; - spanLength = newLength_; - } - if (i < (spanStart + spanLength)) { - // The index is in the current span. - return 0; - } - if (remaining > 1) { - // Is the index in one of the remaining compressed edits? - // spanStart is the start of the current span, first of the remaining ones. - int32_t len = remaining * spanLength; - if (i < (spanStart + len)) { - int32_t n = (i - spanStart) / spanLength; // 1 <= n <= remaining - 1 - srcIndex += n * oldLength_; - replIndex += n * newLength_; - destIndex += n * newLength_; - remaining -= n; - return 0; - } - // Make next() skip all of these edits at once. - oldLength_ *= remaining; - newLength_ *= remaining; - remaining = 0; - } - } - return 1; -} - -int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) { - int32_t where = findIndex(i, TRUE, errorCode); - if (where < 0) { - // Error or before the string. - return 0; - } - if (where > 0 || i == srcIndex) { - // At or after string length, or at start of the found span. - return destIndex; - } - if (changed) { - // In a change span, map to its end. - return destIndex + newLength_; - } else { - // In an unchanged span, offset 1:1 within it. - return destIndex + (i - srcIndex); - } -} - -int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) { - int32_t where = findIndex(i, FALSE, errorCode); - if (where < 0) { - // Error or before the string. - return 0; - } - if (where > 0 || i == destIndex) { - // At or after string length, or at start of the found span. - return srcIndex; - } - if (changed) { - // In a change span, map to its end. - return srcIndex + oldLength_; - } else { - // In an unchanged span, offset within it. 
- return srcIndex + (i - destIndex); - } -} - -UnicodeString& Edits::Iterator::toString(UnicodeString& sb) const { - sb.append(u"{ src[", -1); - ICU_Utility::appendNumber(sb, srcIndex); - sb.append(u"..", -1); - ICU_Utility::appendNumber(sb, srcIndex + oldLength_); - if (changed) { - sb.append(u"] ⇝ dest[", -1); - } else { - sb.append(u"] ≡ dest[", -1); - } - ICU_Utility::appendNumber(sb, destIndex); - sb.append(u"..", -1); - ICU_Utility::appendNumber(sb, destIndex + newLength_); - if (changed) { - sb.append(u"], repl[", -1); - ICU_Utility::appendNumber(sb, replIndex); - sb.append(u"..", -1); - ICU_Utility::appendNumber(sb, replIndex + newLength_); - sb.append(u"] }", -1); - } else { - sb.append(u"] (no-change) }", -1); - } - return sb; -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/errorcode.cpp b/deps/node/deps/icu-small/source/common/errorcode.cpp deleted file mode 100644 index e7ac43b5..00000000 --- a/deps/node/deps/icu-small/source/common/errorcode.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2009-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: errorcode.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009mar10 -* created by: Markus W. 
Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/errorcode.h" - -U_NAMESPACE_BEGIN - -ErrorCode::~ErrorCode() {} - -UErrorCode ErrorCode::reset() { - UErrorCode code = errorCode; - errorCode = U_ZERO_ERROR; - return code; -} - -void ErrorCode::assertSuccess() const { - if(isFailure()) { - handleFailure(); - } -} - -const char* ErrorCode::errorName() const { - return u_errorName(errorCode); -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/filteredbrk.cpp b/deps/node/deps/icu-small/source/common/filteredbrk.cpp deleted file mode 100644 index 162b38de..00000000 --- a/deps/node/deps/icu-small/source/common/filteredbrk.cpp +++ /dev/null @@ -1,710 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2014-2015, International Business Machines Corporation and -* others. All Rights Reserved. -******************************************************************************* -*/ - -#include "unicode/utypes.h" -#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION - -#include "cmemory.h" - -#include "unicode/filteredbrk.h" -#include "unicode/ucharstriebuilder.h" -#include "unicode/ures.h" - -#include "uresimp.h" // ures_getByKeyWithFallback -#include "ubrkimpl.h" // U_ICUDATA_BRKITR -#include "uvector.h" -#include "cmemory.h" - -U_NAMESPACE_BEGIN - -#ifndef FB_DEBUG -#define FB_DEBUG 0 -#endif - -#if FB_DEBUG -#include -static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) { - char buf[2048]; - if(s) { - s->extract(0,s->length(),buf,2048); - } else { - strcpy(buf,"NULL"); - } - fprintf(stderr,"%s:%d: %s. 
s='%s'(%p), b=%c, d=%d\n", - f, l, m, buf, (const void*)s, b?'T':'F',(int)d); -} - -#define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__) -#else -#define FB_TRACE(m,s,b,d) -#endif - -/** - * Used with sortedInsert() - */ -static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { - const UnicodeString &a = *(const UnicodeString*)t1.pointer; - const UnicodeString &b = *(const UnicodeString*)t2.pointer; - return a.compare(b); -} - -/** - * A UVector which implements a set of strings. - */ -class U_COMMON_API UStringSet : public UVector { - public: - UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject, - uhash_compareUnicodeString, - 1, - status) {} - virtual ~UStringSet(); - /** - * Is this UnicodeSet contained? - */ - inline UBool contains(const UnicodeString& s) { - return contains((void*) &s); - } - using UVector::contains; - /** - * Return the ith UnicodeString alias - */ - inline const UnicodeString* getStringAt(int32_t i) const { - return (const UnicodeString*)elementAt(i); - } - /** - * Adopt the UnicodeString if not already contained. - * Caller no longer owns the pointer in any case. - * @return true if adopted successfully, false otherwise (error, or else duplicate) - */ - inline UBool adopt(UnicodeString *str, UErrorCode &status) { - if(U_FAILURE(status) || contains(*str)) { - delete str; - return false; - } else { - sortedInsert(str, compareUnicodeString, status); - if(U_FAILURE(status)) { - delete str; - return false; - } - return true; - } - } - /** - * Add by value. - * @return true if successfully adopted. - */ - inline UBool add(const UnicodeString& str, UErrorCode &status) { - if(U_FAILURE(status)) return false; - UnicodeString *t = new UnicodeString(str); - if(t==NULL) { - status = U_MEMORY_ALLOCATION_ERROR; return false; - } - return adopt(t, status); - } - /** - * Remove this string. 
- * @return true if successfully removed, false otherwise (error, or else it wasn't there) - */ - inline UBool remove(const UnicodeString &s, UErrorCode &status) { - if(U_FAILURE(status)) return false; - return removeElement((void*) &s); - } -}; - -/** - * Virtual, won't be inlined - */ -UStringSet::~UStringSet() {} - -/* ----------------------------------------------------------- */ - - -/* Filtered Break constants */ -static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie -static const int32_t kMATCH = (1<<1); //< exact match - skip this one. -static const int32_t kSuppressInReverse = (1<<0); -static const int32_t kAddToForward = (1<<1); -static const UChar kFULLSTOP = 0x002E; // '.' - -/** - * Shared data for SimpleFilteredSentenceBreakIterator - */ -class SimpleFilteredSentenceBreakData : public UMemory { -public: - SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards ) - : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { } - SimpleFilteredSentenceBreakData *incr() { refcount++; return this; } - SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; } - virtual ~SimpleFilteredSentenceBreakData(); - - LocalPointer fForwardsPartialTrie; // Has ".a" for "a.M." - LocalPointer fBackwardsTrie; // i.e. ".srM" for Mrs. 
- int32_t refcount; -}; - -SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {} - -/** - * Concrete implementation - */ -class SimpleFilteredSentenceBreakIterator : public BreakIterator { -public: - SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status); - SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other); - virtual ~SimpleFilteredSentenceBreakIterator(); -private: - SimpleFilteredSentenceBreakData *fData; - LocalPointer fDelegate; - LocalUTextPointer fText; - - /* -- subclass interface -- */ -public: - /* -- cloning and other subclass stuff -- */ - virtual BreakIterator * createBufferClone(void * /*stackBuffer*/, - int32_t &/*BufferSize*/, - UErrorCode &status) { - // for now - always deep clone - status = U_SAFECLONE_ALLOCATED_WARNING; - return clone(); - } - virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBreakIterator(*this); } - virtual UClassID getDynamicClassID(void) const { return NULL; } - virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; } - - /* -- text modifying -- */ - virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); } - virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; } - virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); } - virtual void setText(const UnicodeString &text) { fDelegate->setText(text); } - - /* -- other functions that are just delegated -- */ - virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); } - virtual CharacterIterator& getText(void) const { return fDelegate->getText(); } - - /* -- ITERATION -- */ - virtual int32_t first(void); - virtual int32_t preceding(int32_t offset); - virtual int32_t previous(void); - virtual UBool 
isBoundary(int32_t offset); - virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct. - - virtual int32_t next(void); - - virtual int32_t next(int32_t n); - virtual int32_t following(int32_t offset); - virtual int32_t last(void); - -private: - /** - * Given that the fDelegate has already given its "initial" answer, - * find the NEXT actual (non-excepted) break. - * @param n initial position from delegate - * @return new break position or UBRK_DONE - */ - int32_t internalNext(int32_t n); - /** - * Given that the fDelegate has already given its "initial" answer, - * find the PREV actual (non-excepted) break. - * @param n initial position from delegate - * @return new break position or UBRK_DONE - */ - int32_t internalPrev(int32_t n); - /** - * set up the UText with the value of the fDelegate. - * Call this before calling breakExceptionAt. - * May be able to avoid excess calls - */ - void resetState(UErrorCode &status); - /** - * Is there a match (exception) at this spot? - */ - enum EFBMatchResult { kNoExceptionHere, kExceptionHere }; - /** - * Determine if there is an exception at this spot - * @param n spot to check - * @return kNoExceptionHere or kExceptionHere - **/ - enum EFBMatchResult breakExceptionAt(int32_t n); -}; - -SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other) - : BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate->clone()) -{ -} - - -SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) : - BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)), - fData(new SimpleFilteredSentenceBreakData(forwards, backwards)), - fDelegate(adopt) -{ - // all set.. 
-} - -SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() { - fData = fData->decr(); -} - -void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) { - fText.adoptInstead(fDelegate->getUText(fText.orphan(), status)); -} - -SimpleFilteredSentenceBreakIterator::EFBMatchResult -SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) { - int64_t bestPosn = -1; - int32_t bestValue = -1; - // loops while 'n' points to an exception. - utext_setNativeIndex(fText.getAlias(), n); // from n.. - fData->fBackwardsTrie->reset(); - UChar32 uch; - - //if(debug2) u_printf(" n@ %d\n", n); - // Assume a space is following the '.' (so we handle the case: "Mr. /Brown") - if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here?? - // TODO only do this the 1st time? - //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch); - } else { - //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch); - uch = utext_next32(fText.getAlias()); - //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch); - } - - UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE; - - while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and.. - USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie - if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far - bestPosn = utext_getNativeIndex(fText.getAlias()); - bestValue = fData->fBackwardsTrie->getValue(); - } - //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias())); - } - - if(USTRINGTRIE_MATCHES(r)) { // exact match? - //if(debug2) u_printf("revfBackwardsTrie->getValue(); - bestPosn = utext_getNativeIndex(fText.getAlias()); - //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); - } - - if(bestPosn>=0) { - //if(debug2) u_printf("rev< /%C/ end of seq.. 
r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); - - //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what? - //int32_t bestValue = fBackwardsTrie->getValue(); - ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue); - - if(bestValue == kMATCH) { // exact match! - //if(debug2) u_printf(" exact backward match\n"); - return kExceptionHere; // See if the next is another exception. - } else if(bestValue == kPARTIAL - && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie - //if(debug2) u_printf(" partial backward match\n"); - // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie - // to see if it matches something going forward. - fData->fForwardsPartialTrie->reset(); - UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE; - utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close .. - //if(debug2) u_printf("Retrying at %d\n", bestPosn); - while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL && - USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) { - //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias())); - } - if(USTRINGTRIE_MATCHES(rfwd)) { - //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch); - // only full matches here, nothing to check - // skip the next: - return kExceptionHere; - } else { - //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch); - // no match (no exception) -return the 'underlying' break - return kNoExceptionHere; - } - } else { - return kNoExceptionHere; // internal error and/or no forwards trie - } - } else { - //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // no best match - return kNoExceptionHere; // No match - so exit. Not an exception. - } -} - -// the workhorse single next. 
-int32_t -SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) { - if(n == UBRK_DONE || // at end or - fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions - return n; - } - // OK, do we need to break here? - UErrorCode status = U_ZERO_ERROR; - // refresh text - resetState(status); - if(U_FAILURE(status)) return UBRK_DONE; // bail out - int64_t utextLen = utext_nativeLength(fText.getAlias()); - - //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias())); - while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlying break (from fDelegate). - SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n); - - switch(m) { - case kExceptionHere: - n = fDelegate->next(); // skip this one. Find the next lowerlevel break. - continue; - - default: - case kNoExceptionHere: - return n; - } - } - return n; -} - -int32_t -SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) { - if(n == 0 || n == UBRK_DONE || // at end or - fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions - return n; - } - // OK, do we need to break here? - UErrorCode status = U_ZERO_ERROR; - // refresh text - resetState(status); - if(U_FAILURE(status)) return UBRK_DONE; // bail out - - //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias())); - while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying break (from fDelegate). - SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n); - - switch(m) { - case kExceptionHere: - n = fDelegate->previous(); // skip this one. Find the next lowerlevel break. - continue; - - default: - case kNoExceptionHere: - return n; - } - } - return n; -} - - -int32_t -SimpleFilteredSentenceBreakIterator::next() { - return internalNext(fDelegate->next()); -} - -int32_t -SimpleFilteredSentenceBreakIterator::first(void) { - // Don't suppress a break opportunity at the beginning of text. 
- return fDelegate->first(); -} - -int32_t -SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) { - return internalPrev(fDelegate->preceding(offset)); -} - -int32_t -SimpleFilteredSentenceBreakIterator::previous(void) { - return internalPrev(fDelegate->previous()); -} - -UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) { - if (!fDelegate->isBoundary(offset)) return false; // no break to suppress - - if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions - - UErrorCode status = U_ZERO_ERROR; - resetState(status); - - SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offset); - - switch(m) { - case kExceptionHere: - return false; - default: - case kNoExceptionHere: - return true; - } -} - -int32_t -SimpleFilteredSentenceBreakIterator::next(int32_t offset) { - return internalNext(fDelegate->next(offset)); -} - -int32_t -SimpleFilteredSentenceBreakIterator::following(int32_t offset) { - return internalNext(fDelegate->following(offset)); -} - -int32_t -SimpleFilteredSentenceBreakIterator::last(void) { - // Don't suppress a break opportunity at the end of text. - return fDelegate->last(); -} - - -/** - * Concrete implementation of builder class. 
- */ -class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder { -public: - virtual ~SimpleFilteredBreakIteratorBuilder(); - SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status); - SimpleFilteredBreakIteratorBuilder(UErrorCode &status); - virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status); - virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status); - virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status); -private: - UStringSet fSet; -}; - -SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder() -{ -} - -SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status) - : fSet(status) -{ -} - -SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status) - : fSet(status) -{ - if(U_SUCCESS(status)) { - UErrorCode subStatus = U_ZERO_ERROR; - LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &subStatus)); - if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) { - status = subStatus; // copy the failing status -#if FB_DEBUG - fprintf(stderr, "open BUNDLE %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status)); -#endif - return; // leaves the builder empty, if you try to use it. - } - LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &subStatus)); - if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) { - status = subStatus; // copy the failing status -#if FB_DEBUG - fprintf(stderr, "open EXCEPTIONS %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status)); -#endif - return; // leaves the builder empty, if you try to use it. 
- } - LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &subStatus)); - -#if FB_DEBUG - { - UErrorCode subsub = subStatus; - fprintf(stderr, "open SentenceBreak %s => %s, %s\n", fromLocale.getBaseName(), ures_getLocale(breaks.getAlias(), &subsub), u_errorName(subStatus)); - } -#endif - - if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) { - status = subStatus; // copy the failing status -#if FB_DEBUG - fprintf(stderr, "open %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status)); -#endif - return; // leaves the builder empty, if you try to use it. - } - - LocalUResourceBundlePointer strs; - subStatus = status; // Pick up inherited warning status now - do { - strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus)); - if(strs.isValid() && U_SUCCESS(subStatus)) { - UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status)); - suppressBreakAfter(str, status); // load the string - } - } while (strs.isValid() && U_SUCCESS(subStatus)); - if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) { - status = subStatus; - } - } -} - -UBool -SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status) -{ - UBool r = fSet.add(exception, status); - FB_TRACE("suppressBreakAfter",&exception,r,0); - return r; -} - -UBool -SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status) -{ - UBool r = fSet.remove(exception, status); - FB_TRACE("unsuppressBreakAfter",&exception,r,0); - return r; -} - -/** - * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly. - * Work around this. - * - * Note: "new UnicodeString[subCount]" ends up calling global operator new - * on MSVC2012 for some reason. - */ -static inline UnicodeString* newUnicodeStringArray(size_t count) { - return new UnicodeString[count ? 
count : 1]; -} - -BreakIterator * -SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) { - LocalPointer adopt(adoptBreakIterator); - - LocalPointer builder(new UCharsTrieBuilder(status), status); - LocalPointer builder2(new UCharsTrieBuilder(status), status); - if(U_FAILURE(status)) { - return NULL; - } - - int32_t revCount = 0; - int32_t fwdCount = 0; - - int32_t subCount = fSet.size(); - - UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount); - - LocalArray ustrs(ustrs_ptr); - - LocalMemory partials; - partials.allocateInsteadAndReset(subCount); - - LocalPointer backwardsTrie; // i.e. ".srM" for Mrs. - LocalPointer forwardsPartialTrie; // Has ".a" for "a.M." - - int n=0; - for ( int32_t i = 0; - i-1 && (nn+1)!=ustrs[i].length()) { - FB_TRACE("partial",&ustrs[i],FALSE,i); - // is partial. - // is it unique? - int sameAs = -1; - for(int j=0;jadd(prefix, kPARTIAL, status); - revCount++; - FB_TRACE("Added partial",&prefix,FALSE, i); - FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i); - partials[i] = kSuppressInReverse | kAddToForward; - } else { - FB_TRACE("NOT adding partial",&prefix,FALSE, i); - FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i); - } - } - } - for(int i=0;iadd(ustrs[i], kMATCH, status); - revCount++; - FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i); - } else { - FB_TRACE("Adding fwd",&ustrs[i], FALSE, i); - - // an optimization would be to only add the portion after the '.' - // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward, - // instead of "Ph.D." since we already know the "Ph." part is a match. - // would need the trie to be able to hold 0-length strings, though. 
- builder2->add(ustrs[i], kMATCH, status); // forward - fwdCount++; - //ustrs[i].reverse(); - ////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status)); - } - } - FB_TRACE("AbbrCount",NULL,FALSE, subCount); - - if(revCount>0) { - backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status)); - if(U_FAILURE(status)) { - FB_TRACE(u_errorName(status),NULL,FALSE, -1); - return NULL; - } - } - - if(fwdCount>0) { - forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status)); - if(U_FAILURE(status)) { - FB_TRACE(u_errorName(status),NULL,FALSE, -1); - return NULL; - } - } - - return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status); -} - - -// ----------- Base class implementation - -FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() { -} - -FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() { -} - -FilteredBreakIteratorBuilder * -FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) { - if(U_FAILURE(status)) return NULL; - LocalPointer ret(new SimpleFilteredBreakIteratorBuilder(where, status), status); - return (U_SUCCESS(status))? ret.orphan(): NULL; -} - -FilteredBreakIteratorBuilder * -FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) { - return createEmptyInstance(status); -} - -FilteredBreakIteratorBuilder * -FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) { - if(U_FAILURE(status)) return NULL; - LocalPointer ret(new SimpleFilteredBreakIteratorBuilder(status), status); - return (U_SUCCESS(status))? 
ret.orphan(): NULL; -} - -U_NAMESPACE_END - -#endif //#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION diff --git a/deps/node/deps/icu-small/source/common/filterednormalizer2.cpp b/deps/node/deps/icu-small/source/common/filterednormalizer2.cpp deleted file mode 100644 index 1a0914d3..00000000 --- a/deps/node/deps/icu-small/source/common/filterednormalizer2.cpp +++ /dev/null @@ -1,363 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2009-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: filterednormalizer2.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009dec10 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/edits.h" -#include "unicode/normalizer2.h" -#include "unicode/stringoptions.h" -#include "unicode/uniset.h" -#include "unicode/unistr.h" -#include "unicode/unorm.h" -#include "cpputils.h" - -U_NAMESPACE_BEGIN - -FilteredNormalizer2::~FilteredNormalizer2() {} - -UnicodeString & -FilteredNormalizer2::normalize(const UnicodeString &src, - UnicodeString &dest, - UErrorCode &errorCode) const { - uprv_checkCanGetBuffer(src, errorCode); - if(U_FAILURE(errorCode)) { - dest.setToBogus(); - return dest; - } - if(&dest==&src) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return dest; - } - dest.remove(); - return normalize(src, dest, USET_SPAN_SIMPLE, errorCode); -} - -// Internal: No argument checking, and appends to dest. -// Pass as input spanCondition the one that is likely to yield a non-zero -// span length at the start of src. 
-// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2, -// USET_SPAN_SIMPLE should be passed in for the start of src -// and USET_SPAN_NOT_CONTAINED should be passed in if we continue after -// an in-filter prefix. -UnicodeString & -FilteredNormalizer2::normalize(const UnicodeString &src, - UnicodeString &dest, - USetSpanCondition spanCondition, - UErrorCode &errorCode) const { - UnicodeString tempDest; // Don't throw away destination buffer between iterations. - for(int32_t prevSpanLimit=0; prevSpanLimitreset(); - } - options |= U_EDITS_NO_RESET; // Do not reset for each span. - normalizeUTF8(options, src.data(), src.length(), sink, edits, USET_SPAN_SIMPLE, errorCode); -} - -void -FilteredNormalizer2::normalizeUTF8(uint32_t options, const char *src, int32_t length, - ByteSink &sink, Edits *edits, - USetSpanCondition spanCondition, - UErrorCode &errorCode) const { - while (length > 0) { - int32_t spanLength = set.spanUTF8(src, length, spanCondition); - if (spanCondition == USET_SPAN_NOT_CONTAINED) { - if (spanLength != 0) { - if (edits != nullptr) { - edits->addUnchanged(spanLength); - } - if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { - sink.Append(src, spanLength); - } - } - spanCondition = USET_SPAN_SIMPLE; - } else { - if (spanLength != 0) { - // Not norm2.normalizeSecondAndAppend() because we do not want - // to modify the non-filter part of dest. 
- norm2.normalizeUTF8(options, StringPiece(src, spanLength), sink, edits, errorCode); - if (U_FAILURE(errorCode)) { - break; - } - } - spanCondition = USET_SPAN_NOT_CONTAINED; - } - src += spanLength; - length -= spanLength; - } -} - -UnicodeString & -FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const { - return normalizeSecondAndAppend(first, second, TRUE, errorCode); -} - -UnicodeString & -FilteredNormalizer2::append(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const { - return normalizeSecondAndAppend(first, second, FALSE, errorCode); -} - -UnicodeString & -FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first, - const UnicodeString &second, - UBool doNormalize, - UErrorCode &errorCode) const { - uprv_checkCanGetBuffer(first, errorCode); - uprv_checkCanGetBuffer(second, errorCode); - if(U_FAILURE(errorCode)) { - return first; - } - if(&first==&second) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return first; - } - if(first.isEmpty()) { - if(doNormalize) { - return normalize(second, first, errorCode); - } else { - return first=second; - } - } - // merge the in-filter suffix of the first string with the in-filter prefix of the second - int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE); - if(prefixLimit!=0) { - UnicodeString prefix(second.tempSubString(0, prefixLimit)); - int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE); - if(suffixStart==0) { - if(doNormalize) { - norm2.normalizeSecondAndAppend(first, prefix, errorCode); - } else { - norm2.append(first, prefix, errorCode); - } - } else { - UnicodeString middle(first, suffixStart, INT32_MAX); - if(doNormalize) { - norm2.normalizeSecondAndAppend(middle, prefix, errorCode); - } else { - norm2.append(middle, prefix, errorCode); - } - first.replace(suffixStart, INT32_MAX, middle); - } - } - if(prefixLimit 0) { - int32_t spanLength = set.spanUTF8(s, length, 
spanCondition); - if (spanCondition == USET_SPAN_NOT_CONTAINED) { - spanCondition = USET_SPAN_SIMPLE; - } else { - if (!norm2.isNormalizedUTF8(StringPiece(s, spanLength), errorCode) || - U_FAILURE(errorCode)) { - return FALSE; - } - spanCondition = USET_SPAN_NOT_CONTAINED; - } - s += spanLength; - length -= spanLength; - } - return TRUE; -} - -UNormalizationCheckResult -FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { - uprv_checkCanGetBuffer(s, errorCode); - if(U_FAILURE(errorCode)) { - return UNORM_MAYBE; - } - UNormalizationCheckResult result=UNORM_YES; - USetSpanCondition spanCondition=USET_SPAN_SIMPLE; - for(int32_t prevSpanLimit=0; prevSpanLimit -#ifdef __MVS__ /* defined by z/OS compiler */ -#define _POSIX_SOURCE -#include /* 12 Nov 2011 JAM iscics() function */ -#endif -#include "charstr.h" - -using namespace icu; - -#ifndef UPLUG_TRACE -#define UPLUG_TRACE 0 -#endif - -#if UPLUG_TRACE -#include -#define DBG(x) fprintf(stderr, "%s:%d: ",__FILE__,__LINE__); fprintf x -#endif - -/** - * Internal structure of an ICU plugin. 
- */ - -struct UPlugData { - UPlugEntrypoint *entrypoint; /**< plugin entrypoint */ - uint32_t structSize; /**< initialized to the size of this structure */ - uint32_t token; /**< must be U_PLUG_TOKEN */ - void *lib; /**< plugin library, or NULL */ - char libName[UPLUG_NAME_MAX]; /**< library name */ - char sym[UPLUG_NAME_MAX]; /**< plugin symbol, or NULL */ - char config[UPLUG_NAME_MAX]; /**< configuration data */ - void *context; /**< user context data */ - char name[UPLUG_NAME_MAX]; /**< name of plugin */ - UPlugLevel level; /**< level of plugin */ - UBool awaitingLoad; /**< TRUE if the plugin is awaiting a load call */ - UBool dontUnload; /**< TRUE if plugin must stay resident (leak plugin and lib) */ - UErrorCode pluginStatus; /**< status code of plugin */ -}; - - - -#define UPLUG_LIBRARY_INITIAL_COUNT 8 -#define UPLUG_PLUGIN_INITIAL_COUNT 12 - -/** - * Remove an item - * @param list the full list - * @param listSize the number of entries in the list - * @param memberSize the size of one member - * @param itemToRemove the item number of the member - * @return the new listsize - */ -static int32_t uplug_removeEntryAt(void *list, int32_t listSize, int32_t memberSize, int32_t itemToRemove) { - uint8_t *bytePtr = (uint8_t *)list; - - /* get rid of some bad cases first */ - if(listSize<1) { - return listSize; - } - - /* is there anything to move? */ - if(listSize > itemToRemove+1) { - memmove(bytePtr+(itemToRemove*memberSize), bytePtr+((itemToRemove+1)*memberSize), memberSize); - } - - return listSize-1; -} - - - - -#if U_ENABLE_DYLOAD -/** - * Library management. Internal. - * @internal - */ -struct UPlugLibrary; - -/** - * Library management. Internal. 
- * @internal - */ -typedef struct UPlugLibrary { - void *lib; /**< library ptr */ - char name[UPLUG_NAME_MAX]; /**< library name */ - uint32_t ref; /**< reference count */ -} UPlugLibrary; - -static UPlugLibrary staticLibraryList[UPLUG_LIBRARY_INITIAL_COUNT]; -static UPlugLibrary * libraryList = staticLibraryList; -static int32_t libraryCount = 0; -static int32_t libraryMax = UPLUG_LIBRARY_INITIAL_COUNT; - -/** - * Search for a library. Doesn't lock - * @param libName libname to search for - * @return the library's struct - */ -static int32_t searchForLibraryName(const char *libName) { - int32_t i; - - for(i=0;i= libraryMax) { - /* Ran out of library slots. Statically allocated because we can't depend on allocating memory.. */ - *status = U_MEMORY_ALLOCATION_ERROR; -#if UPLUG_TRACE - DBG((stderr, "uplug_openLibrary() - out of library slots (max %d)\n", libraryMax)); -#endif - return NULL; - } - /* Some operating systems don't want - DL operations from multiple threads. */ - libraryList[libEntry].lib = uprv_dl_open(libName, status); -#if UPLUG_TRACE - DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib)); -#endif - - if(libraryList[libEntry].lib == NULL || U_FAILURE(*status)) { - /* cleanup. */ - libraryList[libEntry].lib = NULL; /* failure with open */ - libraryList[libEntry].name[0] = 0; -#if UPLUG_TRACE - DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib)); -#endif - /* no need to free - just won't increase the count. */ - libraryCount--; - } else { /* is it still there? 
*/ - /* link it in */ - uprv_strncpy(libraryList[libEntry].name,libName,UPLUG_NAME_MAX); - libraryList[libEntry].ref=1; - lib = libraryList[libEntry].lib; - } - - } else { - lib = libraryList[libEntry].lib; - libraryList[libEntry].ref++; - } - return lib; -} - -U_INTERNAL void U_EXPORT2 -uplug_closeLibrary(void *lib, UErrorCode *status) { - int32_t i; - -#if UPLUG_TRACE - DBG((stderr, "uplug_closeLibrary(%p,%s) list %p\n", lib, u_errorName(*status), (void*)libraryList)); -#endif - if(U_FAILURE(*status)) return; - - for(i=0;i=pastPlug) { - return pluginCount; - } else { - return (d-pluginList)/sizeof(pluginList[0]); - } -} - - -U_CAPI UPlugData * U_EXPORT2 -uplug_nextPlug(UPlugData *prior) { - if(prior==NULL) { - return pluginList; - } else { - UPlugData *nextPlug = &prior[1]; - UPlugData *pastPlug = &pluginList[pluginCount]; - - if(nextPlug>=pastPlug) { - return NULL; - } else { - return nextPlug; - } - } -} - - - -/** - * Call the plugin with some params - */ -static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *status) { - UPlugTokenReturn token; - if(plug==NULL||U_FAILURE(*status)) { - return; - } - token = (*(plug->entrypoint))(plug, reason, status); - if(token!=UPLUG_TOKEN) { - *status = U_INTERNAL_PROGRAM_ERROR; - } -} - - -static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) { - if(plug->awaitingLoad) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/ - *status = U_INTERNAL_PROGRAM_ERROR; - return; - } - if(U_SUCCESS(plug->pluginStatus)) { - /* Don't unload a plug which has a failing load status - means it didn't actually load. */ - uplug_callPlug(plug, UPLUG_REASON_UNLOAD, status); - } -} - -static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) { - if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) { /* shouldn't happen. 
Plugin hasn'tbeen loaded yet.*/ - *status = U_INTERNAL_PROGRAM_ERROR; - return; - } - plug->level = UPLUG_LEVEL_INVALID; - uplug_callPlug(plug, UPLUG_REASON_QUERY, status); - if(U_SUCCESS(*status)) { - if(plug->level == UPLUG_LEVEL_INVALID) { - plug->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL; - plug->awaitingLoad = FALSE; - } - } else { - plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR; - plug->awaitingLoad = FALSE; - } -} - - -static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) { - if(U_FAILURE(*status)) { - return; - } - if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/ - *status = U_INTERNAL_PROGRAM_ERROR; - return; - } - uplug_callPlug(plug, UPLUG_REASON_LOAD, status); - plug->awaitingLoad = FALSE; - if(!U_SUCCESS(*status)) { - plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR; - } -} - -static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status) -{ - UPlugData *plug = NULL; - - if(U_FAILURE(*status)) { - return NULL; - } - - if(pluginCount == UPLUG_PLUGIN_INITIAL_COUNT) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - plug = &pluginList[pluginCount++]; - - plug->token = UPLUG_TOKEN; - plug->structSize = sizeof(UPlugData); - plug->name[0]=0; - plug->level = UPLUG_LEVEL_UNKNOWN; /* initialize to null state */ - plug->awaitingLoad = TRUE; - plug->dontUnload = FALSE; - plug->pluginStatus = U_ZERO_ERROR; - plug->libName[0] = 0; - plug->config[0]=0; - plug->sym[0]=0; - plug->lib=NULL; - plug->entrypoint=NULL; - - - return plug; -} - -static UPlugData *uplug_allocatePlug(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *symName, - UErrorCode *status) { - UPlugData *plug = uplug_allocateEmptyPlug(status); - if(U_FAILURE(*status)) { - return NULL; - } - - if(config!=NULL) { - uprv_strncpy(plug->config, config, UPLUG_NAME_MAX); - } else { - plug->config[0] = 0; - } - - if(symName!=NULL) { - uprv_strncpy(plug->sym, symName, UPLUG_NAME_MAX); - } else { - 
plug->sym[0] = 0; - } - - plug->entrypoint = entrypoint; - plug->lib = lib; - uplug_queryPlug(plug, status); - - return plug; -} - -static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) { - UErrorCode subStatus = U_ZERO_ERROR; - if(!plug->dontUnload) { -#if U_ENABLE_DYLOAD - uplug_closeLibrary(plug->lib, &subStatus); -#endif - } - plug->lib = NULL; - if(U_SUCCESS(*status) && U_FAILURE(subStatus)) { - *status = subStatus; - } - /* shift plugins up and decrement count. */ - if(U_SUCCESS(*status)) { - /* all ok- remove. */ - pluginCount = uplug_removeEntryAt(pluginList, pluginCount, sizeof(plug[0]), uplug_pluginNumber(plug)); - } else { - /* not ok- leave as a message. */ - plug->awaitingLoad=FALSE; - plug->entrypoint=0; - plug->dontUnload=TRUE; - } -} - -static void uplug_doUnloadPlug(UPlugData *plugToRemove, UErrorCode *status) { - if(plugToRemove != NULL) { - uplug_unloadPlug(plugToRemove, status); - uplug_deallocatePlug(plugToRemove, status); - } -} - -U_CAPI void U_EXPORT2 -uplug_removePlug(UPlugData *plug, UErrorCode *status) { - UPlugData *cursor = NULL; - UPlugData *plugToRemove = NULL; - if(U_FAILURE(*status)) return; - - for(cursor=pluginList;cursor!=NULL;) { - if(cursor==plug) { - plugToRemove = plug; - cursor=NULL; - } else { - cursor = uplug_nextPlug(cursor); - } - } - - uplug_doUnloadPlug(plugToRemove, status); -} - - - - -U_CAPI void U_EXPORT2 -uplug_setPlugNoUnload(UPlugData *data, UBool dontUnload) -{ - data->dontUnload = dontUnload; -} - - -U_CAPI void U_EXPORT2 -uplug_setPlugLevel(UPlugData *data, UPlugLevel level) { - data->level = level; -} - - -U_CAPI UPlugLevel U_EXPORT2 -uplug_getPlugLevel(UPlugData *data) { - return data->level; -} - - -U_CAPI void U_EXPORT2 -uplug_setPlugName(UPlugData *data, const char *name) { - uprv_strncpy(data->name, name, UPLUG_NAME_MAX); -} - - -U_CAPI const char * U_EXPORT2 -uplug_getPlugName(UPlugData *data) { - return data->name; -} - - -U_CAPI const char * U_EXPORT2 -uplug_getSymbolName(UPlugData 
*data) { - return data->sym; -} - -U_CAPI const char * U_EXPORT2 -uplug_getLibraryName(UPlugData *data, UErrorCode *status) { - if(data->libName[0]) { - return data->libName; - } else { -#if U_ENABLE_DYLOAD - return uplug_findLibrary(data->lib, status); -#else - return NULL; -#endif - } -} - -U_CAPI void * U_EXPORT2 -uplug_getLibrary(UPlugData *data) { - return data->lib; -} - -U_CAPI void * U_EXPORT2 -uplug_getContext(UPlugData *data) { - return data->context; -} - - -U_CAPI void U_EXPORT2 -uplug_setContext(UPlugData *data, void *context) { - data->context = context; -} - -U_CAPI const char* U_EXPORT2 -uplug_getConfiguration(UPlugData *data) { - return data->config; -} - -U_INTERNAL UPlugData* U_EXPORT2 -uplug_getPlugInternal(int32_t n) { - if(n <0 || n >= pluginCount) { - return NULL; - } else { - return &(pluginList[n]); - } -} - - -U_CAPI UErrorCode U_EXPORT2 -uplug_getPlugLoadStatus(UPlugData *plug) { - return plug->pluginStatus; -} - - - - -/** - * Initialize a plugin fron an entrypoint and library - but don't load it. 
- */ -static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym, - UErrorCode *status) { - UPlugData *plug = NULL; - - plug = uplug_allocatePlug(entrypoint, config, lib, sym, status); - - if(U_SUCCESS(*status)) { - return plug; - } else { - uplug_deallocatePlug(plug, status); - return NULL; - } -} - -U_CAPI UPlugData* U_EXPORT2 -uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status) { - UPlugData* plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, NULL, NULL, status); - uplug_loadPlug(plug, status); - return plug; -} - -#if U_ENABLE_DYLOAD - -static UPlugData* -uplug_initErrorPlug(const char *libName, const char *sym, const char *config, const char *nameOrError, UErrorCode loadStatus, UErrorCode *status) -{ - UPlugData *plug = uplug_allocateEmptyPlug(status); - if(U_FAILURE(*status)) return NULL; - - plug->pluginStatus = loadStatus; - plug->awaitingLoad = FALSE; /* Won't load. */ - plug->dontUnload = TRUE; /* cannot unload. */ - - if(sym!=NULL) { - uprv_strncpy(plug->sym, sym, UPLUG_NAME_MAX); - } - - if(libName!=NULL) { - uprv_strncpy(plug->libName, libName, UPLUG_NAME_MAX); - } - - if(nameOrError!=NULL) { - uprv_strncpy(plug->name, nameOrError, UPLUG_NAME_MAX); - } - - if(config!=NULL) { - uprv_strncpy(plug->config, config, UPLUG_NAME_MAX); - } - - return plug; -} - -/** - * Fetch a plugin from DLL, and then initialize it from a library- but don't load it. 
- */ -static UPlugData* -uplug_initPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) { - void *lib = NULL; - UPlugData *plug = NULL; - if(U_FAILURE(*status)) { return NULL; } - lib = uplug_openLibrary(libName, status); - if(lib!=NULL && U_SUCCESS(*status)) { - UPlugEntrypoint *entrypoint = NULL; - entrypoint = (UPlugEntrypoint*)uprv_dlsym_func(lib, sym, status); - - if(entrypoint!=NULL&&U_SUCCESS(*status)) { - plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, lib, sym, status); - if(plug!=NULL&&U_SUCCESS(*status)) { - plug->lib = lib; /* plug takes ownership of library */ - lib = NULL; /* library is now owned by plugin. */ - } - } else { - UErrorCode subStatus = U_ZERO_ERROR; - plug = uplug_initErrorPlug(libName,sym,config,"ERROR: Could not load entrypoint",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus); - } - if(lib!=NULL) { /* still need to close the lib */ - UErrorCode subStatus = U_ZERO_ERROR; - uplug_closeLibrary(lib, &subStatus); /* don't care here */ - } - } else { - UErrorCode subStatus = U_ZERO_ERROR; - plug = uplug_initErrorPlug(libName,sym,config,"ERROR: could not load library",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus); - } - return plug; -} - -U_CAPI UPlugData* U_EXPORT2 -uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) { - UPlugData *plug = NULL; - if(U_FAILURE(*status)) { return NULL; } - plug = uplug_initPlugFromLibrary(libName, sym, config, status); - uplug_loadPlug(plug, status); - - return plug; -} - -#endif - -static UPlugLevel gCurrentLevel = UPLUG_LEVEL_LOW; - -U_CAPI UPlugLevel U_EXPORT2 uplug_getCurrentLevel() { - return gCurrentLevel; -} - -static UBool U_CALLCONV uplug_cleanup(void) -{ - int32_t i; - - UPlugData *pluginToRemove; - /* cleanup plugs */ - for(i=0;iawaitingLoad) { - if(pluginToLoad->level == UPLUG_LEVEL_LOW) { - if(currentLevel > UPLUG_LEVEL_LOW) { - pluginToLoad->pluginStatus = 
U_PLUGIN_TOO_HIGH; - } else { - UPlugLevel newLevel; - uplug_loadPlug(pluginToLoad, &subStatus); - newLevel = uplug_getCurrentLevel(); - if(newLevel > currentLevel) { - pluginToLoad->pluginStatus = U_PLUGIN_CHANGED_LEVEL_WARNING; - currentLevel = newLevel; - } - } - pluginToLoad->awaitingLoad = FALSE; - } - } - } - for(i=0;iawaitingLoad) { - if(pluginToLoad->level == UPLUG_LEVEL_INVALID) { - pluginToLoad->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL; - } else if(pluginToLoad->level == UPLUG_LEVEL_UNKNOWN) { - pluginToLoad->pluginStatus = U_INTERNAL_PROGRAM_ERROR; - } else { - uplug_loadPlug(pluginToLoad, &subStatus); - } - pluginToLoad->awaitingLoad = FALSE; - } - } - -#if UPLUG_TRACE - DBG((stderr, " Done Loading Plugs. Level: %d\n", (int32_t)uplug_getCurrentLevel())); -#endif -} - -/* Name of the plugin config file */ -static char plugin_file[2048] = ""; -#endif - -U_INTERNAL const char* U_EXPORT2 -uplug_getPluginFile() { -#if U_ENABLE_DYLOAD && !UCONFIG_NO_FILE_IO - return plugin_file; -#else - return NULL; -#endif -} - - -// uplug_init() is called first thing from u_init(). 
- -U_CAPI void U_EXPORT2 -uplug_init(UErrorCode *status) { -#if !U_ENABLE_DYLOAD - (void)status; /* unused */ -#elif !UCONFIG_NO_FILE_IO - CharString plugin_dir; - const char *env = getenv("ICU_PLUGINS"); - - if(U_FAILURE(*status)) return; - if(env != NULL) { - plugin_dir.append(env, -1, *status); - } - if(U_FAILURE(*status)) return; - -#if defined(DEFAULT_ICU_PLUGINS) - if(plugin_dir.isEmpty()) { - plugin_dir.append(DEFAULT_ICU_PLUGINS, -1, *status); - } -#endif - -#if UPLUG_TRACE - DBG((stderr, "ICU_PLUGINS=%s\n", plugin_dir.data())); -#endif - - if(!plugin_dir.isEmpty()) { - FILE *f; - - CharString pluginFile; -#ifdef OS390BATCH -/* There are potentially a lot of ways to implement a plugin directory on OS390/zOS */ -/* Keeping in mind that unauthorized file access is logged, monitored, and enforced */ -/* I've chosen to open a DDNAME if BATCH and leave it alone for (presumably) UNIX */ -/* System Services. Alternative techniques might be allocating a member in */ -/* SYS1.PARMLIB or setting an environment variable "ICU_PLUGIN_PATH" (?). The */ -/* DDNAME can be connected to a file in the HFS if need be. */ - - pluginFile.append("//DD:ICUPLUG", -1, *status); /* JAM 20 Oct 2011 */ -#else - pluginFile.append(plugin_dir, *status); - pluginFile.append(U_FILE_SEP_STRING, -1, *status); - pluginFile.append("icuplugins", -1, *status); - pluginFile.append(U_ICU_VERSION_SHORT, -1, *status); - pluginFile.append(".txt", -1, *status); -#endif - -#if UPLUG_TRACE - DBG((stderr, "status=%s\n", u_errorName(*status))); -#endif - - if(U_FAILURE(*status)) { - return; - } - if((size_t)pluginFile.length() > (sizeof(plugin_file)-1)) { - *status = U_BUFFER_OVERFLOW_ERROR; -#if UPLUG_TRACE - DBG((stderr, "status=%s\n", u_errorName(*status))); -#endif - return; - } - - /* plugin_file is not used for processing - it is only used - so that uplug_getPluginFile() works (i.e. 
icuinfo) - */ - uprv_strncpy(plugin_file, pluginFile.data(), sizeof(plugin_file)); - -#if UPLUG_TRACE - DBG((stderr, "pluginfile= %s len %d/%d\n", plugin_file, (int)strlen(plugin_file), (int)sizeof(plugin_file))); -#endif - -#ifdef __MVS__ - if (iscics()) /* 12 Nov 2011 JAM */ - { - f = NULL; - } - else -#endif - { - f = fopen(pluginFile.data(), "r"); - } - - if(f != NULL) { - char linebuf[1024]; - char *p, *libName=NULL, *symName=NULL, *config=NULL; - int32_t line = 0; - - - while(fgets(linebuf,1023,f)) { - line++; - - if(!*linebuf || *linebuf=='#') { - continue; - } else { - p = linebuf; - while(*p&&isspace((int)*p)) - p++; - if(!*p || *p=='#') continue; - libName = p; - while(*p&&!isspace((int)*p)) { - p++; - } - if(!*p || *p=='#') continue; /* no tab after libname */ - *p=0; /* end of libname */ - p++; - while(*p&&isspace((int)*p)) { - p++; - } - if(!*p||*p=='#') continue; /* no symname after libname +tab */ - symName = p; - while(*p&&!isspace((int)*p)) { - p++; - } - - if(*p) { /* has config */ - *p=0; - ++p; - while(*p&&isspace((int)*p)) { - p++; - } - if(*p) { - config = p; - } - } - - /* chop whitespace at the end of the config */ - if(config!=NULL&&*config!=0) { - p = config+strlen(config); - while(p>config&&isspace((int)*(--p))) { - *p=0; - } - } - - /* OK, we're good. 
*/ - { - UErrorCode subStatus = U_ZERO_ERROR; - UPlugData *plug = uplug_initPlugFromLibrary(libName, symName, config, &subStatus); - if(U_FAILURE(subStatus) && U_SUCCESS(*status)) { - *status = subStatus; - } -#if UPLUG_TRACE - DBG((stderr, "PLUGIN libName=[%s], sym=[%s], config=[%s]\n", libName, symName, config)); - DBG((stderr, " -> %p, %s\n", (void*)plug, u_errorName(subStatus))); -#else - (void)plug; /* unused */ -#endif - } - } - } - fclose(f); - } else { -#if UPLUG_TRACE - DBG((stderr, "Can't open plugin file %s\n", plugin_file)); -#endif - } - } - uplug_loadWaitingPlugs(status); -#endif /* U_ENABLE_DYLOAD */ - gCurrentLevel = UPLUG_LEVEL_HIGH; - ucln_registerCleanup(UCLN_UPLUG, uplug_cleanup); -} - -#endif diff --git a/deps/node/deps/icu-small/source/common/icuplugimp.h b/deps/node/deps/icu-small/source/common/icuplugimp.h deleted file mode 100644 index 282c639b..00000000 --- a/deps/node/deps/icu-small/source/common/icuplugimp.h +++ /dev/null @@ -1,93 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2009-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* FILE NAME : icuplugimp.h -* -* Internal functions for the ICU plugin system -* -* Date Name Description -* 10/29/2009 sl New. -****************************************************************************** -*/ - - -#ifndef ICUPLUGIMP_H -#define ICUPLUGIMP_H - -#include "unicode/icuplug.h" - -#if UCONFIG_ENABLE_PLUGINS - -/*========================*/ -/** @{ Library Manipulation - */ - -/** - * Open a library, adding a reference count if needed. 
- * @param libName library name to load - * @param status error code - * @return the library pointer, or NULL - * @internal internal use only - */ -U_INTERNAL void * U_EXPORT2 -uplug_openLibrary(const char *libName, UErrorCode *status); - -/** - * Close a library, if its reference count is 0 - * @param lib the library to close - * @param status error code - * @internal internal use only - */ -U_INTERNAL void U_EXPORT2 -uplug_closeLibrary(void *lib, UErrorCode *status); - -/** - * Get a library's name, or NULL if not found. - * @param lib the library's name - * @param status error code - * @return the library name, or NULL if not found. - * @internal internal use only - */ -U_INTERNAL char * U_EXPORT2 -uplug_findLibrary(void *lib, UErrorCode *status); - -/** @} */ - -/*========================*/ -/** {@ ICU Plugin internal interfaces - */ - -/** - * Initialize the plugins - * @param status error result - * @internal - Internal use only. - */ -U_INTERNAL void U_EXPORT2 -uplug_init(UErrorCode *status); - -/** - * Get raw plug N - * @internal - Internal use only - */ -U_INTERNAL UPlugData* U_EXPORT2 -uplug_getPlugInternal(int32_t n); - -/** - * Get the name of the plugin file. - * @internal - Internal use only. - */ -U_INTERNAL const char* U_EXPORT2 -uplug_getPluginFile(void); - -/** @} */ - -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/loadednormalizer2impl.cpp b/deps/node/deps/icu-small/source/common/loadednormalizer2impl.cpp deleted file mode 100644 index 82cb325b..00000000 --- a/deps/node/deps/icu-small/source/common/loadednormalizer2impl.cpp +++ /dev/null @@ -1,418 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2014, International Business Machines -* Corporation and others. All Rights Reserved. 
-******************************************************************************* -* loadednormalizer2impl.cpp -* -* created on: 2014sep03 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/udata.h" -#include "unicode/localpointer.h" -#include "unicode/normalizer2.h" -#include "unicode/ucptrie.h" -#include "unicode/unistr.h" -#include "unicode/unorm.h" -#include "cstring.h" -#include "mutex.h" -#include "norm2allmodes.h" -#include "normalizer2impl.h" -#include "uassert.h" -#include "ucln_cmn.h" -#include "uhash.h" - -U_NAMESPACE_BEGIN - -class LoadedNormalizer2Impl : public Normalizer2Impl { -public: - LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {} - virtual ~LoadedNormalizer2Impl(); - - void load(const char *packageName, const char *name, UErrorCode &errorCode); - -private: - static UBool U_CALLCONV - isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo); - - UDataMemory *memory; - UCPTrie *ownedTrie; -}; - -LoadedNormalizer2Impl::~LoadedNormalizer2Impl() { - udata_close(memory); - ucptrie_close(ownedTrie); -} - -UBool U_CALLCONV -LoadedNormalizer2Impl::isAcceptable(void * /*context*/, - const char * /* type */, const char * /*name*/, - const UDataInfo *pInfo) { - if( - pInfo->size>=20 && - pInfo->isBigEndian==U_IS_BIG_ENDIAN && - pInfo->charsetFamily==U_CHARSET_FAMILY && - pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ - pInfo->dataFormat[1]==0x72 && - pInfo->dataFormat[2]==0x6d && - pInfo->dataFormat[3]==0x32 && - pInfo->formatVersion[0]==4 - ) { - // Normalizer2Impl *me=(Normalizer2Impl *)context; - // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); - return TRUE; - } else { - return FALSE; - } -} - -void -LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode); - 
if(U_FAILURE(errorCode)) { - return; - } - const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory); - const int32_t *inIndexes=(const int32_t *)inBytes; - int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4; - if(indexesLength<=IX_MIN_LCCC_CP) { - errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes. - return; - } - - int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET]; - int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; - ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, - inBytes+offset, nextOffset-offset, NULL, - &errorCode); - if(U_FAILURE(errorCode)) { - return; - } - - offset=nextOffset; - nextOffset=inIndexes[IX_SMALL_FCD_OFFSET]; - const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset); - - // smallFCD: new in formatVersion 2 - offset=nextOffset; - const uint8_t *inSmallFCD=inBytes+offset; - - init(inIndexes, ownedTrie, inExtraData, inSmallFCD); -} - -// instance cache ---------------------------------------------------------- *** - -Norm2AllModes * -Norm2AllModes::createInstance(const char *packageName, - const char *name, - UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return NULL; - } - LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl; - if(impl==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - impl->load(packageName, name, errorCode); - return createInstance(impl, errorCode); -} - -U_CDECL_BEGIN -static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup(); -U_CDECL_END - -#if !NORM2_HARDCODE_NFC_DATA -static Norm2AllModes *nfcSingleton; -static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; -#endif - -static Norm2AllModes *nfkcSingleton; -static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; - -static Norm2AllModes *nfkc_cfSingleton; -static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; - -static UHashtable *cache=NULL; - -// UInitOnce singleton initialization function -static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { -#if 
!NORM2_HARDCODE_NFC_DATA - if (uprv_strcmp(what, "nfc") == 0) { - nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode); - } else -#endif - if (uprv_strcmp(what, "nfkc") == 0) { - nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode); - } else if (uprv_strcmp(what, "nfkc_cf") == 0) { - nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode); - } else { - U_ASSERT(FALSE); // Unknown singleton - } - ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); -} - -U_CDECL_BEGIN - -static void U_CALLCONV deleteNorm2AllModes(void *allModes) { - delete (Norm2AllModes *)allModes; -} - -static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() { -#if !NORM2_HARDCODE_NFC_DATA - delete nfcSingleton; - nfcSingleton = NULL; - nfcInitOnce.reset(); -#endif - - delete nfkcSingleton; - nfkcSingleton = NULL; - nfkcInitOnce.reset(); - - delete nfkc_cfSingleton; - nfkc_cfSingleton = NULL; - nfkc_cfInitOnce.reset(); - - uhash_close(cache); - cache=NULL; - return TRUE; -} - -U_CDECL_END - -#if !NORM2_HARDCODE_NFC_DATA -const Norm2AllModes * -Norm2AllModes::getNFCInstance(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return NULL; } - umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); - return nfcSingleton; -} -#endif - -const Norm2AllModes * -Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return NULL; } - umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); - return nfkcSingleton; -} - -const Norm2AllModes * -Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return NULL; } - umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); - return nfkc_cfSingleton; -} - -#if !NORM2_HARDCODE_NFC_DATA -const Normalizer2 * -Normalizer2::getNFCInstance(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); - return allModes!=NULL ? 
&allModes->comp : NULL; -} - -const Normalizer2 * -Normalizer2::getNFDInstance(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); - return allModes!=NULL ? &allModes->decomp : NULL; -} - -const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); - return allModes!=NULL ? &allModes->fcd : NULL; -} - -const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); - return allModes!=NULL ? &allModes->fcc : NULL; -} - -const Normalizer2Impl * -Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); - return allModes!=NULL ? allModes->impl : NULL; -} -#endif - -const Normalizer2 * -Normalizer2::getNFKCInstance(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); - return allModes!=NULL ? &allModes->comp : NULL; -} - -const Normalizer2 * -Normalizer2::getNFKDInstance(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); - return allModes!=NULL ? &allModes->decomp : NULL; -} - -const Normalizer2 * -Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); - return allModes!=NULL ? 
&allModes->comp : NULL; -} - -const Normalizer2 * -Normalizer2::getInstance(const char *packageName, - const char *name, - UNormalization2Mode mode, - UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return NULL; - } - if(name==NULL || *name==0) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - const Norm2AllModes *allModes=NULL; - if(packageName==NULL) { - if(0==uprv_strcmp(name, "nfc")) { - allModes=Norm2AllModes::getNFCInstance(errorCode); - } else if(0==uprv_strcmp(name, "nfkc")) { - allModes=Norm2AllModes::getNFKCInstance(errorCode); - } else if(0==uprv_strcmp(name, "nfkc_cf")) { - allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); - } - } - if(allModes==NULL && U_SUCCESS(errorCode)) { - { - Mutex lock; - if(cache!=NULL) { - allModes=(Norm2AllModes *)uhash_get(cache, name); - } - } - if(allModes==NULL) { - ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); - LocalPointer localAllModes( - Norm2AllModes::createInstance(packageName, name, errorCode)); - if(U_SUCCESS(errorCode)) { - Mutex lock; - if(cache==NULL) { - cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); - if(U_FAILURE(errorCode)) { - return NULL; - } - uhash_setKeyDeleter(cache, uprv_free); - uhash_setValueDeleter(cache, deleteNorm2AllModes); - } - void *temp=uhash_get(cache, name); - if(temp==NULL) { - int32_t keyLength= static_cast(uprv_strlen(name)+1); - char *nameCopy=(char *)uprv_malloc(keyLength); - if(nameCopy==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(nameCopy, name, keyLength); - allModes=localAllModes.getAlias(); - uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode); - } else { - // race condition - allModes=(Norm2AllModes *)temp; - } - } - } - } - if(allModes!=NULL && U_SUCCESS(errorCode)) { - switch(mode) { - case UNORM2_COMPOSE: - return &allModes->comp; - case UNORM2_DECOMPOSE: - return &allModes->decomp; - case UNORM2_FCD: - return &allModes->fcd; - 
case UNORM2_COMPOSE_CONTIGUOUS: - return &allModes->fcc; - default: - break; // do nothing - } - } - return NULL; -} - -const Normalizer2 * -Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return NULL; - } - switch(mode) { - case UNORM_NFD: - return Normalizer2::getNFDInstance(errorCode); - case UNORM_NFKD: - return Normalizer2::getNFKDInstance(errorCode); - case UNORM_NFC: - return Normalizer2::getNFCInstance(errorCode); - case UNORM_NFKC: - return Normalizer2::getNFKCInstance(errorCode); - case UNORM_FCD: - return getFCDInstance(errorCode); - default: // UNORM_NONE - return getNoopInstance(errorCode); - } -} - -const Normalizer2Impl * -Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); - return allModes!=NULL ? allModes->impl : NULL; -} - -const Normalizer2Impl * -Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); - return allModes!=NULL ? 
allModes->impl : NULL; -} - -U_NAMESPACE_END - -// C API ------------------------------------------------------------------- *** - -U_NAMESPACE_USE - -U_CAPI const UNormalizer2 * U_EXPORT2 -unorm2_getNFKCInstance(UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); -} - -U_CAPI const UNormalizer2 * U_EXPORT2 -unorm2_getNFKDInstance(UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); -} - -U_CAPI const UNormalizer2 * U_EXPORT2 -unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); -} - -U_CAPI const UNormalizer2 * U_EXPORT2 -unorm2_getInstance(const char *packageName, - const char *name, - UNormalization2Mode mode, - UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); -} - -U_CFUNC UNormalizationCheckResult -unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { - if(mode<=UNORM_NONE || UNORM_FCD<=mode) { - return UNORM_YES; - } - UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); - if(U_SUCCESS(errorCode)) { - return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); - } else { - return UNORM_MAYBE; - } -} - -#endif // !UCONFIG_NO_NORMALIZATION diff --git a/deps/node/deps/icu-small/source/common/localsvc.h b/deps/node/deps/icu-small/source/common/localsvc.h deleted file mode 100644 index 724216aa..00000000 --- a/deps/node/deps/icu-small/source/common/localsvc.h +++ /dev/null @@ -1,27 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -*************************************************************************** -* Copyright (C) 2006 International Business Machines Corporation * -* and others. All rights reserved. 
* -*************************************************************************** -*/ - -#ifndef LOCALSVC_H -#define LOCALSVC_H - -#include "unicode/utypes.h" - -#if U_LOCAL_SERVICE_HOOK -/** - * Prototype for user-supplied service hook. This function is expected to return - * a type of factory object specific to the requested service. - * - * @param what service-specific string identifying the specific user hook - * @param status error status - * @return a service-specific hook, or NULL on failure. - */ -U_CAPI void* uprv_svc_hook(const char *what, UErrorCode *status); -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/locavailable.cpp b/deps/node/deps/icu-small/source/common/locavailable.cpp deleted file mode 100644 index b3a3346a..00000000 --- a/deps/node/deps/icu-small/source/common/locavailable.cpp +++ /dev/null @@ -1,180 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1997-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: locavailable.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010feb25 -* created by: Markus W. Scherer -* -* Code for available locales, separated out from other .cpp files -* that then do not depend on resource bundle code and res_index bundles. 
-*/ - -#include "unicode/utypes.h" -#include "unicode/locid.h" -#include "unicode/uloc.h" -#include "unicode/ures.h" -#include "cmemory.h" -#include "ucln_cmn.h" -#include "uassert.h" -#include "umutex.h" -#include "uresimp.h" - -// C++ API ----------------------------------------------------------------- *** - -U_NAMESPACE_BEGIN - -static icu::Locale* availableLocaleList = NULL; -static int32_t availableLocaleListCount; -static icu::UInitOnce gInitOnceLocale = U_INITONCE_INITIALIZER; - -U_NAMESPACE_END - -U_CDECL_BEGIN - -static UBool U_CALLCONV locale_available_cleanup(void) -{ - U_NAMESPACE_USE - - if (availableLocaleList) { - delete []availableLocaleList; - availableLocaleList = NULL; - } - availableLocaleListCount = 0; - gInitOnceLocale.reset(); - - return TRUE; -} - -U_CDECL_END - -U_NAMESPACE_BEGIN - -void U_CALLCONV locale_available_init() { - // This function is a friend of class Locale. - // This function is only invoked via umtx_initOnce(). - - // for now, there is a hardcoded list, so just walk through that list and set it up. - // Note: this function is a friend of class Locale. 
- availableLocaleListCount = uloc_countAvailable(); - if(availableLocaleListCount) { - availableLocaleList = new Locale[availableLocaleListCount]; - } - if (availableLocaleList == NULL) { - availableLocaleListCount= 0; - } - for (int32_t locCount=availableLocaleListCount-1; locCount>=0; --locCount) { - availableLocaleList[locCount].setFromPOSIXID(uloc_getAvailable(locCount)); - } - ucln_common_registerCleanup(UCLN_COMMON_LOCALE_AVAILABLE, locale_available_cleanup); -} - -const Locale* U_EXPORT2 -Locale::getAvailableLocales(int32_t& count) -{ - umtx_initOnce(gInitOnceLocale, &locale_available_init); - count = availableLocaleListCount; - return availableLocaleList; -} - - -U_NAMESPACE_END - -// C API ------------------------------------------------------------------- *** - -U_NAMESPACE_USE - -/* ### Constants **************************************************/ - -/* These strings describe the resources we attempt to load from - the locale ResourceBundle data file.*/ -static const char _kIndexLocaleName[] = "res_index"; -static const char _kIndexTag[] = "InstalledLocales"; - -static char** _installedLocales = NULL; -static int32_t _installedLocalesCount = 0; -static icu::UInitOnce _installedLocalesInitOnce; - -/* ### Get available **************************************************/ - -static UBool U_CALLCONV uloc_cleanup(void) { - char ** temp; - - if (_installedLocales) { - temp = _installedLocales; - _installedLocales = NULL; - - _installedLocalesCount = 0; - _installedLocalesInitOnce.reset(); - - uprv_free(temp); - } - return TRUE; -} - -// Load Installed Locales. This function will be called exactly once -// via the initOnce mechanism. 
- -static void U_CALLCONV loadInstalledLocales() { - UResourceBundle *indexLocale = NULL; - UResourceBundle installed; - UErrorCode status = U_ZERO_ERROR; - int32_t i = 0; - int32_t localeCount; - - U_ASSERT(_installedLocales == NULL); - U_ASSERT(_installedLocalesCount == 0); - - _installedLocalesCount = 0; - ures_initStackObject(&installed); - indexLocale = ures_openDirect(NULL, _kIndexLocaleName, &status); - ures_getByKey(indexLocale, _kIndexTag, &installed, &status); - - if(U_SUCCESS(status)) { - localeCount = ures_getSize(&installed); - _installedLocales = (char **) uprv_malloc(sizeof(char*) * (localeCount+1)); - if (_installedLocales != NULL) { - ures_resetIterator(&installed); - while(ures_hasNext(&installed)) { - ures_getNextString(&installed, NULL, (const char **)&_installedLocales[i++], &status); - } - _installedLocales[i] = NULL; - _installedLocalesCount = localeCount; - ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup); - } - } - ures_close(&installed); - ures_close(indexLocale); -} - -static void _load_installedLocales() -{ - umtx_initOnce(_installedLocalesInitOnce, &loadInstalledLocales); -} - -U_CAPI const char* U_EXPORT2 -uloc_getAvailable(int32_t offset) -{ - - _load_installedLocales(); - - if (offset > _installedLocalesCount) - return NULL; - return _installedLocales[offset]; -} - -U_CAPI int32_t U_EXPORT2 -uloc_countAvailable() -{ - _load_installedLocales(); - return _installedLocalesCount; -} diff --git a/deps/node/deps/icu-small/source/common/locbased.cpp b/deps/node/deps/icu-small/source/common/locbased.cpp deleted file mode 100644 index ff378b4c..00000000 --- a/deps/node/deps/icu-small/source/common/locbased.cpp +++ /dev/null @@ -1,55 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2004-2014, International Business Machines -* Corporation and others. 
All Rights Reserved. -********************************************************************** -* Author: Alan Liu -* Created: January 16 2004 -* Since: ICU 2.8 -********************************************************************** -*/ -#include "locbased.h" -#include "cstring.h" - -U_NAMESPACE_BEGIN - -Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const { - const char* id = getLocaleID(type, status); - return Locale((id != 0) ? id : ""); -} - -const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { - if (U_FAILURE(status)) { - return NULL; - } - - switch(type) { - case ULOC_VALID_LOCALE: - return valid; - case ULOC_ACTUAL_LOCALE: - return actual; - default: - status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } -} - -void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) { - if (validID != 0) { - uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY); - valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate - } - if (actualID != 0) { - uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY); - actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate - } -} - -void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) { - uprv_strcpy(valid, validID.getName()); - uprv_strcpy(actual, actualID.getName()); -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/locbased.h b/deps/node/deps/icu-small/source/common/locbased.h deleted file mode 100644 index 6db6a41d..00000000 --- a/deps/node/deps/icu-small/source/common/locbased.h +++ /dev/null @@ -1,107 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2004-2014, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* Author: Alan Liu -* Created: January 16 2004 -* Since: ICU 2.8 -********************************************************************** -*/ -#ifndef LOCBASED_H -#define LOCBASED_H - -#include "unicode/locid.h" -#include "unicode/uobject.h" - -/** - * Macro to declare a locale LocaleBased wrapper object for the given - * object, which must have two members named `validLocale' and - * `actualLocale' of size ULOC_FULLNAME_CAPACITY - */ -#define U_LOCALE_BASED(varname, objname) \ - LocaleBased varname((objname).validLocale, (objname).actualLocale); - -U_NAMESPACE_BEGIN - -/** - * A utility class that unifies the implementation of getLocale() by - * various ICU services. This class is likely to be removed in the - * ICU 3.0 time frame in favor of an integrated approach with the - * services framework. - * @since ICU 2.8 - */ -class U_COMMON_API LocaleBased : public UMemory { - - public: - - /** - * Construct a LocaleBased wrapper around the two pointers. These - * will be aliased for the lifetime of this object. - */ - inline LocaleBased(char* validAlias, char* actualAlias); - - /** - * Construct a LocaleBased wrapper around the two const pointers. - * These will be aliased for the lifetime of this object. - */ - inline LocaleBased(const char* validAlias, const char* actualAlias); - - /** - * Return locale meta-data for the service object wrapped by this - * object. Either the valid or the actual locale may be - * retrieved. - * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE - * @param status input-output error code - * @return the indicated locale - */ - Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; - - /** - * Return the locale ID for the service object wrapped by this - * object. Either the valid or the actual locale may be - * retrieved. 
- * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE - * @param status input-output error code - * @return the indicated locale ID - */ - const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const; - - /** - * Set the locale meta-data for the service object wrapped by this - * object. If either parameter is zero, it is ignored. - * @param valid the ID of the valid locale - * @param actual the ID of the actual locale - */ - void setLocaleIDs(const char* valid, const char* actual); - - /** - * Set the locale meta-data for the service object wrapped by this - * object. - * @param valid the ID of the valid locale - * @param actual the ID of the actual locale - */ - void setLocaleIDs(const Locale& valid, const Locale& actual); - - private: - - char* valid; - - char* actual; -}; - -inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) : - valid(validAlias), actual(actualAlias) { -} - -inline LocaleBased::LocaleBased(const char* validAlias, - const char* actualAlias) : - // ugh: cast away const - valid((char*)validAlias), actual((char*)actualAlias) { -} - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/locdispnames.cpp b/deps/node/deps/icu-small/source/common/locdispnames.cpp deleted file mode 100644 index 83c7bc30..00000000 --- a/deps/node/deps/icu-small/source/common/locdispnames.cpp +++ /dev/null @@ -1,886 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: locdispnames.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010feb25 -* created by: Markus W. 
Scherer -* -* Code for locale display names, separated out from other .cpp files -* that then do not depend on resource bundle code and display name data. -*/ - -#include "unicode/utypes.h" -#include "unicode/brkiter.h" -#include "unicode/locid.h" -#include "unicode/uloc.h" -#include "unicode/ures.h" -#include "unicode/ustring.h" -#include "cmemory.h" -#include "cstring.h" -#include "putilimp.h" -#include "ulocimp.h" -#include "uresimp.h" -#include "ureslocs.h" -#include "ustr_imp.h" - -// C++ API ----------------------------------------------------------------- *** - -U_NAMESPACE_BEGIN - -UnicodeString& -Locale::getDisplayLanguage(UnicodeString& dispLang) const -{ - return this->getDisplayLanguage(getDefault(), dispLang); -} - -/*We cannot make any assumptions on the size of the output display strings -* Yet, since we are calling through to a C API, we need to set limits on -* buffer size. For all the following getDisplay functions we first attempt -* to fill up a stack allocated buffer. If it is to small we heap allocated -* the exact buffer we need copy it to the UnicodeString and delete it*/ - -UnicodeString& -Locale::getDisplayLanguage(const Locale &displayLocale, - UnicodeString &result) const { - UChar *buffer; - UErrorCode errorCode=U_ZERO_ERROR; - int32_t length; - - buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY); - if(buffer==0) { - result.truncate(0); - return result; - } - - length=uloc_getDisplayLanguage(fullName, displayLocale.fullName, - buffer, result.getCapacity(), - &errorCode); - result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); - - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - buffer=result.getBuffer(length); - if(buffer==0) { - result.truncate(0); - return result; - } - errorCode=U_ZERO_ERROR; - length=uloc_getDisplayLanguage(fullName, displayLocale.fullName, - buffer, result.getCapacity(), - &errorCode); - result.releaseBuffer(U_SUCCESS(errorCode) ? 
length : 0); - } - - return result; -} - -UnicodeString& -Locale::getDisplayScript(UnicodeString& dispScript) const -{ - return this->getDisplayScript(getDefault(), dispScript); -} - -UnicodeString& -Locale::getDisplayScript(const Locale &displayLocale, - UnicodeString &result) const { - UChar *buffer; - UErrorCode errorCode=U_ZERO_ERROR; - int32_t length; - - buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY); - if(buffer==0) { - result.truncate(0); - return result; - } - - length=uloc_getDisplayScript(fullName, displayLocale.fullName, - buffer, result.getCapacity(), - &errorCode); - result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); - - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - buffer=result.getBuffer(length); - if(buffer==0) { - result.truncate(0); - return result; - } - errorCode=U_ZERO_ERROR; - length=uloc_getDisplayScript(fullName, displayLocale.fullName, - buffer, result.getCapacity(), - &errorCode); - result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); - } - - return result; -} - -UnicodeString& -Locale::getDisplayCountry(UnicodeString& dispCntry) const -{ - return this->getDisplayCountry(getDefault(), dispCntry); -} - -UnicodeString& -Locale::getDisplayCountry(const Locale &displayLocale, - UnicodeString &result) const { - UChar *buffer; - UErrorCode errorCode=U_ZERO_ERROR; - int32_t length; - - buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY); - if(buffer==0) { - result.truncate(0); - return result; - } - - length=uloc_getDisplayCountry(fullName, displayLocale.fullName, - buffer, result.getCapacity(), - &errorCode); - result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); - - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - buffer=result.getBuffer(length); - if(buffer==0) { - result.truncate(0); - return result; - } - errorCode=U_ZERO_ERROR; - length=uloc_getDisplayCountry(fullName, displayLocale.fullName, - buffer, result.getCapacity(), - &errorCode); - result.releaseBuffer(U_SUCCESS(errorCode) ? 
length : 0); - } - - return result; -} - -UnicodeString& -Locale::getDisplayVariant(UnicodeString& dispVar) const -{ - return this->getDisplayVariant(getDefault(), dispVar); -} - -UnicodeString& -Locale::getDisplayVariant(const Locale &displayLocale, - UnicodeString &result) const { - UChar *buffer; - UErrorCode errorCode=U_ZERO_ERROR; - int32_t length; - - buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY); - if(buffer==0) { - result.truncate(0); - return result; - } - - length=uloc_getDisplayVariant(fullName, displayLocale.fullName, - buffer, result.getCapacity(), - &errorCode); - result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); - - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - buffer=result.getBuffer(length); - if(buffer==0) { - result.truncate(0); - return result; - } - errorCode=U_ZERO_ERROR; - length=uloc_getDisplayVariant(fullName, displayLocale.fullName, - buffer, result.getCapacity(), - &errorCode); - result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); - } - - return result; -} - -UnicodeString& -Locale::getDisplayName( UnicodeString& name ) const -{ - return this->getDisplayName(getDefault(), name); -} - -UnicodeString& -Locale::getDisplayName(const Locale &displayLocale, - UnicodeString &result) const { - UChar *buffer; - UErrorCode errorCode=U_ZERO_ERROR; - int32_t length; - - buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY); - if(buffer==0) { - result.truncate(0); - return result; - } - - length=uloc_getDisplayName(fullName, displayLocale.fullName, - buffer, result.getCapacity(), - &errorCode); - result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); - - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - buffer=result.getBuffer(length); - if(buffer==0) { - result.truncate(0); - return result; - } - errorCode=U_ZERO_ERROR; - length=uloc_getDisplayName(fullName, displayLocale.fullName, - buffer, result.getCapacity(), - &errorCode); - result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); - } - - return result; -} - -#if ! 
UCONFIG_NO_BREAK_ITERATION - -// ------------------------------------- -// Gets the objectLocale display name in the default locale language. -UnicodeString& U_EXPORT2 -BreakIterator::getDisplayName(const Locale& objectLocale, - UnicodeString& name) -{ - return objectLocale.getDisplayName(name); -} - -// ------------------------------------- -// Gets the objectLocale display name in the displayLocale language. -UnicodeString& U_EXPORT2 -BreakIterator::getDisplayName(const Locale& objectLocale, - const Locale& displayLocale, - UnicodeString& name) -{ - return objectLocale.getDisplayName(displayLocale, name); -} - -#endif - - -U_NAMESPACE_END - -// C API ------------------------------------------------------------------- *** - -U_NAMESPACE_USE - -/* ### Constants **************************************************/ - -/* These strings describe the resources we attempt to load from - the locale ResourceBundle data file.*/ -static const char _kLanguages[] = "Languages"; -static const char _kScripts[] = "Scripts"; -static const char _kScriptsStandAlone[] = "Scripts%stand-alone"; -static const char _kCountries[] = "Countries"; -static const char _kVariants[] = "Variants"; -static const char _kKeys[] = "Keys"; -static const char _kTypes[] = "Types"; -//static const char _kRootName[] = "root"; -static const char _kCurrency[] = "currency"; -static const char _kCurrencies[] = "Currencies"; -static const char _kLocaleDisplayPattern[] = "localeDisplayPattern"; -static const char _kPattern[] = "pattern"; -static const char _kSeparator[] = "separator"; - -/* ### Display name **************************************************/ - -static int32_t -_getStringOrCopyKey(const char *path, const char *locale, - const char *tableKey, - const char* subTableKey, - const char *itemKey, - const char *substitute, - UChar *dest, int32_t destCapacity, - UErrorCode *pErrorCode) { - const UChar *s = NULL; - int32_t length = 0; - - if(itemKey==NULL) { - /* top-level item: normal resource bundle 
access */ - UResourceBundle *rb; - - rb=ures_open(path, locale, pErrorCode); - - if(U_SUCCESS(*pErrorCode)) { - s=ures_getStringByKey(rb, tableKey, &length, pErrorCode); - /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */ - ures_close(rb); - } - } else { - /* Language code should not be a number. If it is, set the error code. */ - if (!uprv_strncmp(tableKey, "Languages", 9) && uprv_strtol(itemKey, NULL, 10)) { - *pErrorCode = U_MISSING_RESOURCE_ERROR; - } else { - /* second-level item, use special fallback */ - s=uloc_getTableStringWithFallback(path, locale, - tableKey, - subTableKey, - itemKey, - &length, - pErrorCode); - } - } - - if(U_SUCCESS(*pErrorCode)) { - int32_t copyLength=uprv_min(length, destCapacity); - if(copyLength>0 && s != NULL) { - u_memcpy(dest, s, copyLength); - } - } else { - /* no string from a resource bundle: convert the substitute */ - length=(int32_t)uprv_strlen(substitute); - u_charsToUChars(substitute, dest, uprv_min(length, destCapacity)); - *pErrorCode=U_USING_DEFAULT_WARNING; - } - - return u_terminateUChars(dest, destCapacity, length, pErrorCode); -} - -typedef int32_t U_CALLCONV UDisplayNameGetter(const char *, char *, int32_t, UErrorCode *); - -static int32_t -_getDisplayNameForComponent(const char *locale, - const char *displayLocale, - UChar *dest, int32_t destCapacity, - UDisplayNameGetter *getter, - const char *tag, - UErrorCode *pErrorCode) { - char localeBuffer[ULOC_FULLNAME_CAPACITY*4]; - int32_t length; - UErrorCode localStatus; - const char* root = NULL; - - /* argument checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - localStatus = U_ZERO_ERROR; - length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus); - if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) { - 
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - if(length==0) { - return u_terminateUChars(dest, destCapacity, 0, pErrorCode); - } - - root = tag == _kCountries ? U_ICUDATA_REGION : U_ICUDATA_LANG; - - return _getStringOrCopyKey(root, displayLocale, - tag, NULL, localeBuffer, - localeBuffer, - dest, destCapacity, - pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uloc_getDisplayLanguage(const char *locale, - const char *displayLocale, - UChar *dest, int32_t destCapacity, - UErrorCode *pErrorCode) { - return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, - uloc_getLanguage, _kLanguages, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uloc_getDisplayScript(const char* locale, - const char* displayLocale, - UChar *dest, int32_t destCapacity, - UErrorCode *pErrorCode) -{ - UErrorCode err = U_ZERO_ERROR; - int32_t res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, - uloc_getScript, _kScriptsStandAlone, &err); - - if ( err == U_USING_DEFAULT_WARNING ) { - return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, - uloc_getScript, _kScripts, pErrorCode); - } else { - *pErrorCode = err; - return res; - } -} - -U_INTERNAL int32_t U_EXPORT2 -uloc_getDisplayScriptInContext(const char* locale, - const char* displayLocale, - UChar *dest, int32_t destCapacity, - UErrorCode *pErrorCode) -{ - return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, - uloc_getScript, _kScripts, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uloc_getDisplayCountry(const char *locale, - const char *displayLocale, - UChar *dest, int32_t destCapacity, - UErrorCode *pErrorCode) { - return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, - uloc_getCountry, _kCountries, pErrorCode); -} - -/* - * TODO separate variant1_variant2_variant3... 
- * by getting each tag's display string and concatenating them with ", " - * in between - similar to uloc_getDisplayName() - */ -U_CAPI int32_t U_EXPORT2 -uloc_getDisplayVariant(const char *locale, - const char *displayLocale, - UChar *dest, int32_t destCapacity, - UErrorCode *pErrorCode) { - return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, - uloc_getVariant, _kVariants, pErrorCode); -} - -/* Instead of having a separate pass for 'special' patterns, reintegrate the two - * so we don't get bitten by preflight bugs again. We can be reasonably efficient - * without two separate code paths, this code isn't that performance-critical. - * - * This code is general enough to deal with patterns that have a prefix or swap the - * language and remainder components, since we gave developers enough rope to do such - * things if they futz with the pattern data. But since we don't give them a way to - * specify a pattern for arbitrary combinations of components, there's not much use in - * that. I don't think our data includes such patterns, the only variable I know if is - * whether there is a space before the open paren, or not. Oh, and zh uses different - * chars than the standard open/close paren (which ja and ko use, btw). 
- */ -U_CAPI int32_t U_EXPORT2 -uloc_getDisplayName(const char *locale, - const char *displayLocale, - UChar *dest, int32_t destCapacity, - UErrorCode *pErrorCode) -{ - static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */ - static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */ - static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */ - static const int32_t subLen = 3; - static const UChar defaultPattern[10] = { - 0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000 - }; /* {0} ({1}) */ - static const int32_t defaultPatLen = 9; - static const int32_t defaultSub0Pos = 0; - static const int32_t defaultSub1Pos = 5; - - int32_t length; /* of formatted result */ - - const UChar *separator; - int32_t sepLen = 0; - const UChar *pattern; - int32_t patLen = 0; - int32_t sub0Pos, sub1Pos; - - UChar formatOpenParen = 0x0028; // ( - UChar formatReplaceOpenParen = 0x005B; // [ - UChar formatCloseParen = 0x0029; // ) - UChar formatReplaceCloseParen = 0x005D; // ] - - UBool haveLang = TRUE; /* assume true, set false if we find we don't have - a lang component in the locale */ - UBool haveRest = TRUE; /* assume true, set false if we find we don't have - any other component in the locale */ - UBool retry = FALSE; /* set true if we need to retry, see below */ - - int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */ - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - { - UErrorCode status = U_ZERO_ERROR; - UResourceBundle* locbundle=ures_open(U_ICUDATA_LANG, displayLocale, &status); - UResourceBundle* dspbundle=ures_getByKeyWithFallback(locbundle, _kLocaleDisplayPattern, - NULL, &status); - - separator=ures_getStringByKeyWithFallback(dspbundle, _kSeparator, &sepLen, 
&status); - pattern=ures_getStringByKeyWithFallback(dspbundle, _kPattern, &patLen, &status); - - ures_close(dspbundle); - ures_close(locbundle); - } - - /* If we couldn't find any data, then use the defaults */ - if(sepLen == 0) { - separator = defaultSeparator; - } - /* #10244: Even though separator is now a pattern, it is awkward to handle it as such - * here since we are trying to build the display string in place in the dest buffer, - * and to handle it as a pattern would entail having separate storage for the - * substrings that need to be combined (the first of which may be the result of - * previous such combinations). So for now we continue to treat the portion between - * {0} and {1} as a string to be appended when joining substrings, ignoring anything - * that is before {0} or after {1} (no existing separator pattern has any such thing). - * This is similar to how pattern is handled below. - */ - { - UChar *p0=u_strstr(separator, sub0); - UChar *p1=u_strstr(separator, sub1); - if (p0==NULL || p1==NULL || p1(p1 - separator); - } - - if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) { - pattern=defaultPattern; - patLen=defaultPatLen; - sub0Pos=defaultSub0Pos; - sub1Pos=defaultSub1Pos; - // use default formatOpenParen etc. 
set above - } else { /* non-default pattern */ - UChar *p0=u_strstr(pattern, sub0); - UChar *p1=u_strstr(pattern, sub1); - if (p0==NULL || p1==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - sub0Pos = static_cast(p0-pattern); - sub1Pos = static_cast(p1-pattern); - if (sub1Pos < sub0Pos) { /* a very odd pattern */ - int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t; - langi=1; - } - if (u_strchr(pattern, 0xFF08) != NULL) { - formatOpenParen = 0xFF08; // fullwidth ( - formatReplaceOpenParen = 0xFF3B; // fullwidth [ - formatCloseParen = 0xFF09; // fullwidth ) - formatReplaceCloseParen = 0xFF3D; // fullwidth ] - } - } - - /* We loop here because there is one case in which after the first pass we could need to - * reextract the data. If there's initial padding before the first element, we put in - * the padding and then write that element. If it turns out there's no second element, - * we didn't need the padding. If we do need the data (no preflight), and the first element - * would have fit but for the padding, we need to reextract. In this case (only) we - * adjust the parameters so padding is not added, and repeat. 
- */ - do { - UChar* p=dest; - int32_t patPos=0; /* position in the pattern, used for non-substitution portions */ - int32_t langLen=0; /* length of language substitution */ - int32_t langPos=0; /* position in output of language substitution */ - int32_t restLen=0; /* length of 'everything else' substitution */ - int32_t restPos=0; /* position in output of 'everything else' substitution */ - UEnumeration* kenum = NULL; /* keyword enumeration */ - - /* prefix of pattern, extremely likely to be empty */ - if(sub0Pos) { - if(destCapacity >= sub0Pos) { - while (patPos < sub0Pos) { - *p++ = pattern[patPos++]; - } - } else { - patPos=sub0Pos; - } - length=sub0Pos; - } else { - length=0; - } - - for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/ - UBool subdone = FALSE; /* set true when ready to move to next substitution */ - - /* prep p and cap for calls to get display components, pin cap to 0 since - they complain if cap is negative */ - int32_t cap=destCapacity-length; - if (cap <= 0) { - cap=0; - } else { - p=dest+length; - } - - if (subi == langi) { /* {0}*/ - if(haveLang) { - langPos=length; - langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode); - length+=langLen; - haveLang=langLen>0; - } - subdone=TRUE; - } else { /* {1} */ - if(!haveRest) { - subdone=TRUE; - } else { - int32_t len; /* length of component (plus other stuff) we just fetched */ - switch(resti++) { - case 0: - restPos=length; - len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode); - break; - case 1: - len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode); - break; - case 2: - len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode); - break; - case 3: - kenum = uloc_openKeywords(locale, pErrorCode); - U_FALLTHROUGH; - default: { - const char* kw=uenum_next(kenum, &len, pErrorCode); - if (kw == NULL) { - uenum_close(kenum); - len=0; /* mark that we didn't add a component */ - subdone=TRUE; - } else { 
- /* incorporating this behavior into the loop made it even more complex, - so just special case it here */ - len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode); - if(len) { - if(len < cap) { - p[len]=0x3d; /* '=', assume we'll need it */ - } - len+=1; - - /* adjust for call to get keyword */ - cap-=len; - if(cap <= 0) { - cap=0; - } else { - p+=len; - } - } - /* reset for call below */ - if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) { - *pErrorCode=U_ZERO_ERROR; - } - int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale, - p, cap, pErrorCode); - if(len) { - if(vlen==0) { - --len; /* remove unneeded '=' */ - } - /* restore cap and p to what they were at start */ - cap=destCapacity-length; - if(cap <= 0) { - cap=0; - } else { - p=dest+length; - } - } - len+=vlen; /* total we added for key + '=' + value */ - } - } break; - } /* end switch */ - - if (len>0) { - /* we addeed a component, so add separator and write it if there's room. */ - if(len+sepLen<=cap) { - const UChar * plimit = p + len; - for (; p < plimit; p++) { - if (*p == formatOpenParen) { - *p = formatReplaceOpenParen; - } else if (*p == formatCloseParen) { - *p = formatReplaceCloseParen; - } - } - for(int32_t i=0;i0; - } - } - } - - if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) { - *pErrorCode=U_ZERO_ERROR; - } - - if(subdone) { - if(haveLang && haveRest) { - /* append internal portion of pattern, the first time, - or last portion of pattern the second time */ - int32_t padLen; - patPos+=subLen; - padLen=(subi==0 ? sub1Pos : patLen)-patPos; - if(length+padLen < destCapacity) { - p=dest+length; - for(int32_t i=0;i0) { - /* true length is the length of just the component we got. */ - length=haveLang?langLen:restLen; - if(dest && sub0Pos!=0) { - if (sub0Pos+length<=destCapacity) { - /* first component not at start of result, - but we have full component in buffer. 
*/ - u_memmove(dest, dest+(haveLang?langPos:restPos), length); - } else { - /* would have fit, but didn't because of pattern prefix. */ - sub0Pos=0; /* stops initial padding (and a second retry, - so we won't end up here again) */ - retry=TRUE; - } - } - } - - ++subi; /* move on to next substitution */ - } - } - } while(retry); - - return u_terminateUChars(dest, destCapacity, length, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uloc_getDisplayKeyword(const char* keyword, - const char* displayLocale, - UChar* dest, - int32_t destCapacity, - UErrorCode* status){ - - /* argument checking */ - if(status==NULL || U_FAILURE(*status)) { - return 0; - } - - if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { - *status=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - - /* pass itemKey=NULL to look for a top-level item */ - return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale, - _kKeys, NULL, - keyword, - keyword, - dest, destCapacity, - status); - -} - - -#define UCURRENCY_DISPLAY_NAME_INDEX 1 - -U_CAPI int32_t U_EXPORT2 -uloc_getDisplayKeywordValue( const char* locale, - const char* keyword, - const char* displayLocale, - UChar* dest, - int32_t destCapacity, - UErrorCode* status){ - - - char keywordValue[ULOC_FULLNAME_CAPACITY*4]; - int32_t capacity = ULOC_FULLNAME_CAPACITY*4; - int32_t keywordValueLen =0; - - /* argument checking */ - if(status==NULL || U_FAILURE(*status)) { - return 0; - } - - if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { - *status=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* get the keyword value */ - keywordValue[0]=0; - keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status); - if (*status == U_STRING_NOT_TERMINATED_WARNING) - *status = U_BUFFER_OVERFLOW_ERROR; - - /* - * if the keyword is equal to currency .. 
then to get the display name - * we need to do the fallback ourselves - */ - if(uprv_stricmp(keyword, _kCurrency)==0){ - - int32_t dispNameLen = 0; - const UChar *dispName = NULL; - - UResourceBundle *bundle = ures_open(U_ICUDATA_CURR, displayLocale, status); - UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status); - UResourceBundle *currency = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status); - - dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status); - - /*close the bundles */ - ures_close(currency); - ures_close(currencies); - ures_close(bundle); - - if(U_FAILURE(*status)){ - if(*status == U_MISSING_RESOURCE_ERROR){ - /* we just want to write the value over if nothing is available */ - *status = U_USING_DEFAULT_WARNING; - }else{ - return 0; - } - } - - /* now copy the dispName over if not NULL */ - if(dispName != NULL){ - if(dispNameLen <= destCapacity){ - u_memcpy(dest, dispName, dispNameLen); - return u_terminateUChars(dest, destCapacity, dispNameLen, status); - }else{ - *status = U_BUFFER_OVERFLOW_ERROR; - return dispNameLen; - } - }else{ - /* we have not found the display name for the value .. just copy over */ - if(keywordValueLen <= destCapacity){ - u_charsToUChars(keywordValue, dest, keywordValueLen); - return u_terminateUChars(dest, destCapacity, keywordValueLen, status); - }else{ - *status = U_BUFFER_OVERFLOW_ERROR; - return keywordValueLen; - } - } - - - }else{ - - return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale, - _kTypes, keyword, - keywordValue, - keywordValue, - dest, destCapacity, - status); - } -} diff --git a/deps/node/deps/icu-small/source/common/locdspnm.cpp b/deps/node/deps/icu-small/source/common/locdspnm.cpp deleted file mode 100644 index 2d9389e9..00000000 --- a/deps/node/deps/icu-small/source/common/locdspnm.cpp +++ /dev/null @@ -1,1058 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2016, International Business Machines Corporation and -* others. All Rights Reserved. -******************************************************************************* -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/locdspnm.h" -#include "unicode/simpleformatter.h" -#include "unicode/ucasemap.h" -#include "unicode/ures.h" -#include "unicode/udisplaycontext.h" -#include "unicode/brkiter.h" -#include "unicode/ucurr.h" -#include "cmemory.h" -#include "cstring.h" -#include "mutex.h" -#include "ulocimp.h" -#include "umutex.h" -#include "ureslocs.h" -#include "uresimp.h" - -#include - -/** - * Concatenate a number of null-terminated strings to buffer, leaving a - * null-terminated string. The last argument should be the null pointer. - * Return the length of the string in the buffer, not counting the trailing - * null. Return -1 if there is an error (buffer is null, or buflen < 1). - */ -static int32_t ncat(char *buffer, uint32_t buflen, ...) { - va_list args; - char *str; - char *p = buffer; - const char* e = buffer + buflen - 1; - - if (buffer == NULL || buflen < 1) { - return -1; - } - - va_start(args, buflen); - while ((str = va_arg(args, char *)) != 0) { - char c; - while (p != e && (c = *str++) != 0) { - *p++ = c; - } - } - *p = 0; - va_end(args); - - return static_cast(p - buffer); -} - -U_NAMESPACE_BEGIN - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -// Access resource data for locale components. -// Wrap code in uloc.c for now. 
-class ICUDataTable { - const char* path; - Locale locale; - -public: - ICUDataTable(const char* path, const Locale& locale); - ~ICUDataTable(); - - const Locale& getLocale(); - - UnicodeString& get(const char* tableKey, const char* itemKey, - UnicodeString& result) const; - UnicodeString& get(const char* tableKey, const char* subTableKey, const char* itemKey, - UnicodeString& result) const; - - UnicodeString& getNoFallback(const char* tableKey, const char* itemKey, - UnicodeString &result) const; - UnicodeString& getNoFallback(const char* tableKey, const char* subTableKey, const char* itemKey, - UnicodeString &result) const; -}; - -inline UnicodeString & -ICUDataTable::get(const char* tableKey, const char* itemKey, UnicodeString& result) const { - return get(tableKey, NULL, itemKey, result); -} - -inline UnicodeString & -ICUDataTable::getNoFallback(const char* tableKey, const char* itemKey, UnicodeString& result) const { - return getNoFallback(tableKey, NULL, itemKey, result); -} - -ICUDataTable::ICUDataTable(const char* path, const Locale& locale) - : path(NULL), locale(Locale::getRoot()) -{ - if (path) { - int32_t len = static_cast(uprv_strlen(path)); - this->path = (const char*) uprv_malloc(len + 1); - if (this->path) { - uprv_strcpy((char *)this->path, path); - this->locale = locale; - } - } -} - -ICUDataTable::~ICUDataTable() { - if (path) { - uprv_free((void*) path); - path = NULL; - } -} - -const Locale& -ICUDataTable::getLocale() { - return locale; -} - -UnicodeString & -ICUDataTable::get(const char* tableKey, const char* subTableKey, const char* itemKey, - UnicodeString &result) const { - UErrorCode status = U_ZERO_ERROR; - int32_t len = 0; - - const UChar *s = uloc_getTableStringWithFallback(path, locale.getName(), - tableKey, subTableKey, itemKey, - &len, &status); - if (U_SUCCESS(status) && len > 0) { - return result.setTo(s, len); - } - return result.setTo(UnicodeString(itemKey, -1, US_INV)); -} - -UnicodeString & -ICUDataTable::getNoFallback(const 
char* tableKey, const char* subTableKey, const char* itemKey, - UnicodeString& result) const { - UErrorCode status = U_ZERO_ERROR; - int32_t len = 0; - - const UChar *s = uloc_getTableStringWithFallback(path, locale.getName(), - tableKey, subTableKey, itemKey, - &len, &status); - if (U_SUCCESS(status)) { - return result.setTo(s, len); - } - - result.setToBogus(); - return result; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -LocaleDisplayNames::~LocaleDisplayNames() {} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#if 0 // currently unused - -class DefaultLocaleDisplayNames : public LocaleDisplayNames { - UDialectHandling dialectHandling; - -public: - // constructor - DefaultLocaleDisplayNames(UDialectHandling dialectHandling); - - virtual ~DefaultLocaleDisplayNames(); - - virtual const Locale& getLocale() const; - virtual UDialectHandling getDialectHandling() const; - - virtual UnicodeString& localeDisplayName(const Locale& locale, - UnicodeString& result) const; - virtual UnicodeString& localeDisplayName(const char* localeId, - UnicodeString& result) const; - virtual UnicodeString& languageDisplayName(const char* lang, - UnicodeString& result) const; - virtual UnicodeString& scriptDisplayName(const char* script, - UnicodeString& result) const; - virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode, - UnicodeString& result) const; - virtual UnicodeString& regionDisplayName(const char* region, - UnicodeString& result) const; - virtual UnicodeString& variantDisplayName(const char* variant, - UnicodeString& result) const; - virtual UnicodeString& keyDisplayName(const char* key, - UnicodeString& result) const; - virtual UnicodeString& keyValueDisplayName(const char* key, - const char* value, - UnicodeString& result) const; -}; - -DefaultLocaleDisplayNames::DefaultLocaleDisplayNames(UDialectHandling dialectHandling) - : 
dialectHandling(dialectHandling) { -} - -DefaultLocaleDisplayNames::~DefaultLocaleDisplayNames() { -} - -const Locale& -DefaultLocaleDisplayNames::getLocale() const { - return Locale::getRoot(); -} - -UDialectHandling -DefaultLocaleDisplayNames::getDialectHandling() const { - return dialectHandling; -} - -UnicodeString& -DefaultLocaleDisplayNames::localeDisplayName(const Locale& locale, - UnicodeString& result) const { - return result = UnicodeString(locale.getName(), -1, US_INV); -} - -UnicodeString& -DefaultLocaleDisplayNames::localeDisplayName(const char* localeId, - UnicodeString& result) const { - return result = UnicodeString(localeId, -1, US_INV); -} - -UnicodeString& -DefaultLocaleDisplayNames::languageDisplayName(const char* lang, - UnicodeString& result) const { - return result = UnicodeString(lang, -1, US_INV); -} - -UnicodeString& -DefaultLocaleDisplayNames::scriptDisplayName(const char* script, - UnicodeString& result) const { - return result = UnicodeString(script, -1, US_INV); -} - -UnicodeString& -DefaultLocaleDisplayNames::scriptDisplayName(UScriptCode scriptCode, - UnicodeString& result) const { - const char* name = uscript_getName(scriptCode); - if (name) { - return result = UnicodeString(name, -1, US_INV); - } - return result.remove(); -} - -UnicodeString& -DefaultLocaleDisplayNames::regionDisplayName(const char* region, - UnicodeString& result) const { - return result = UnicodeString(region, -1, US_INV); -} - -UnicodeString& -DefaultLocaleDisplayNames::variantDisplayName(const char* variant, - UnicodeString& result) const { - return result = UnicodeString(variant, -1, US_INV); -} - -UnicodeString& -DefaultLocaleDisplayNames::keyDisplayName(const char* key, - UnicodeString& result) const { - return result = UnicodeString(key, -1, US_INV); -} - -UnicodeString& -DefaultLocaleDisplayNames::keyValueDisplayName(const char* /* key */, - const char* value, - UnicodeString& result) const { - return result = UnicodeString(value, -1, US_INV); -} - -#endif 
// currently unused class DefaultLocaleDisplayNames - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -class LocaleDisplayNamesImpl : public LocaleDisplayNames { - Locale locale; - UDialectHandling dialectHandling; - ICUDataTable langData; - ICUDataTable regionData; - SimpleFormatter separatorFormat; - SimpleFormatter format; - SimpleFormatter keyTypeFormat; - UDisplayContext capitalizationContext; -#if !UCONFIG_NO_BREAK_ITERATION - BreakIterator* capitalizationBrkIter; -#else - UObject* capitalizationBrkIter; -#endif - static UMutex capitalizationBrkIterLock; - UnicodeString formatOpenParen; - UnicodeString formatReplaceOpenParen; - UnicodeString formatCloseParen; - UnicodeString formatReplaceCloseParen; - UDisplayContext nameLength; - - // Constants for capitalization context usage types. - enum CapContextUsage { - kCapContextUsageLanguage, - kCapContextUsageScript, - kCapContextUsageTerritory, - kCapContextUsageVariant, - kCapContextUsageKey, - kCapContextUsageKeyValue, - kCapContextUsageCount - }; - // Capitalization transforms. 
For each usage type, indicates whether to titlecase for - // the context specified in capitalizationContext (which we know at construction time) - UBool fCapitalization[kCapContextUsageCount]; - -public: - // constructor - LocaleDisplayNamesImpl(const Locale& locale, UDialectHandling dialectHandling); - LocaleDisplayNamesImpl(const Locale& locale, UDisplayContext *contexts, int32_t length); - virtual ~LocaleDisplayNamesImpl(); - - virtual const Locale& getLocale() const; - virtual UDialectHandling getDialectHandling() const; - virtual UDisplayContext getContext(UDisplayContextType type) const; - - virtual UnicodeString& localeDisplayName(const Locale& locale, - UnicodeString& result) const; - virtual UnicodeString& localeDisplayName(const char* localeId, - UnicodeString& result) const; - virtual UnicodeString& languageDisplayName(const char* lang, - UnicodeString& result) const; - virtual UnicodeString& scriptDisplayName(const char* script, - UnicodeString& result) const; - virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode, - UnicodeString& result) const; - virtual UnicodeString& regionDisplayName(const char* region, - UnicodeString& result) const; - virtual UnicodeString& variantDisplayName(const char* variant, - UnicodeString& result) const; - virtual UnicodeString& keyDisplayName(const char* key, - UnicodeString& result) const; - virtual UnicodeString& keyValueDisplayName(const char* key, - const char* value, - UnicodeString& result) const; -private: - UnicodeString& localeIdName(const char* localeId, - UnicodeString& result) const; - UnicodeString& appendWithSep(UnicodeString& buffer, const UnicodeString& src) const; - UnicodeString& adjustForUsageAndContext(CapContextUsage usage, UnicodeString& result) const; - UnicodeString& scriptDisplayName(const char* script, UnicodeString& result, UBool skipAdjust) const; - UnicodeString& regionDisplayName(const char* region, UnicodeString& result, UBool skipAdjust) const; - UnicodeString& 
variantDisplayName(const char* variant, UnicodeString& result, UBool skipAdjust) const; - UnicodeString& keyDisplayName(const char* key, UnicodeString& result, UBool skipAdjust) const; - UnicodeString& keyValueDisplayName(const char* key, const char* value, - UnicodeString& result, UBool skipAdjust) const; - void initialize(void); - - struct CapitalizationContextSink; -}; - -UMutex LocaleDisplayNamesImpl::capitalizationBrkIterLock = U_MUTEX_INITIALIZER; - -LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale, - UDialectHandling dialectHandling) - : dialectHandling(dialectHandling) - , langData(U_ICUDATA_LANG, locale) - , regionData(U_ICUDATA_REGION, locale) - , capitalizationContext(UDISPCTX_CAPITALIZATION_NONE) - , capitalizationBrkIter(NULL) - , nameLength(UDISPCTX_LENGTH_FULL) -{ - initialize(); -} - -LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale, - UDisplayContext *contexts, int32_t length) - : dialectHandling(ULDN_STANDARD_NAMES) - , langData(U_ICUDATA_LANG, locale) - , regionData(U_ICUDATA_REGION, locale) - , capitalizationContext(UDISPCTX_CAPITALIZATION_NONE) - , capitalizationBrkIter(NULL) - , nameLength(UDISPCTX_LENGTH_FULL) -{ - while (length-- > 0) { - UDisplayContext value = *contexts++; - UDisplayContextType selector = (UDisplayContextType)((uint32_t)value >> 8); - switch (selector) { - case UDISPCTX_TYPE_DIALECT_HANDLING: - dialectHandling = (UDialectHandling)value; - break; - case UDISPCTX_TYPE_CAPITALIZATION: - capitalizationContext = value; - break; - case UDISPCTX_TYPE_DISPLAY_LENGTH: - nameLength = value; - break; - default: - break; - } - } - initialize(); -} - -struct LocaleDisplayNamesImpl::CapitalizationContextSink : public ResourceSink { - UBool hasCapitalizationUsage; - LocaleDisplayNamesImpl& parent; - - CapitalizationContextSink(LocaleDisplayNamesImpl& _parent) - : hasCapitalizationUsage(FALSE), parent(_parent) {} - virtual ~CapitalizationContextSink(); - - virtual void put(const char *key, 
ResourceValue &value, UBool /*noFallback*/, - UErrorCode &errorCode) { - ResourceTable contexts = value.getTable(errorCode); - if (U_FAILURE(errorCode)) { return; } - for (int i = 0; contexts.getKeyAndValue(i, key, value); ++i) { - - CapContextUsage usageEnum; - if (uprv_strcmp(key, "key") == 0) { - usageEnum = kCapContextUsageKey; - } else if (uprv_strcmp(key, "keyValue") == 0) { - usageEnum = kCapContextUsageKeyValue; - } else if (uprv_strcmp(key, "languages") == 0) { - usageEnum = kCapContextUsageLanguage; - } else if (uprv_strcmp(key, "script") == 0) { - usageEnum = kCapContextUsageScript; - } else if (uprv_strcmp(key, "territory") == 0) { - usageEnum = kCapContextUsageTerritory; - } else if (uprv_strcmp(key, "variant") == 0) { - usageEnum = kCapContextUsageVariant; - } else { - continue; - } - - int32_t len = 0; - const int32_t* intVector = value.getIntVector(len, errorCode); - if (U_FAILURE(errorCode)) { return; } - if (len < 2) { continue; } - - int32_t titlecaseInt = (parent.capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU) ? intVector[0] : intVector[1]; - if (titlecaseInt == 0) { continue; } - - parent.fCapitalization[usageEnum] = TRUE; - hasCapitalizationUsage = TRUE; - } - } -}; - -// Virtual destructors must be defined out of line. -LocaleDisplayNamesImpl::CapitalizationContextSink::~CapitalizationContextSink() {} - -void -LocaleDisplayNamesImpl::initialize(void) { - LocaleDisplayNamesImpl *nonConstThis = (LocaleDisplayNamesImpl *)this; - nonConstThis->locale = langData.getLocale() == Locale::getRoot() - ? 
regionData.getLocale() - : langData.getLocale(); - - UnicodeString sep; - langData.getNoFallback("localeDisplayPattern", "separator", sep); - if (sep.isBogus()) { - sep = UnicodeString("{0}, {1}", -1, US_INV); - } - UErrorCode status = U_ZERO_ERROR; - separatorFormat.applyPatternMinMaxArguments(sep, 2, 2, status); - - UnicodeString pattern; - langData.getNoFallback("localeDisplayPattern", "pattern", pattern); - if (pattern.isBogus()) { - pattern = UnicodeString("{0} ({1})", -1, US_INV); - } - format.applyPatternMinMaxArguments(pattern, 2, 2, status); - if (pattern.indexOf((UChar)0xFF08) >= 0) { - formatOpenParen.setTo((UChar)0xFF08); // fullwidth ( - formatReplaceOpenParen.setTo((UChar)0xFF3B); // fullwidth [ - formatCloseParen.setTo((UChar)0xFF09); // fullwidth ) - formatReplaceCloseParen.setTo((UChar)0xFF3D); // fullwidth ] - } else { - formatOpenParen.setTo((UChar)0x0028); // ( - formatReplaceOpenParen.setTo((UChar)0x005B); // [ - formatCloseParen.setTo((UChar)0x0029); // ) - formatReplaceCloseParen.setTo((UChar)0x005D); // ] - } - - UnicodeString ktPattern; - langData.get("localeDisplayPattern", "keyTypePattern", ktPattern); - if (ktPattern.isBogus()) { - ktPattern = UnicodeString("{0}={1}", -1, US_INV); - } - keyTypeFormat.applyPatternMinMaxArguments(ktPattern, 2, 2, status); - - uprv_memset(fCapitalization, 0, sizeof(fCapitalization)); -#if !UCONFIG_NO_BREAK_ITERATION - // Only get the context data if we need it! This is a const object so we know now... 
- // Also check whether we will need a break iterator (depends on the data) - UBool needBrkIter = FALSE; - if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE) { - LocalUResourceBundlePointer resource(ures_open(NULL, locale.getName(), &status)); - if (U_FAILURE(status)) { return; } - CapitalizationContextSink sink(*this); - ures_getAllItemsWithFallback(resource.getAlias(), "contextTransforms", sink, status); - if (status == U_MISSING_RESOURCE_ERROR) { - // Silently ignore. Not every locale has contextTransforms. - status = U_ZERO_ERROR; - } else if (U_FAILURE(status)) { - return; - } - needBrkIter = sink.hasCapitalizationUsage; - } - // Get a sentence break iterator if we will need it - if (needBrkIter || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE) { - status = U_ZERO_ERROR; - capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); - if (U_FAILURE(status)) { - delete capitalizationBrkIter; - capitalizationBrkIter = NULL; - } - } -#endif -} - -LocaleDisplayNamesImpl::~LocaleDisplayNamesImpl() { -#if !UCONFIG_NO_BREAK_ITERATION - delete capitalizationBrkIter; -#endif -} - -const Locale& -LocaleDisplayNamesImpl::getLocale() const { - return locale; -} - -UDialectHandling -LocaleDisplayNamesImpl::getDialectHandling() const { - return dialectHandling; -} - -UDisplayContext -LocaleDisplayNamesImpl::getContext(UDisplayContextType type) const { - switch (type) { - case UDISPCTX_TYPE_DIALECT_HANDLING: - return (UDisplayContext)dialectHandling; - case UDISPCTX_TYPE_CAPITALIZATION: - return capitalizationContext; - case UDISPCTX_TYPE_DISPLAY_LENGTH: - return nameLength; - default: - break; - } - return (UDisplayContext)0; -} - -UnicodeString& -LocaleDisplayNamesImpl::adjustForUsageAndContext(CapContextUsage usage, - UnicodeString& result) const { -#if !UCONFIG_NO_BREAK_ITERATION - // check to see whether we need to titlecase result - 
if ( result.length() > 0 && u_islower(result.char32At(0)) && capitalizationBrkIter!= NULL && - ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || fCapitalization[usage] ) ) { - // note fCapitalization[usage] won't be set unless capitalizationContext is UI_LIST_OR_MENU or STANDALONE - Mutex lock(&capitalizationBrkIterLock); - result.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); - } -#endif - return result; -} - -UnicodeString& -LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc, - UnicodeString& result) const { - if (loc.isBogus()) { - result.setToBogus(); - return result; - } - UnicodeString resultName; - - const char* lang = loc.getLanguage(); - if (uprv_strlen(lang) == 0) { - lang = "root"; - } - const char* script = loc.getScript(); - const char* country = loc.getCountry(); - const char* variant = loc.getVariant(); - - UBool hasScript = uprv_strlen(script) > 0; - UBool hasCountry = uprv_strlen(country) > 0; - UBool hasVariant = uprv_strlen(variant) > 0; - - if (dialectHandling == ULDN_DIALECT_NAMES) { - char buffer[ULOC_FULLNAME_CAPACITY]; - do { // loop construct is so we can break early out of search - if (hasScript && hasCountry) { - ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, "_", country, (char *)0); - localeIdName(buffer, resultName); - if (!resultName.isBogus()) { - hasScript = FALSE; - hasCountry = FALSE; - break; - } - } - if (hasScript) { - ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, (char *)0); - localeIdName(buffer, resultName); - if (!resultName.isBogus()) { - hasScript = FALSE; - break; - } - } - if (hasCountry) { - ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", country, (char*)0); - localeIdName(buffer, resultName); - if (!resultName.isBogus()) { - hasCountry = FALSE; - break; - } - } - } while (FALSE); - } - if (resultName.isBogus() || resultName.isEmpty()) { - localeIdName(lang, resultName); - } - - UnicodeString resultRemainder; - 
UnicodeString temp; - UErrorCode status = U_ZERO_ERROR; - - if (hasScript) { - resultRemainder.append(scriptDisplayName(script, temp, TRUE)); - } - if (hasCountry) { - appendWithSep(resultRemainder, regionDisplayName(country, temp, TRUE)); - } - if (hasVariant) { - appendWithSep(resultRemainder, variantDisplayName(variant, temp, TRUE)); - } - resultRemainder.findAndReplace(formatOpenParen, formatReplaceOpenParen); - resultRemainder.findAndReplace(formatCloseParen, formatReplaceCloseParen); - - LocalPointer e(loc.createKeywords(status)); - if (e.isValid() && U_SUCCESS(status)) { - UnicodeString temp2; - char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY - const char* key; - while ((key = e->next((int32_t *)0, status)) != NULL) { - value[0] = 0; - loc.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status); - if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) { - return result; - } - keyDisplayName(key, temp, TRUE); - temp.findAndReplace(formatOpenParen, formatReplaceOpenParen); - temp.findAndReplace(formatCloseParen, formatReplaceCloseParen); - keyValueDisplayName(key, value, temp2, TRUE); - temp2.findAndReplace(formatOpenParen, formatReplaceOpenParen); - temp2.findAndReplace(formatCloseParen, formatReplaceCloseParen); - if (temp2 != UnicodeString(value, -1, US_INV)) { - appendWithSep(resultRemainder, temp2); - } else if (temp != UnicodeString(key, -1, US_INV)) { - UnicodeString temp3; - keyTypeFormat.format(temp, temp2, temp3, status); - appendWithSep(resultRemainder, temp3); - } else { - appendWithSep(resultRemainder, temp) - .append((UChar)0x3d /* = */) - .append(temp2); - } - } - } - - if (!resultRemainder.isEmpty()) { - format.format(resultName, resultRemainder, result.remove(), status); - return adjustForUsageAndContext(kCapContextUsageLanguage, result); - } - - result = resultName; - return adjustForUsageAndContext(kCapContextUsageLanguage, result); -} - -UnicodeString& 
-LocaleDisplayNamesImpl::appendWithSep(UnicodeString& buffer, const UnicodeString& src) const { - if (buffer.isEmpty()) { - buffer.setTo(src); - } else { - const UnicodeString *values[2] = { &buffer, &src }; - UErrorCode status = U_ZERO_ERROR; - separatorFormat.formatAndReplace(values, 2, buffer, NULL, 0, status); - } - return buffer; -} - -UnicodeString& -LocaleDisplayNamesImpl::localeDisplayName(const char* localeId, - UnicodeString& result) const { - return localeDisplayName(Locale(localeId), result); -} - -// private -UnicodeString& -LocaleDisplayNamesImpl::localeIdName(const char* localeId, - UnicodeString& result) const { - if (nameLength == UDISPCTX_LENGTH_SHORT) { - langData.getNoFallback("Languages%short", localeId, result); - if (!result.isBogus()) { - return result; - } - } - return langData.getNoFallback("Languages", localeId, result); -} - -UnicodeString& -LocaleDisplayNamesImpl::languageDisplayName(const char* lang, - UnicodeString& result) const { - if (uprv_strcmp("root", lang) == 0 || uprv_strchr(lang, '_') != NULL) { - return result = UnicodeString(lang, -1, US_INV); - } - if (nameLength == UDISPCTX_LENGTH_SHORT) { - langData.get("Languages%short", lang, result); - if (!result.isBogus()) { - return adjustForUsageAndContext(kCapContextUsageLanguage, result); - } - } - langData.get("Languages", lang, result); - return adjustForUsageAndContext(kCapContextUsageLanguage, result); -} - -UnicodeString& -LocaleDisplayNamesImpl::scriptDisplayName(const char* script, - UnicodeString& result, - UBool skipAdjust) const { - if (nameLength == UDISPCTX_LENGTH_SHORT) { - langData.get("Scripts%short", script, result); - if (!result.isBogus()) { - return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageScript, result); - } - } - langData.get("Scripts", script, result); - return skipAdjust? 
result: adjustForUsageAndContext(kCapContextUsageScript, result); -} - -UnicodeString& -LocaleDisplayNamesImpl::scriptDisplayName(const char* script, - UnicodeString& result) const { - return scriptDisplayName(script, result, FALSE); -} - -UnicodeString& -LocaleDisplayNamesImpl::scriptDisplayName(UScriptCode scriptCode, - UnicodeString& result) const { - return scriptDisplayName(uscript_getName(scriptCode), result, FALSE); -} - -UnicodeString& -LocaleDisplayNamesImpl::regionDisplayName(const char* region, - UnicodeString& result, - UBool skipAdjust) const { - if (nameLength == UDISPCTX_LENGTH_SHORT) { - regionData.get("Countries%short", region, result); - if (!result.isBogus()) { - return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageTerritory, result); - } - } - regionData.get("Countries", region, result); - return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageTerritory, result); -} - -UnicodeString& -LocaleDisplayNamesImpl::regionDisplayName(const char* region, - UnicodeString& result) const { - return regionDisplayName(region, result, FALSE); -} - - -UnicodeString& -LocaleDisplayNamesImpl::variantDisplayName(const char* variant, - UnicodeString& result, - UBool skipAdjust) const { - // don't have a resource for short variant names - langData.get("Variants", variant, result); - return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageVariant, result); -} - -UnicodeString& -LocaleDisplayNamesImpl::variantDisplayName(const char* variant, - UnicodeString& result) const { - return variantDisplayName(variant, result, FALSE); -} - -UnicodeString& -LocaleDisplayNamesImpl::keyDisplayName(const char* key, - UnicodeString& result, - UBool skipAdjust) const { - // don't have a resource for short key names - langData.get("Keys", key, result); - return skipAdjust? 
result: adjustForUsageAndContext(kCapContextUsageKey, result); -} - -UnicodeString& -LocaleDisplayNamesImpl::keyDisplayName(const char* key, - UnicodeString& result) const { - return keyDisplayName(key, result, FALSE); -} - -UnicodeString& -LocaleDisplayNamesImpl::keyValueDisplayName(const char* key, - const char* value, - UnicodeString& result, - UBool skipAdjust) const { - if (uprv_strcmp(key, "currency") == 0) { - // ICU4C does not have ICU4J CurrencyDisplayInfo equivalent for now. - UErrorCode sts = U_ZERO_ERROR; - UnicodeString ustrValue(value, -1, US_INV); - int32_t len; - UBool isChoice = FALSE; - const UChar *currencyName = ucurr_getName(ustrValue.getTerminatedBuffer(), - locale.getBaseName(), UCURR_LONG_NAME, &isChoice, &len, &sts); - if (U_FAILURE(sts)) { - // Return the value as is on failure - result = ustrValue; - return result; - } - result.setTo(currencyName, len); - return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKeyValue, result); - } - - if (nameLength == UDISPCTX_LENGTH_SHORT) { - langData.get("Types%short", key, value, result); - if (!result.isBogus()) { - return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKeyValue, result); - } - } - langData.get("Types", key, value, result); - return skipAdjust? 
result: adjustForUsageAndContext(kCapContextUsageKeyValue, result); -} - -UnicodeString& -LocaleDisplayNamesImpl::keyValueDisplayName(const char* key, - const char* value, - UnicodeString& result) const { - return keyValueDisplayName(key, value, result, FALSE); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -LocaleDisplayNames* -LocaleDisplayNames::createInstance(const Locale& locale, - UDialectHandling dialectHandling) { - return new LocaleDisplayNamesImpl(locale, dialectHandling); -} - -LocaleDisplayNames* -LocaleDisplayNames::createInstance(const Locale& locale, - UDisplayContext *contexts, int32_t length) { - if (contexts == NULL) { - length = 0; - } - return new LocaleDisplayNamesImpl(locale, contexts, length); -} - -U_NAMESPACE_END - -//////////////////////////////////////////////////////////////////////////////////////////////////// - -U_NAMESPACE_USE - -U_CAPI ULocaleDisplayNames * U_EXPORT2 -uldn_open(const char * locale, - UDialectHandling dialectHandling, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (locale == NULL) { - locale = uloc_getDefault(); - } - return (ULocaleDisplayNames *)LocaleDisplayNames::createInstance(Locale(locale), dialectHandling); -} - -U_CAPI ULocaleDisplayNames * U_EXPORT2 -uldn_openForContext(const char * locale, - UDisplayContext *contexts, int32_t length, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (locale == NULL) { - locale = uloc_getDefault(); - } - return (ULocaleDisplayNames *)LocaleDisplayNames::createInstance(Locale(locale), contexts, length); -} - - -U_CAPI void U_EXPORT2 -uldn_close(ULocaleDisplayNames *ldn) { - delete (LocaleDisplayNames *)ldn; -} - -U_CAPI const char * U_EXPORT2 -uldn_getLocale(const ULocaleDisplayNames *ldn) { - if (ldn) { - return ((const LocaleDisplayNames *)ldn)->getLocale().getName(); - } - return NULL; -} - -U_CAPI UDialectHandling U_EXPORT2 
-uldn_getDialectHandling(const ULocaleDisplayNames *ldn) { - if (ldn) { - return ((const LocaleDisplayNames *)ldn)->getDialectHandling(); - } - return ULDN_STANDARD_NAMES; -} - -U_CAPI UDisplayContext U_EXPORT2 -uldn_getContext(const ULocaleDisplayNames *ldn, - UDisplayContextType type, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return (UDisplayContext)0; - } - return ((const LocaleDisplayNames *)ldn)->getContext(type); -} - -U_CAPI int32_t U_EXPORT2 -uldn_localeDisplayName(const ULocaleDisplayNames *ldn, - const char *locale, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (ldn == NULL || locale == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UnicodeString temp(result, 0, maxResultSize); - ((const LocaleDisplayNames *)ldn)->localeDisplayName(locale, temp); - if (temp.isBogus()) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - return temp.extract(result, maxResultSize, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uldn_languageDisplayName(const ULocaleDisplayNames *ldn, - const char *lang, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (ldn == NULL || lang == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UnicodeString temp(result, 0, maxResultSize); - ((const LocaleDisplayNames *)ldn)->languageDisplayName(lang, temp); - return temp.extract(result, maxResultSize, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uldn_scriptDisplayName(const ULocaleDisplayNames *ldn, - const char *script, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (ldn == NULL || script == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) { - *pErrorCode = 
U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UnicodeString temp(result, 0, maxResultSize); - ((const LocaleDisplayNames *)ldn)->scriptDisplayName(script, temp); - return temp.extract(result, maxResultSize, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn, - UScriptCode scriptCode, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode) { - return uldn_scriptDisplayName(ldn, uscript_getName(scriptCode), result, maxResultSize, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uldn_regionDisplayName(const ULocaleDisplayNames *ldn, - const char *region, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (ldn == NULL || region == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UnicodeString temp(result, 0, maxResultSize); - ((const LocaleDisplayNames *)ldn)->regionDisplayName(region, temp); - return temp.extract(result, maxResultSize, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uldn_variantDisplayName(const ULocaleDisplayNames *ldn, - const char *variant, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (ldn == NULL || variant == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UnicodeString temp(result, 0, maxResultSize); - ((const LocaleDisplayNames *)ldn)->variantDisplayName(variant, temp); - return temp.extract(result, maxResultSize, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uldn_keyDisplayName(const ULocaleDisplayNames *ldn, - const char *key, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (ldn == NULL || key == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 
- return 0; - } - UnicodeString temp(result, 0, maxResultSize); - ((const LocaleDisplayNames *)ldn)->keyDisplayName(key, temp); - return temp.extract(result, maxResultSize, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn, - const char *key, - const char *value, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (ldn == NULL || key == NULL || value == NULL || (result == NULL && maxResultSize > 0) - || maxResultSize < 0) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UnicodeString temp(result, 0, maxResultSize); - ((const LocaleDisplayNames *)ldn)->keyValueDisplayName(key, value, temp); - return temp.extract(result, maxResultSize, *pErrorCode); -} - -#endif diff --git a/deps/node/deps/icu-small/source/common/locid.cpp b/deps/node/deps/icu-small/source/common/locid.cpp deleted file mode 100644 index e0dcc8a8..00000000 --- a/deps/node/deps/icu-small/source/common/locid.cpp +++ /dev/null @@ -1,1556 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ********************************************************************** - * Copyright (C) 1997-2016, International Business Machines - * Corporation and others. All Rights Reserved. - ********************************************************************** -* -* File locid.cpp -* -* Created by: Richard Gillam -* -* Modification History: -* -* Date Name Description -* 02/11/97 aliu Changed gLocPath to fgDataDirectory and added -* methods to get and set it. -* 04/02/97 aliu Made operator!= inline; fixed return value -* of getName(). -* 04/15/97 aliu Cleanup for AIX/Win32. -* 04/24/97 aliu Numerous changes per code review. -* 08/18/98 stephen Changed getDisplayName() -* Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE -* Added getISOCountries(), getISOLanguages(), -* getLanguagesForCountry() -* 03/16/99 bertrand rehaul. 
-* 07/21/99 stephen Added U_CFUNC setDefault -* 11/09/99 weiv Added const char * getName() const; -* 04/12/00 srl removing unicodestring api's and cached hash code -* 08/10/01 grhoten Change the static Locales to accessor functions -****************************************************************************** -*/ - -#include - -#include "unicode/bytestream.h" -#include "unicode/locid.h" -#include "unicode/strenum.h" -#include "unicode/stringpiece.h" -#include "unicode/uloc.h" -#include "putilimp.h" -#include "mutex.h" -#include "umutex.h" -#include "uassert.h" -#include "cmemory.h" -#include "cstring.h" -#include "uassert.h" -#include "uhash.h" -#include "ulocimp.h" -#include "ucln_cmn.h" -#include "ustr_imp.h" -#include "charstr.h" -#include "bytesinkutil.h" - -U_CDECL_BEGIN -static UBool U_CALLCONV locale_cleanup(void); -U_CDECL_END - -U_NAMESPACE_BEGIN - -static Locale *gLocaleCache = NULL; -static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER; - -// gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale. 
-static UMutex gDefaultLocaleMutex = U_MUTEX_INITIALIZER; -static UHashtable *gDefaultLocalesHashT = NULL; -static Locale *gDefaultLocale = NULL; - -/** - * \def ULOC_STRING_LIMIT - * strings beyond this value crash in CharString - */ -#define ULOC_STRING_LIMIT 357913941 - -U_NAMESPACE_END - -typedef enum ELocalePos { - eENGLISH, - eFRENCH, - eGERMAN, - eITALIAN, - eJAPANESE, - eKOREAN, - eCHINESE, - - eFRANCE, - eGERMANY, - eITALY, - eJAPAN, - eKOREA, - eCHINA, /* Alias for PRC */ - eTAIWAN, - eUK, - eUS, - eCANADA, - eCANADA_FRENCH, - eROOT, - - - //eDEFAULT, - eMAX_LOCALES -} ELocalePos; - -U_CFUNC int32_t locale_getKeywords(const char *localeID, - char prev, - char *keywords, int32_t keywordCapacity, - char *values, int32_t valuesCapacity, int32_t *valLen, - UBool valuesToo, - UErrorCode *status); - -U_CDECL_BEGIN -// -// Deleter function for Locales owned by the default Locale hash table/ -// -static void U_CALLCONV -deleteLocale(void *obj) { - delete (icu::Locale *) obj; -} - -static UBool U_CALLCONV locale_cleanup(void) -{ - U_NAMESPACE_USE - - delete [] gLocaleCache; - gLocaleCache = NULL; - gLocaleCacheInitOnce.reset(); - - if (gDefaultLocalesHashT) { - uhash_close(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func. 
- gDefaultLocalesHashT = NULL; - } - gDefaultLocale = NULL; - return TRUE; -} - - -static void U_CALLCONV locale_init(UErrorCode &status) { - U_NAMESPACE_USE - - U_ASSERT(gLocaleCache == NULL); - gLocaleCache = new Locale[(int)eMAX_LOCALES]; - if (gLocaleCache == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup); - gLocaleCache[eROOT] = Locale(""); - gLocaleCache[eENGLISH] = Locale("en"); - gLocaleCache[eFRENCH] = Locale("fr"); - gLocaleCache[eGERMAN] = Locale("de"); - gLocaleCache[eITALIAN] = Locale("it"); - gLocaleCache[eJAPANESE] = Locale("ja"); - gLocaleCache[eKOREAN] = Locale("ko"); - gLocaleCache[eCHINESE] = Locale("zh"); - gLocaleCache[eFRANCE] = Locale("fr", "FR"); - gLocaleCache[eGERMANY] = Locale("de", "DE"); - gLocaleCache[eITALY] = Locale("it", "IT"); - gLocaleCache[eJAPAN] = Locale("ja", "JP"); - gLocaleCache[eKOREA] = Locale("ko", "KR"); - gLocaleCache[eCHINA] = Locale("zh", "CN"); - gLocaleCache[eTAIWAN] = Locale("zh", "TW"); - gLocaleCache[eUK] = Locale("en", "GB"); - gLocaleCache[eUS] = Locale("en", "US"); - gLocaleCache[eCANADA] = Locale("en", "CA"); - gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA"); -} - -U_CDECL_END - -U_NAMESPACE_BEGIN - -Locale *locale_set_default_internal(const char *id, UErrorCode& status) { - // Synchronize this entire function. - Mutex lock(&gDefaultLocaleMutex); - - UBool canonicalize = FALSE; - - // If given a NULL string for the locale id, grab the default - // name from the system. - // (Different from most other locale APIs, where a null name means use - // the current ICU default locale.) - if (id == NULL) { - id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify. 
- canonicalize = TRUE; // always canonicalize host ID - } - - char localeNameBuf[512]; - - if (canonicalize) { - uloc_canonicalize(id, localeNameBuf, sizeof(localeNameBuf)-1, &status); - } else { - uloc_getName(id, localeNameBuf, sizeof(localeNameBuf)-1, &status); - } - localeNameBuf[sizeof(localeNameBuf)-1] = 0; // Force null termination in event of - // a long name filling the buffer. - // (long names are truncated.) - // - if (U_FAILURE(status)) { - return gDefaultLocale; - } - - if (gDefaultLocalesHashT == NULL) { - gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status); - if (U_FAILURE(status)) { - return gDefaultLocale; - } - uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale); - ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup); - } - - Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf); - if (newDefault == NULL) { - newDefault = new Locale(Locale::eBOGUS); - if (newDefault == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return gDefaultLocale; - } - newDefault->init(localeNameBuf, FALSE); - uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status); - if (U_FAILURE(status)) { - return gDefaultLocale; - } - } - gDefaultLocale = newDefault; - return gDefaultLocale; -} - -U_NAMESPACE_END - -/* sfb 07/21/99 */ -U_CFUNC void -locale_set_default(const char *id) -{ - U_NAMESPACE_USE - UErrorCode status = U_ZERO_ERROR; - locale_set_default_internal(id, status); -} -/* end */ - -U_CFUNC const char * -locale_get_default(void) -{ - U_NAMESPACE_USE - return Locale::getDefault().getName(); -} - - -U_NAMESPACE_BEGIN - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale) - -/*Character separating the posix id fields*/ -// '_' -// In the platform codepage. 
-#define SEP_CHAR '_' - -Locale::~Locale() -{ - if (baseName != fullName) { - uprv_free(baseName); - } - baseName = NULL; - /*if fullName is on the heap, we free it*/ - if (fullName != fullNameBuffer) - { - uprv_free(fullName); - fullName = NULL; - } -} - -Locale::Locale() - : UObject(), fullName(fullNameBuffer), baseName(NULL) -{ - init(NULL, FALSE); -} - -/* - * Internal constructor to allow construction of a locale object with - * NO side effects. (Default constructor tries to get - * the default locale.) - */ -Locale::Locale(Locale::ELocaleType) - : UObject(), fullName(fullNameBuffer), baseName(NULL) -{ - setToBogus(); -} - - -Locale::Locale( const char * newLanguage, - const char * newCountry, - const char * newVariant, - const char * newKeywords) - : UObject(), fullName(fullNameBuffer), baseName(NULL) -{ - if( (newLanguage==NULL) && (newCountry == NULL) && (newVariant == NULL) ) - { - init(NULL, FALSE); /* shortcut */ - } - else - { - UErrorCode status = U_ZERO_ERROR; - int32_t size = 0; - int32_t lsize = 0; - int32_t csize = 0; - int32_t vsize = 0; - int32_t ksize = 0; - - // Calculate the size of the resulting string. 
- - // Language - if ( newLanguage != NULL ) - { - lsize = (int32_t)uprv_strlen(newLanguage); - if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap - setToBogus(); - return; - } - size = lsize; - } - - CharString togo(newLanguage, lsize, status); // start with newLanguage - - // _Country - if ( newCountry != NULL ) - { - csize = (int32_t)uprv_strlen(newCountry); - if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap - setToBogus(); - return; - } - size += csize; - } - - // _Variant - if ( newVariant != NULL ) - { - // remove leading _'s - while(newVariant[0] == SEP_CHAR) - { - newVariant++; - } - - // remove trailing _'s - vsize = (int32_t)uprv_strlen(newVariant); - if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap - setToBogus(); - return; - } - while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) ) - { - vsize--; - } - } - - if( vsize > 0 ) - { - size += vsize; - } - - // Separator rules: - if ( vsize > 0 ) - { - size += 2; // at least: __v - } - else if ( csize > 0 ) - { - size += 1; // at least: _v - } - - if ( newKeywords != NULL) - { - ksize = (int32_t)uprv_strlen(newKeywords); - if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) { - setToBogus(); - return; - } - size += ksize + 1; - } - - // NOW we have the full locale string.. - // Now, copy it back. - - // newLanguage is already copied - - if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v - { // ^ - togo.append(SEP_CHAR, status); - } - - if ( csize != 0 ) - { - togo.append(newCountry, status); - } - - if ( vsize != 0) - { - togo.append(SEP_CHAR, status) - .append(newVariant, vsize, status); - } - - if ( ksize != 0) - { - if (uprv_strchr(newKeywords, '=')) { - togo.append('@', status); /* keyword parsing */ - } - else { - togo.append('_', status); /* Variant parsing with a script */ - if ( vsize == 0) { - togo.append('_', status); /* No country found */ - } - } - togo.append(newKeywords, status); - } - - if (U_FAILURE(status)) { - // Something went wrong with appending, etc. 
- setToBogus(); - return; - } - // Parse it, because for example 'language' might really be a complete - // string. - init(togo.data(), FALSE); - } -} - -Locale::Locale(const Locale &other) - : UObject(other), fullName(fullNameBuffer), baseName(NULL) -{ - *this = other; -} - -Locale::Locale(Locale&& other) U_NOEXCEPT - : UObject(other), fullName(fullNameBuffer), baseName(fullName) { - *this = std::move(other); -} - -Locale& Locale::operator=(const Locale& other) { - if (this == &other) { - return *this; - } - - setToBogus(); - - if (other.fullName == other.fullNameBuffer) { - uprv_strcpy(fullNameBuffer, other.fullNameBuffer); - } else if (other.fullName == nullptr) { - fullName = nullptr; - } else { - fullName = uprv_strdup(other.fullName); - if (fullName == nullptr) return *this; - } - - if (other.baseName == other.fullName) { - baseName = fullName; - } else if (other.baseName != nullptr) { - baseName = uprv_strdup(other.baseName); - if (baseName == nullptr) return *this; - } - - uprv_strcpy(language, other.language); - uprv_strcpy(script, other.script); - uprv_strcpy(country, other.country); - - variantBegin = other.variantBegin; - fIsBogus = other.fIsBogus; - - return *this; -} - -Locale& Locale::operator=(Locale&& other) U_NOEXCEPT { - if (baseName != fullName) uprv_free(baseName); - if (fullName != fullNameBuffer) uprv_free(fullName); - - if (other.fullName == other.fullNameBuffer) { - uprv_strcpy(fullNameBuffer, other.fullNameBuffer); - fullName = fullNameBuffer; - } else { - fullName = other.fullName; - } - - if (other.baseName == other.fullName) { - baseName = fullName; - } else { - baseName = other.baseName; - } - - uprv_strcpy(language, other.language); - uprv_strcpy(script, other.script); - uprv_strcpy(country, other.country); - - variantBegin = other.variantBegin; - fIsBogus = other.fIsBogus; - - other.baseName = other.fullName = other.fullNameBuffer; - - return *this; -} - -Locale * -Locale::clone() const { - return new Locale(*this); -} - -UBool 
-Locale::operator==( const Locale& other) const -{ - return (uprv_strcmp(other.fullName, fullName) == 0); -} - -#define ISASCIIALPHA(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) - -/*This function initializes a Locale from a C locale ID*/ -Locale& Locale::init(const char* localeID, UBool canonicalize) -{ - fIsBogus = FALSE; - /* Free our current storage */ - if (baseName != fullName) { - uprv_free(baseName); - } - baseName = NULL; - if(fullName != fullNameBuffer) { - uprv_free(fullName); - fullName = fullNameBuffer; - } - - // not a loop: - // just an easy way to have a common error-exit - // without goto and without another function - do { - char *separator; - char *field[5] = {0}; - int32_t fieldLen[5] = {0}; - int32_t fieldIdx; - int32_t variantField; - int32_t length; - UErrorCode err; - - if(localeID == NULL) { - // not an error, just set the default locale - return *this = getDefault(); - } - - /* preset all fields to empty */ - language[0] = script[0] = country[0] = 0; - - // "canonicalize" the locale ID to ICU/Java format - err = U_ZERO_ERROR; - length = canonicalize ? - uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) : - uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err); - - if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) { - /*Go to heap for the fullName if necessary*/ - fullName = (char *)uprv_malloc(sizeof(char)*(length + 1)); - if(fullName == 0) { - fullName = fullNameBuffer; - break; // error: out of memory - } - err = U_ZERO_ERROR; - length = canonicalize ? 
- uloc_canonicalize(localeID, fullName, length+1, &err) : - uloc_getName(localeID, fullName, length+1, &err); - } - if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) { - /* should never occur */ - break; - } - - variantBegin = length; - - /* after uloc_getName/canonicalize() we know that only '_' are separators */ - separator = field[0] = fullName; - fieldIdx = 1; - while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 && fieldIdx < UPRV_LENGTHOF(field)-1) { - field[fieldIdx] = separator + 1; - fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]); - fieldIdx++; - } - // variant may contain @foo or .foo POSIX cruft; remove it - separator = uprv_strchr(field[fieldIdx-1], '@'); - char* sep2 = uprv_strchr(field[fieldIdx-1], '.'); - if (separator!=NULL || sep2!=NULL) { - if (separator==NULL || (sep2!=NULL && separator > sep2)) { - separator = sep2; - } - fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]); - } else { - fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName); - } - - if (fieldLen[0] >= (int32_t)(sizeof(language))) - { - break; // error: the language field is too long - } - - variantField = 1; /* Usually the 2nd one, except when a script or country is also used. 
*/ - if (fieldLen[0] > 0) { - /* We have a language */ - uprv_memcpy(language, fullName, fieldLen[0]); - language[fieldLen[0]] = 0; - } - if (fieldLen[1] == 4 && ISASCIIALPHA(field[1][0]) && - ISASCIIALPHA(field[1][1]) && ISASCIIALPHA(field[1][2]) && - ISASCIIALPHA(field[1][3])) { - /* We have at least a script */ - uprv_memcpy(script, field[1], fieldLen[1]); - script[fieldLen[1]] = 0; - variantField++; - } - - if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) { - /* We have a country */ - uprv_memcpy(country, field[variantField], fieldLen[variantField]); - country[fieldLen[variantField]] = 0; - variantField++; - } else if (fieldLen[variantField] == 0) { - variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */ - } - - if (fieldLen[variantField] > 0) { - /* We have a variant */ - variantBegin = (int32_t)(field[variantField] - fullName); - } - - err = U_ZERO_ERROR; - initBaseName(err); - if (U_FAILURE(err)) { - break; - } - - // successful end of init() - return *this; - } while(0); /*loop doesn't iterate*/ - - // when an error occurs, then set this object to "bogus" (there is no UErrorCode here) - setToBogus(); - - return *this; -} - -/* - * Set up the base name. - * If there are no key words, it's exactly the full name. - * If key words exist, it's the full name truncated at the '@' character. - * Need to set up both at init() and after setting a keyword. - */ -void -Locale::initBaseName(UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - U_ASSERT(baseName==NULL || baseName==fullName); - const char *atPtr = uprv_strchr(fullName, '@'); - const char *eqPtr = uprv_strchr(fullName, '='); - if (atPtr && eqPtr && atPtr < eqPtr) { - // Key words exist. 
- int32_t baseNameLength = (int32_t)(atPtr - fullName); - baseName = (char *)uprv_malloc(baseNameLength + 1); - if (baseName == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - uprv_strncpy(baseName, fullName, baseNameLength); - baseName[baseNameLength] = 0; - - // The original computation of variantBegin leaves it equal to the length - // of fullName if there is no variant. It should instead be - // the length of the baseName. - if (variantBegin > baseNameLength) { - variantBegin = baseNameLength; - } - } else { - baseName = fullName; - } -} - - -int32_t -Locale::hashCode() const -{ - return ustr_hashCharsN(fullName, static_cast(uprv_strlen(fullName))); -} - -void -Locale::setToBogus() { - /* Free our current storage */ - if(baseName != fullName) { - uprv_free(baseName); - } - baseName = NULL; - if(fullName != fullNameBuffer) { - uprv_free(fullName); - fullName = fullNameBuffer; - } - *fullNameBuffer = 0; - *language = 0; - *script = 0; - *country = 0; - fIsBogus = TRUE; - variantBegin = 0; -} - -const Locale& U_EXPORT2 -Locale::getDefault() -{ - { - Mutex lock(&gDefaultLocaleMutex); - if (gDefaultLocale != NULL) { - return *gDefaultLocale; - } - } - UErrorCode status = U_ZERO_ERROR; - return *locale_set_default_internal(NULL, status); -} - - - -void U_EXPORT2 -Locale::setDefault( const Locale& newLocale, - UErrorCode& status) -{ - if (U_FAILURE(status)) { - return; - } - - /* Set the default from the full name string of the supplied locale. - * This is a convenient way to access the default locale caching mechanisms. - */ - const char *localeID = newLocale.getName(); - locale_set_default_internal(localeID, status); -} - -void -Locale::addLikelySubtags(UErrorCode& status) { - if (U_FAILURE(status)) { - return; - } - - // The maximized locale ID string is often longer, but there is no good - // heuristic to estimate just how much longer. Leave that to CharString. 
- CharString maximizedLocaleID; - int32_t maximizedLocaleIDCapacity = static_cast(uprv_strlen(fullName)); - - char* buffer; - int32_t reslen; - - for (;;) { - buffer = maximizedLocaleID.getAppendBuffer( - /*minCapacity=*/maximizedLocaleIDCapacity, - /*desiredCapacityHint=*/maximizedLocaleIDCapacity, - maximizedLocaleIDCapacity, - status); - - if (U_FAILURE(status)) { - return; - } - - reslen = uloc_addLikelySubtags( - fullName, - buffer, - maximizedLocaleIDCapacity, - &status); - - if (status != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - maximizedLocaleIDCapacity = reslen; - status = U_ZERO_ERROR; - } - - if (U_FAILURE(status)) { - return; - } - - maximizedLocaleID.append(buffer, reslen, status); - if (status == U_STRING_NOT_TERMINATED_WARNING) { - status = U_ZERO_ERROR; // Terminators provided by CharString. - } - - if (U_FAILURE(status)) { - return; - } - - init(maximizedLocaleID.data(), /*canonicalize=*/FALSE); - if (isBogus()) { - status = U_ILLEGAL_ARGUMENT_ERROR; - } -} - -void -Locale::minimizeSubtags(UErrorCode& status) { - if (U_FAILURE(status)) { - return; - } - - // Except for a few edge cases (like the empty string, that is minimized to - // "en__POSIX"), minimized locale ID strings will be either the same length - // or shorter than their input. - CharString minimizedLocaleID; - int32_t minimizedLocaleIDCapacity = static_cast(uprv_strlen(fullName)); - - char* buffer; - int32_t reslen; - - for (;;) { - buffer = minimizedLocaleID.getAppendBuffer( - /*minCapacity=*/minimizedLocaleIDCapacity, - /*desiredCapacityHint=*/minimizedLocaleIDCapacity, - minimizedLocaleIDCapacity, - status); - - if (U_FAILURE(status)) { - return; - } - - reslen = uloc_minimizeSubtags( - fullName, - buffer, - minimizedLocaleIDCapacity, - &status); - - if (status != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - // Because of the internal minimal buffer size of CharString, I can't - // think of any input data for which this could possibly ever happen. 
- // Maybe it would be better replaced with an assertion instead? - minimizedLocaleIDCapacity = reslen; - status = U_ZERO_ERROR; - } - - if (U_FAILURE(status)) { - return; - } - - minimizedLocaleID.append(buffer, reslen, status); - if (status == U_STRING_NOT_TERMINATED_WARNING) { - status = U_ZERO_ERROR; // Terminators provided by CharString. - } - - if (U_FAILURE(status)) { - return; - } - - init(minimizedLocaleID.data(), /*canonicalize=*/FALSE); - if (isBogus()) { - status = U_ILLEGAL_ARGUMENT_ERROR; - } -} - -Locale U_EXPORT2 -Locale::forLanguageTag(StringPiece tag, UErrorCode& status) -{ - Locale result(Locale::eBOGUS); - - if (U_FAILURE(status)) { - return result; - } - - // If a BCP-47 language tag is passed as the language parameter to the - // normal Locale constructor, it will actually fall back to invoking - // uloc_forLanguageTag() to parse it if it somehow is able to detect that - // the string actually is BCP-47. This works well for things like strings - // using BCP-47 extensions, but it does not at all work for things like - // BCP-47 grandfathered tags (eg. "en-GB-oed") which are possible to also - // interpret as ICU locale IDs and because of that won't trigger the BCP-47 - // parsing. Therefore the code here explicitly calls uloc_forLanguageTag() - // and then Locale::init(), instead of just calling the normal constructor. - - // All simple language tags will have the exact same length as ICU locale - // ID strings as they have as BCP-47 strings (like "en_US" for "en-US"). 
- CharString localeID; - int32_t resultCapacity = tag.size(); - - char* buffer; - int32_t parsedLength, reslen; - - for (;;) { - buffer = localeID.getAppendBuffer( - /*minCapacity=*/resultCapacity, - /*desiredCapacityHint=*/resultCapacity, - resultCapacity, - status); - - if (U_FAILURE(status)) { - return result; - } - - reslen = ulocimp_forLanguageTag( - tag.data(), - tag.length(), - buffer, - resultCapacity, - &parsedLength, - &status); - - if (status != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - // For all BCP-47 language tags that use extensions, the corresponding - // ICU locale ID will be longer but uloc_forLanguageTag() does compute - // the exact length needed so this memory reallocation will be done at - // most once. - resultCapacity = reslen; - status = U_ZERO_ERROR; - } - - if (U_FAILURE(status)) { - return result; - } - - if (parsedLength != tag.size()) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return result; - } - - localeID.append(buffer, reslen, status); - if (status == U_STRING_NOT_TERMINATED_WARNING) { - status = U_ZERO_ERROR; // Terminators provided by CharString. - } - - if (U_FAILURE(status)) { - return result; - } - - result.init(localeID.data(), /*canonicalize=*/FALSE); - if (result.isBogus()) { - status = U_ILLEGAL_ARGUMENT_ERROR; - } - return result; -} - -void -Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const -{ - if (U_FAILURE(status)) { - return; - } - - if (fIsBogus) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - // All simple language tags will have the exact same length as BCP-47 - // strings as they have as ICU locale IDs (like "en-US" for "en_US"). 
- LocalMemory scratch; - int32_t scratch_capacity = static_cast(uprv_strlen(fullName)); - - if (scratch_capacity == 0) { - scratch_capacity = 3; // "und" - } - - char* buffer; - int32_t result_capacity, reslen; - - for (;;) { - if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - - buffer = sink.GetAppendBuffer( - /*min_capacity=*/scratch_capacity, - /*desired_capacity_hint=*/scratch_capacity, - scratch.getAlias(), - scratch_capacity, - &result_capacity); - - reslen = uloc_toLanguageTag( - fullName, - buffer, - result_capacity, - /*strict=*/FALSE, - &status); - - if (status != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - // For some very few edge cases a language tag will be longer as a - // BCP-47 string than it is as an ICU locale ID. Most notoriously "C" - // expands to the BCP-47 tag "en-US-u-va-posix", 16 times longer, and - // it'll take several calls to uloc_toLanguageTag() to figure that out. - // https://unicode-org.atlassian.net/browse/ICU-20132 - scratch_capacity = reslen; - status = U_ZERO_ERROR; - } - - if (U_FAILURE(status)) { - return; - } - - sink.Append(buffer, reslen); - if (status == U_STRING_NOT_TERMINATED_WARNING) { - status = U_ZERO_ERROR; // Terminators not used. - } -} - -Locale U_EXPORT2 -Locale::createFromName (const char *name) -{ - if (name) { - Locale l(""); - l.init(name, FALSE); - return l; - } - else { - return getDefault(); - } -} - -Locale U_EXPORT2 -Locale::createCanonical(const char* name) { - Locale loc(""); - loc.init(name, TRUE); - return loc; -} - -const char * -Locale::getISO3Language() const -{ - return uloc_getISO3Language(fullName); -} - - -const char * -Locale::getISO3Country() const -{ - return uloc_getISO3Country(fullName); -} - -/** - * Return the LCID value as specified in the "LocaleID" resource for this - * locale. The LocaleID must be expressed as a hexadecimal number, from - * one to four digits. 
If the LocaleID resource is not present, or is - * in an incorrect format, 0 is returned. The LocaleID is for use in - * Windows (it is an LCID), but is available on all platforms. - */ -uint32_t -Locale::getLCID() const -{ - return uloc_getLCID(fullName); -} - -const char* const* U_EXPORT2 Locale::getISOCountries() -{ - return uloc_getISOCountries(); -} - -const char* const* U_EXPORT2 Locale::getISOLanguages() -{ - return uloc_getISOLanguages(); -} - -// Set the locale's data based on a posix id. -void Locale::setFromPOSIXID(const char *posixID) -{ - init(posixID, TRUE); -} - -const Locale & U_EXPORT2 -Locale::getRoot(void) -{ - return getLocale(eROOT); -} - -const Locale & U_EXPORT2 -Locale::getEnglish(void) -{ - return getLocale(eENGLISH); -} - -const Locale & U_EXPORT2 -Locale::getFrench(void) -{ - return getLocale(eFRENCH); -} - -const Locale & U_EXPORT2 -Locale::getGerman(void) -{ - return getLocale(eGERMAN); -} - -const Locale & U_EXPORT2 -Locale::getItalian(void) -{ - return getLocale(eITALIAN); -} - -const Locale & U_EXPORT2 -Locale::getJapanese(void) -{ - return getLocale(eJAPANESE); -} - -const Locale & U_EXPORT2 -Locale::getKorean(void) -{ - return getLocale(eKOREAN); -} - -const Locale & U_EXPORT2 -Locale::getChinese(void) -{ - return getLocale(eCHINESE); -} - -const Locale & U_EXPORT2 -Locale::getSimplifiedChinese(void) -{ - return getLocale(eCHINA); -} - -const Locale & U_EXPORT2 -Locale::getTraditionalChinese(void) -{ - return getLocale(eTAIWAN); -} - - -const Locale & U_EXPORT2 -Locale::getFrance(void) -{ - return getLocale(eFRANCE); -} - -const Locale & U_EXPORT2 -Locale::getGermany(void) -{ - return getLocale(eGERMANY); -} - -const Locale & U_EXPORT2 -Locale::getItaly(void) -{ - return getLocale(eITALY); -} - -const Locale & U_EXPORT2 -Locale::getJapan(void) -{ - return getLocale(eJAPAN); -} - -const Locale & U_EXPORT2 -Locale::getKorea(void) -{ - return getLocale(eKOREA); -} - -const Locale & U_EXPORT2 -Locale::getChina(void) -{ - return 
getLocale(eCHINA); -} - -const Locale & U_EXPORT2 -Locale::getPRC(void) -{ - return getLocale(eCHINA); -} - -const Locale & U_EXPORT2 -Locale::getTaiwan(void) -{ - return getLocale(eTAIWAN); -} - -const Locale & U_EXPORT2 -Locale::getUK(void) -{ - return getLocale(eUK); -} - -const Locale & U_EXPORT2 -Locale::getUS(void) -{ - return getLocale(eUS); -} - -const Locale & U_EXPORT2 -Locale::getCanada(void) -{ - return getLocale(eCANADA); -} - -const Locale & U_EXPORT2 -Locale::getCanadaFrench(void) -{ - return getLocale(eCANADA_FRENCH); -} - -const Locale & -Locale::getLocale(int locid) -{ - Locale *localeCache = getLocaleCache(); - U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0)); - if (localeCache == NULL) { - // Failure allocating the locale cache. - // The best we can do is return a NULL reference. - locid = 0; - } - return localeCache[locid]; /*operating on NULL*/ -} - -/* -This function is defined this way in order to get around static -initialization and static destruction. - */ -Locale * -Locale::getLocaleCache(void) -{ - UErrorCode status = U_ZERO_ERROR; - umtx_initOnce(gLocaleCacheInitOnce, locale_init, status); - return gLocaleCache; -} - -class KeywordEnumeration : public StringEnumeration { -private: - char *keywords; - char *current; - int32_t length; - UnicodeString currUSKey; - static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. 
*/ - -public: - static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; } - virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); } -public: - KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status) - : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) { - if(U_SUCCESS(status) && keywordLen != 0) { - if(keys == NULL || keywordLen < 0) { - status = U_ILLEGAL_ARGUMENT_ERROR; - } else { - keywords = (char *)uprv_malloc(keywordLen+1); - if (keywords == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } - else { - uprv_memcpy(keywords, keys, keywordLen); - keywords[keywordLen] = 0; - current = keywords + currentIndex; - length = keywordLen; - } - } - } - } - - virtual ~KeywordEnumeration(); - - virtual StringEnumeration * clone() const - { - UErrorCode status = U_ZERO_ERROR; - return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status); - } - - virtual int32_t count(UErrorCode &/*status*/) const { - char *kw = keywords; - int32_t result = 0; - while(*kw) { - result++; - kw += uprv_strlen(kw)+1; - } - return result; - } - - virtual const char* next(int32_t* resultLength, UErrorCode& status) { - const char* result; - int32_t len; - if(U_SUCCESS(status) && *current != 0) { - result = current; - len = (int32_t)uprv_strlen(current); - current += len+1; - if(resultLength != NULL) { - *resultLength = len; - } - } else { - if(resultLength != NULL) { - *resultLength = 0; - } - result = NULL; - } - return result; - } - - virtual const UnicodeString* snext(UErrorCode& status) { - int32_t resultLength = 0; - const char *s = next(&resultLength, status); - return setChars(s, resultLength, status); - } - - virtual void reset(UErrorCode& /*status*/) { - current = keywords; - } -}; - -const char KeywordEnumeration::fgClassID = '\0'; - -KeywordEnumeration::~KeywordEnumeration() { - uprv_free(keywords); -} - -// A wrapper around KeywordEnumeration that 
calls uloc_toUnicodeLocaleKey() in -// the next() method for each keyword before returning it. -class UnicodeKeywordEnumeration : public KeywordEnumeration { -public: - using KeywordEnumeration::KeywordEnumeration; - virtual ~UnicodeKeywordEnumeration(); - - virtual const char* next(int32_t* resultLength, UErrorCode& status) { - const char* legacy_key = KeywordEnumeration::next(nullptr, status); - if (U_SUCCESS(status) && legacy_key != nullptr) { - const char* key = uloc_toUnicodeLocaleKey(legacy_key); - if (key == nullptr) { - status = U_ILLEGAL_ARGUMENT_ERROR; - } else { - if (resultLength != nullptr) { - *resultLength = static_cast(uprv_strlen(key)); - } - return key; - } - } - if (resultLength != nullptr) *resultLength = 0; - return nullptr; - } -}; - -// Out-of-line virtual destructor to serve as the "key function". -UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default; - -StringEnumeration * -Locale::createKeywords(UErrorCode &status) const -{ - char keywords[256]; - int32_t keywordCapacity = sizeof keywords; - StringEnumeration *result = NULL; - - if (U_FAILURE(status)) { - return result; - } - - const char* variantStart = uprv_strchr(fullName, '@'); - const char* assignment = uprv_strchr(fullName, '='); - if(variantStart) { - if(assignment > variantStart) { - int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status); - if(U_SUCCESS(status) && keyLen) { - result = new KeywordEnumeration(keywords, keyLen, 0, status); - if (!result) { - status = U_MEMORY_ALLOCATION_ERROR; - } - } - } else { - status = U_INVALID_FORMAT_ERROR; - } - } - return result; -} - -StringEnumeration * -Locale::createUnicodeKeywords(UErrorCode &status) const -{ - char keywords[256]; - int32_t keywordCapacity = sizeof keywords; - StringEnumeration *result = NULL; - - if (U_FAILURE(status)) { - return result; - } - - const char* variantStart = uprv_strchr(fullName, '@'); - const char* assignment = uprv_strchr(fullName, 
'='); - if(variantStart) { - if(assignment > variantStart) { - int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status); - if(U_SUCCESS(status) && keyLen) { - result = new UnicodeKeywordEnumeration(keywords, keyLen, 0, status); - if (!result) { - status = U_MEMORY_ALLOCATION_ERROR; - } - } - } else { - status = U_INVALID_FORMAT_ERROR; - } - } - return result; -} - -int32_t -Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const -{ - return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status); -} - -void -Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const { - if (U_FAILURE(status)) { - return; - } - - if (fIsBogus) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - // TODO: Remove the need for a const char* to a NUL terminated buffer. - const CharString keywordName_nul(keywordName, status); - if (U_FAILURE(status)) { - return; - } - - LocalMemory scratch; - int32_t scratch_capacity = 16; // Arbitrarily chosen default size. - - char* buffer; - int32_t result_capacity, reslen; - - for (;;) { - if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - - buffer = sink.GetAppendBuffer( - /*min_capacity=*/scratch_capacity, - /*desired_capacity_hint=*/scratch_capacity, - scratch.getAlias(), - scratch_capacity, - &result_capacity); - - reslen = uloc_getKeywordValue( - fullName, - keywordName_nul.data(), - buffer, - result_capacity, - &status); - - if (status != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - scratch_capacity = reslen; - status = U_ZERO_ERROR; - } - - if (U_FAILURE(status)) { - return; - } - - sink.Append(buffer, reslen); - if (status == U_STRING_NOT_TERMINATED_WARNING) { - status = U_ZERO_ERROR; // Terminators not used. 
- } -} - -void -Locale::getUnicodeKeywordValue(StringPiece keywordName, - ByteSink& sink, - UErrorCode& status) const { - // TODO: Remove the need for a const char* to a NUL terminated buffer. - const CharString keywordName_nul(keywordName, status); - if (U_FAILURE(status)) { - return; - } - - const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data()); - - if (legacy_key == nullptr) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - CharString legacy_value; - { - CharStringByteSink sink(&legacy_value); - getKeywordValue(legacy_key, sink, status); - } - - if (U_FAILURE(status)) { - return; - } - - const char* unicode_value = uloc_toUnicodeLocaleType( - keywordName_nul.data(), legacy_value.data()); - - if (unicode_value == nullptr) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - sink.Append(unicode_value, static_cast(uprv_strlen(unicode_value))); -} - -void -Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status) -{ - uloc_setKeywordValue(keywordName, keywordValue, fullName, ULOC_FULLNAME_CAPACITY, &status); - if (U_SUCCESS(status) && baseName == fullName) { - // May have added the first keyword, meaning that the fullName is no longer also the baseName. - initBaseName(status); - } -} - -void -Locale::setKeywordValue(StringPiece keywordName, - StringPiece keywordValue, - UErrorCode& status) { - // TODO: Remove the need for a const char* to a NUL terminated buffer. - const CharString keywordName_nul(keywordName, status); - const CharString keywordValue_nul(keywordValue, status); - setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status); -} - -void -Locale::setUnicodeKeywordValue(StringPiece keywordName, - StringPiece keywordValue, - UErrorCode& status) { - // TODO: Remove the need for a const char* to a NUL terminated buffer. 
- const CharString keywordName_nul(keywordName, status); - const CharString keywordValue_nul(keywordValue, status); - - if (U_FAILURE(status)) { - return; - } - - const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data()); - - if (legacy_key == nullptr) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - const char* legacy_value = - uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data()); - - if (legacy_value == nullptr) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - setKeywordValue(legacy_key, legacy_value, status); -} - -const char * -Locale::getBaseName() const { - return baseName; -} - -//eof -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/loclikely.cpp b/deps/node/deps/icu-small/source/common/loclikely.cpp deleted file mode 100644 index e5876e2e..00000000 --- a/deps/node/deps/icu-small/source/common/loclikely.cpp +++ /dev/null @@ -1,1385 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: loclikely.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010feb25 -* created by: Markus W. Scherer -* -* Code for likely and minimized locale subtags, separated out from other .cpp files -* that then do not depend on resource bundle code and likely-subtags data. -*/ - -#include "unicode/utypes.h" -#include "unicode/locid.h" -#include "unicode/putil.h" -#include "unicode/uchar.h" -#include "unicode/uloc.h" -#include "unicode/ures.h" -#include "unicode/uscript.h" -#include "cmemory.h" -#include "cstring.h" -#include "ulocimp.h" -#include "ustr_imp.h" - -/** - * This function looks for the localeID in the likelySubtags resource. 
- * - * @param localeID The tag to find. - * @param buffer A buffer to hold the matching entry - * @param bufferLength The length of the output buffer - * @return A pointer to "buffer" if found, or a null pointer if not. - */ -static const char* U_CALLCONV -findLikelySubtags(const char* localeID, - char* buffer, - int32_t bufferLength, - UErrorCode* err) { - const char* result = NULL; - - if (!U_FAILURE(*err)) { - int32_t resLen = 0; - const UChar* s = NULL; - UErrorCode tmpErr = U_ZERO_ERROR; - UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr); - if (U_SUCCESS(tmpErr)) { - s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr); - - if (U_FAILURE(tmpErr)) { - /* - * If a resource is missing, it's not really an error, it's - * just that we don't have any data for that particular locale ID. - */ - if (tmpErr != U_MISSING_RESOURCE_ERROR) { - *err = tmpErr; - } - } - else if (resLen >= bufferLength) { - /* The buffer should never overflow. */ - *err = U_INTERNAL_PROGRAM_ERROR; - } - else { - u_UCharsToChars(s, buffer, resLen + 1); - result = buffer; - } - - ures_close(subtags); - } else { - *err = tmpErr; - } - } - - return result; -} - -/** - * Append a tag to a buffer, adding the separator if necessary. The buffer - * must be large enough to contain the resulting tag plus any separator - * necessary. The tag must not be a zero-length string. - * - * @param tag The tag to add. - * @param tagLength The length of the tag. - * @param buffer The output buffer. - * @param bufferLength The length of the output buffer. This is an input/ouput parameter. - **/ -static void U_CALLCONV -appendTag( - const char* tag, - int32_t tagLength, - char* buffer, - int32_t* bufferLength) { - - if (*bufferLength > 0) { - buffer[*bufferLength] = '_'; - ++(*bufferLength); - } - - uprv_memmove( - &buffer[*bufferLength], - tag, - tagLength); - - *bufferLength += tagLength; -} - -/** - * These are the canonical strings for unknown languages, scripts and regions. 
- **/ -static const char* const unknownLanguage = "und"; -static const char* const unknownScript = "Zzzz"; -static const char* const unknownRegion = "ZZ"; - -/** - * Create a tag string from the supplied parameters. The lang, script and region - * parameters may be NULL pointers. If they are, their corresponding length parameters - * must be less than or equal to 0. - * - * If any of the language, script or region parameters are empty, and the alternateTags - * parameter is not NULL, it will be parsed for potential language, script and region tags - * to be used when constructing the new tag. If the alternateTags parameter is NULL, or - * it contains no language tag, the default tag for the unknown language is used. - * - * If the length of the new string exceeds the capacity of the output buffer, - * the function copies as many bytes to the output buffer as it can, and returns - * the error U_BUFFER_OVERFLOW_ERROR. - * - * If an illegal argument is provided, the function returns the error - * U_ILLEGAL_ARGUMENT_ERROR. - * - * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if - * the tag string fits in the output buffer, but the null terminator doesn't. - * - * @param lang The language tag to use. - * @param langLength The length of the language tag. - * @param script The script tag to use. - * @param scriptLength The length of the script tag. - * @param region The region tag to use. - * @param regionLength The length of the region tag. - * @param trailing Any trailing data to append to the new tag. - * @param trailingLength The length of the trailing data. - * @param alternateTags A string containing any alternate tags. - * @param tag The output buffer. - * @param tagCapacity The capacity of the output buffer. - * @param err A pointer to a UErrorCode for error reporting. - * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error. 
- **/ -static int32_t U_CALLCONV -createTagStringWithAlternates( - const char* lang, - int32_t langLength, - const char* script, - int32_t scriptLength, - const char* region, - int32_t regionLength, - const char* trailing, - int32_t trailingLength, - const char* alternateTags, - char* tag, - int32_t tagCapacity, - UErrorCode* err) { - - if (U_FAILURE(*err)) { - goto error; - } - else if (tag == NULL || - tagCapacity <= 0 || - langLength >= ULOC_LANG_CAPACITY || - scriptLength >= ULOC_SCRIPT_CAPACITY || - regionLength >= ULOC_COUNTRY_CAPACITY) { - goto error; - } - else { - /** - * ULOC_FULLNAME_CAPACITY will provide enough capacity - * that we can build a string that contains the language, - * script and region code without worrying about overrunning - * the user-supplied buffer. - **/ - char tagBuffer[ULOC_FULLNAME_CAPACITY]; - int32_t tagLength = 0; - int32_t capacityRemaining = tagCapacity; - UBool regionAppended = FALSE; - - if (langLength > 0) { - appendTag( - lang, - langLength, - tagBuffer, - &tagLength); - } - else if (alternateTags == NULL) { - /* - * Append the value for an unknown language, if - * we found no language. - */ - appendTag( - unknownLanguage, - (int32_t)uprv_strlen(unknownLanguage), - tagBuffer, - &tagLength); - } - else { - /* - * Parse the alternateTags string for the language. - */ - char alternateLang[ULOC_LANG_CAPACITY]; - int32_t alternateLangLength = sizeof(alternateLang); - - alternateLangLength = - uloc_getLanguage( - alternateTags, - alternateLang, - alternateLangLength, - err); - if(U_FAILURE(*err) || - alternateLangLength >= ULOC_LANG_CAPACITY) { - goto error; - } - else if (alternateLangLength == 0) { - /* - * Append the value for an unknown language, if - * we found no language. 
- */ - appendTag( - unknownLanguage, - (int32_t)uprv_strlen(unknownLanguage), - tagBuffer, - &tagLength); - } - else { - appendTag( - alternateLang, - alternateLangLength, - tagBuffer, - &tagLength); - } - } - - if (scriptLength > 0) { - appendTag( - script, - scriptLength, - tagBuffer, - &tagLength); - } - else if (alternateTags != NULL) { - /* - * Parse the alternateTags string for the script. - */ - char alternateScript[ULOC_SCRIPT_CAPACITY]; - - const int32_t alternateScriptLength = - uloc_getScript( - alternateTags, - alternateScript, - sizeof(alternateScript), - err); - - if (U_FAILURE(*err) || - alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { - goto error; - } - else if (alternateScriptLength > 0) { - appendTag( - alternateScript, - alternateScriptLength, - tagBuffer, - &tagLength); - } - } - - if (regionLength > 0) { - appendTag( - region, - regionLength, - tagBuffer, - &tagLength); - - regionAppended = TRUE; - } - else if (alternateTags != NULL) { - /* - * Parse the alternateTags string for the region. - */ - char alternateRegion[ULOC_COUNTRY_CAPACITY]; - - const int32_t alternateRegionLength = - uloc_getCountry( - alternateTags, - alternateRegion, - sizeof(alternateRegion), - err); - if (U_FAILURE(*err) || - alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { - goto error; - } - else if (alternateRegionLength > 0) { - appendTag( - alternateRegion, - alternateRegionLength, - tagBuffer, - &tagLength); - - regionAppended = TRUE; - } - } - - { - const int32_t toCopy = - tagLength >= tagCapacity ? tagCapacity : tagLength; - - /** - * Copy the partial tag from our internal buffer to the supplied - * target. 
- **/ - uprv_memcpy( - tag, - tagBuffer, - toCopy); - - capacityRemaining -= toCopy; - } - - if (trailingLength > 0) { - if (*trailing != '@' && capacityRemaining > 0) { - tag[tagLength++] = '_'; - --capacityRemaining; - if (capacityRemaining > 0 && !regionAppended) { - /* extra separator is required */ - tag[tagLength++] = '_'; - --capacityRemaining; - } - } - - if (capacityRemaining > 0) { - /* - * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we - * don't know if the user-supplied buffers overlap. - */ - const int32_t toCopy = - trailingLength >= capacityRemaining ? capacityRemaining : trailingLength; - - uprv_memmove( - &tag[tagLength], - trailing, - toCopy); - } - } - - tagLength += trailingLength; - - return u_terminateChars( - tag, - tagCapacity, - tagLength, - err); - } - -error: - - /** - * An overflow indicates the locale ID passed in - * is ill-formed. If we got here, and there was - * no previous error, it's an implicit overflow. - **/ - if (*err == U_BUFFER_OVERFLOW_ERROR || - U_SUCCESS(*err)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - } - - return -1; -} - -/** - * Create a tag string from the supplied parameters. The lang, script and region - * parameters may be NULL pointers. If they are, their corresponding length parameters - * must be less than or equal to 0. If the lang parameter is an empty string, the - * default value for an unknown language is written to the output buffer. - * - * If the length of the new string exceeds the capacity of the output buffer, - * the function copies as many bytes to the output buffer as it can, and returns - * the error U_BUFFER_OVERFLOW_ERROR. - * - * If an illegal argument is provided, the function returns the error - * U_ILLEGAL_ARGUMENT_ERROR. - * - * @param lang The language tag to use. - * @param langLength The length of the language tag. - * @param script The script tag to use. - * @param scriptLength The length of the script tag. - * @param region The region tag to use. 
- * @param regionLength The length of the region tag. - * @param trailing Any trailing data to append to the new tag. - * @param trailingLength The length of the trailing data. - * @param tag The output buffer. - * @param tagCapacity The capacity of the output buffer. - * @param err A pointer to a UErrorCode for error reporting. - * @return The length of the tag string, which may be greater than tagCapacity. - **/ -static int32_t U_CALLCONV -createTagString( - const char* lang, - int32_t langLength, - const char* script, - int32_t scriptLength, - const char* region, - int32_t regionLength, - const char* trailing, - int32_t trailingLength, - char* tag, - int32_t tagCapacity, - UErrorCode* err) -{ - return createTagStringWithAlternates( - lang, - langLength, - script, - scriptLength, - region, - regionLength, - trailing, - trailingLength, - NULL, - tag, - tagCapacity, - err); -} - -/** - * Parse the language, script, and region subtags from a tag string, and copy the - * results into the corresponding output parameters. The buffers are null-terminated, - * unless overflow occurs. - * - * The langLength, scriptLength, and regionLength parameters are input/output - * parameters, and must contain the capacity of their corresponding buffers on - * input. On output, they will contain the actual length of the buffers, not - * including the null terminator. - * - * If the length of any of the output subtags exceeds the capacity of the corresponding - * buffer, the function copies as many bytes to the output buffer as it can, and returns - * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow - * occurs. - * - * If an illegal argument is provided, the function returns the error - * U_ILLEGAL_ARGUMENT_ERROR. - * - * @param localeID The locale ID to parse. - * @param lang The language tag buffer. - * @param langLength The length of the language tag. - * @param script The script tag buffer. - * @param scriptLength The length of the script tag. 
- * @param region The region tag buffer. - * @param regionLength The length of the region tag. - * @param err A pointer to a UErrorCode for error reporting. - * @return The number of chars of the localeID parameter consumed. - **/ -static int32_t U_CALLCONV -parseTagString( - const char* localeID, - char* lang, - int32_t* langLength, - char* script, - int32_t* scriptLength, - char* region, - int32_t* regionLength, - UErrorCode* err) -{ - const char* position = localeID; - int32_t subtagLength = 0; - - if(U_FAILURE(*err) || - localeID == NULL || - lang == NULL || - langLength == NULL || - script == NULL || - scriptLength == NULL || - region == NULL || - regionLength == NULL) { - goto error; - } - - subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position); - u_terminateChars(lang, *langLength, subtagLength, err); - - /* - * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING - * to be an error, because it indicates the user-supplied tag is - * not well-formed. - */ - if(U_FAILURE(*err)) { - goto error; - } - - *langLength = subtagLength; - - /* - * If no language was present, use the value of unknownLanguage - * instead. Otherwise, move past any separator. - */ - if (*langLength == 0) { - uprv_strcpy( - lang, - unknownLanguage); - *langLength = (int32_t)uprv_strlen(lang); - } - if (_isIDSeparator(*position)) { - ++position; - } - - subtagLength = ulocimp_getScript(position, script, *scriptLength, &position); - u_terminateChars(script, *scriptLength, subtagLength, err); - - if(U_FAILURE(*err)) { - goto error; - } - - *scriptLength = subtagLength; - - if (*scriptLength > 0) { - if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { - /** - * If the script part is the "unknown" script, then don't return it. - **/ - *scriptLength = 0; - } - - /* - * Move past any separator. 
- */ - if (_isIDSeparator(*position)) { - ++position; - } - } - - subtagLength = ulocimp_getCountry(position, region, *regionLength, &position); - u_terminateChars(region, *regionLength, subtagLength, err); - - if(U_FAILURE(*err)) { - goto error; - } - - *regionLength = subtagLength; - - if (*regionLength > 0) { - if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { - /** - * If the region part is the "unknown" region, then don't return it. - **/ - *regionLength = 0; - } - } else if (*position != 0 && *position != '@') { - /* back up over consumed trailing separator */ - --position; - } - -exit: - - return (int32_t)(position - localeID); - -error: - - /** - * If we get here, we have no explicit error, it's the result of an - * illegal argument. - **/ - if (!U_FAILURE(*err)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - } - - goto exit; -} - -static int32_t U_CALLCONV -createLikelySubtagsString( - const char* lang, - int32_t langLength, - const char* script, - int32_t scriptLength, - const char* region, - int32_t regionLength, - const char* variants, - int32_t variantsLength, - char* tag, - int32_t tagCapacity, - UErrorCode* err) -{ - /** - * ULOC_FULLNAME_CAPACITY will provide enough capacity - * that we can build a string that contains the language, - * script and region code without worrying about overrunning - * the user-supplied buffer. - **/ - char tagBuffer[ULOC_FULLNAME_CAPACITY]; - char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; - - if(U_FAILURE(*err)) { - goto error; - } - - /** - * Try the language with the script and region first. 
- **/ - if (scriptLength > 0 && regionLength > 0) { - - const char* likelySubtags = NULL; - - createTagString( - lang, - langLength, - script, - scriptLength, - region, - regionLength, - NULL, - 0, - tagBuffer, - sizeof(tagBuffer), - err); - if(U_FAILURE(*err)) { - goto error; - } - - likelySubtags = - findLikelySubtags( - tagBuffer, - likelySubtagsBuffer, - sizeof(likelySubtagsBuffer), - err); - if(U_FAILURE(*err)) { - goto error; - } - - if (likelySubtags != NULL) { - /* Always use the language tag from the - maximal string, since it may be more - specific than the one provided. */ - return createTagStringWithAlternates( - NULL, - 0, - NULL, - 0, - NULL, - 0, - variants, - variantsLength, - likelySubtags, - tag, - tagCapacity, - err); - } - } - - /** - * Try the language with just the script. - **/ - if (scriptLength > 0) { - - const char* likelySubtags = NULL; - - createTagString( - lang, - langLength, - script, - scriptLength, - NULL, - 0, - NULL, - 0, - tagBuffer, - sizeof(tagBuffer), - err); - if(U_FAILURE(*err)) { - goto error; - } - - likelySubtags = - findLikelySubtags( - tagBuffer, - likelySubtagsBuffer, - sizeof(likelySubtagsBuffer), - err); - if(U_FAILURE(*err)) { - goto error; - } - - if (likelySubtags != NULL) { - /* Always use the language tag from the - maximal string, since it may be more - specific than the one provided. */ - return createTagStringWithAlternates( - NULL, - 0, - NULL, - 0, - region, - regionLength, - variants, - variantsLength, - likelySubtags, - tag, - tagCapacity, - err); - } - } - - /** - * Try the language with just the region. 
- **/ - if (regionLength > 0) { - - const char* likelySubtags = NULL; - - createTagString( - lang, - langLength, - NULL, - 0, - region, - regionLength, - NULL, - 0, - tagBuffer, - sizeof(tagBuffer), - err); - if(U_FAILURE(*err)) { - goto error; - } - - likelySubtags = - findLikelySubtags( - tagBuffer, - likelySubtagsBuffer, - sizeof(likelySubtagsBuffer), - err); - if(U_FAILURE(*err)) { - goto error; - } - - if (likelySubtags != NULL) { - /* Always use the language tag from the - maximal string, since it may be more - specific than the one provided. */ - return createTagStringWithAlternates( - NULL, - 0, - script, - scriptLength, - NULL, - 0, - variants, - variantsLength, - likelySubtags, - tag, - tagCapacity, - err); - } - } - - /** - * Finally, try just the language. - **/ - { - const char* likelySubtags = NULL; - - createTagString( - lang, - langLength, - NULL, - 0, - NULL, - 0, - NULL, - 0, - tagBuffer, - sizeof(tagBuffer), - err); - if(U_FAILURE(*err)) { - goto error; - } - - likelySubtags = - findLikelySubtags( - tagBuffer, - likelySubtagsBuffer, - sizeof(likelySubtagsBuffer), - err); - if(U_FAILURE(*err)) { - goto error; - } - - if (likelySubtags != NULL) { - /* Always use the language tag from the - maximal string, since it may be more - specific than the one provided. 
*/ - return createTagStringWithAlternates( - NULL, - 0, - script, - scriptLength, - region, - regionLength, - variants, - variantsLength, - likelySubtags, - tag, - tagCapacity, - err); - } - } - - return u_terminateChars( - tag, - tagCapacity, - 0, - err); - -error: - - if (!U_FAILURE(*err)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - } - - return -1; -} - -#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \ - { int32_t count = 0; \ - int32_t i; \ - for (i = 0; i < trailingLength; i++) { \ - if (trailing[i] == '-' || trailing[i] == '_') { \ - count = 0; \ - if (count > 8) { \ - goto error; \ - } \ - } else if (trailing[i] == '@') { \ - break; \ - } else if (count > 8) { \ - goto error; \ - } else { \ - count++; \ - } \ - } \ - } - -static int32_t -_uloc_addLikelySubtags(const char* localeID, - char* maximizedLocaleID, - int32_t maximizedLocaleIDCapacity, - UErrorCode* err) -{ - char lang[ULOC_LANG_CAPACITY]; - int32_t langLength = sizeof(lang); - char script[ULOC_SCRIPT_CAPACITY]; - int32_t scriptLength = sizeof(script); - char region[ULOC_COUNTRY_CAPACITY]; - int32_t regionLength = sizeof(region); - const char* trailing = ""; - int32_t trailingLength = 0; - int32_t trailingIndex = 0; - int32_t resultLength = 0; - - if(U_FAILURE(*err)) { - goto error; - } - else if (localeID == NULL || - maximizedLocaleID == NULL || - maximizedLocaleIDCapacity <= 0) { - goto error; - } - - trailingIndex = parseTagString( - localeID, - lang, - &langLength, - script, - &scriptLength, - region, - ®ionLength, - err); - if(U_FAILURE(*err)) { - /* Overflow indicates an illegal argument error */ - if (*err == U_BUFFER_OVERFLOW_ERROR) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - } - - goto error; - } - - /* Find the length of the trailing portion. 
*/ - while (_isIDSeparator(localeID[trailingIndex])) { - trailingIndex++; - } - trailing = &localeID[trailingIndex]; - trailingLength = (int32_t)uprv_strlen(trailing); - - CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); - - resultLength = - createLikelySubtagsString( - lang, - langLength, - script, - scriptLength, - region, - regionLength, - trailing, - trailingLength, - maximizedLocaleID, - maximizedLocaleIDCapacity, - err); - - if (resultLength == 0) { - const int32_t localIDLength = (int32_t)uprv_strlen(localeID); - - /* - * If we get here, we need to return localeID. - */ - uprv_memcpy( - maximizedLocaleID, - localeID, - localIDLength <= maximizedLocaleIDCapacity ? - localIDLength : maximizedLocaleIDCapacity); - - resultLength = - u_terminateChars( - maximizedLocaleID, - maximizedLocaleIDCapacity, - localIDLength, - err); - } - - return resultLength; - -error: - - if (!U_FAILURE(*err)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - } - - return -1; -} - -static int32_t -_uloc_minimizeSubtags(const char* localeID, - char* minimizedLocaleID, - int32_t minimizedLocaleIDCapacity, - UErrorCode* err) -{ - /** - * ULOC_FULLNAME_CAPACITY will provide enough capacity - * that we can build a string that contains the language, - * script and region code without worrying about overrunning - * the user-supplied buffer. 
- **/ - char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; - int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); - - char lang[ULOC_LANG_CAPACITY]; - int32_t langLength = sizeof(lang); - char script[ULOC_SCRIPT_CAPACITY]; - int32_t scriptLength = sizeof(script); - char region[ULOC_COUNTRY_CAPACITY]; - int32_t regionLength = sizeof(region); - const char* trailing = ""; - int32_t trailingLength = 0; - int32_t trailingIndex = 0; - - if(U_FAILURE(*err)) { - goto error; - } - else if (localeID == NULL || - minimizedLocaleID == NULL || - minimizedLocaleIDCapacity <= 0) { - goto error; - } - - trailingIndex = - parseTagString( - localeID, - lang, - &langLength, - script, - &scriptLength, - region, - ®ionLength, - err); - if(U_FAILURE(*err)) { - - /* Overflow indicates an illegal argument error */ - if (*err == U_BUFFER_OVERFLOW_ERROR) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - } - - goto error; - } - - /* Find the spot where the variants or the keywords begin, if any. */ - while (_isIDSeparator(localeID[trailingIndex])) { - trailingIndex++; - } - trailing = &localeID[trailingIndex]; - trailingLength = (int32_t)uprv_strlen(trailing); - - CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); - - createTagString( - lang, - langLength, - script, - scriptLength, - region, - regionLength, - NULL, - 0, - maximizedTagBuffer, - maximizedTagBufferLength, - err); - if(U_FAILURE(*err)) { - goto error; - } - - /** - * First, we need to first get the maximization - * from AddLikelySubtags. - **/ - maximizedTagBufferLength = - uloc_addLikelySubtags( - maximizedTagBuffer, - maximizedTagBuffer, - maximizedTagBufferLength, - err); - - if(U_FAILURE(*err)) { - goto error; - } - - /** - * Start first with just the language. 
- **/ - { - char tagBuffer[ULOC_FULLNAME_CAPACITY]; - - const int32_t tagBufferLength = - createLikelySubtagsString( - lang, - langLength, - NULL, - 0, - NULL, - 0, - NULL, - 0, - tagBuffer, - sizeof(tagBuffer), - err); - - if(U_FAILURE(*err)) { - goto error; - } - else if (uprv_strnicmp( - maximizedTagBuffer, - tagBuffer, - tagBufferLength) == 0) { - - return createTagString( - lang, - langLength, - NULL, - 0, - NULL, - 0, - trailing, - trailingLength, - minimizedLocaleID, - minimizedLocaleIDCapacity, - err); - } - } - - /** - * Next, try the language and region. - **/ - if (regionLength > 0) { - - char tagBuffer[ULOC_FULLNAME_CAPACITY]; - - const int32_t tagBufferLength = - createLikelySubtagsString( - lang, - langLength, - NULL, - 0, - region, - regionLength, - NULL, - 0, - tagBuffer, - sizeof(tagBuffer), - err); - - if(U_FAILURE(*err)) { - goto error; - } - else if (uprv_strnicmp( - maximizedTagBuffer, - tagBuffer, - tagBufferLength) == 0) { - - return createTagString( - lang, - langLength, - NULL, - 0, - region, - regionLength, - trailing, - trailingLength, - minimizedLocaleID, - minimizedLocaleIDCapacity, - err); - } - } - - /** - * Finally, try the language and script. This is our last chance, - * since trying with all three subtags would only yield the - * maximal version that we already have. - **/ - if (scriptLength > 0 && regionLength > 0) { - char tagBuffer[ULOC_FULLNAME_CAPACITY]; - - const int32_t tagBufferLength = - createLikelySubtagsString( - lang, - langLength, - script, - scriptLength, - NULL, - 0, - NULL, - 0, - tagBuffer, - sizeof(tagBuffer), - err); - - if(U_FAILURE(*err)) { - goto error; - } - else if (uprv_strnicmp( - maximizedTagBuffer, - tagBuffer, - tagBufferLength) == 0) { - - return createTagString( - lang, - langLength, - script, - scriptLength, - NULL, - 0, - trailing, - trailingLength, - minimizedLocaleID, - minimizedLocaleIDCapacity, - err); - } - } - - { - /** - * If we got here, return the locale ID parameter. 
- **/ - const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); - - uprv_memcpy( - minimizedLocaleID, - localeID, - localeIDLength <= minimizedLocaleIDCapacity ? - localeIDLength : minimizedLocaleIDCapacity); - - return u_terminateChars( - minimizedLocaleID, - minimizedLocaleIDCapacity, - localeIDLength, - err); - } - -error: - - if (!U_FAILURE(*err)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - } - - return -1; - - -} - -static UBool -do_canonicalize(const char* localeID, - char* buffer, - int32_t bufferCapacity, - UErrorCode* err) -{ - uloc_canonicalize( - localeID, - buffer, - bufferCapacity, - err); - - if (*err == U_STRING_NOT_TERMINATED_WARNING || - *err == U_BUFFER_OVERFLOW_ERROR) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - - return FALSE; - } - else if (U_FAILURE(*err)) { - - return FALSE; - } - else { - return TRUE; - } -} - -U_CAPI int32_t U_EXPORT2 -uloc_addLikelySubtags(const char* localeID, - char* maximizedLocaleID, - int32_t maximizedLocaleIDCapacity, - UErrorCode* err) -{ - char localeBuffer[ULOC_FULLNAME_CAPACITY]; - - if (!do_canonicalize( - localeID, - localeBuffer, - sizeof(localeBuffer), - err)) { - return -1; - } - else { - return _uloc_addLikelySubtags( - localeBuffer, - maximizedLocaleID, - maximizedLocaleIDCapacity, - err); - } -} - -U_CAPI int32_t U_EXPORT2 -uloc_minimizeSubtags(const char* localeID, - char* minimizedLocaleID, - int32_t minimizedLocaleIDCapacity, - UErrorCode* err) -{ - char localeBuffer[ULOC_FULLNAME_CAPACITY]; - - if (!do_canonicalize( - localeID, - localeBuffer, - sizeof(localeBuffer), - err)) { - return -1; - } - else { - return _uloc_minimizeSubtags( - localeBuffer, - minimizedLocaleID, - minimizedLocaleIDCapacity, - err); - } -} - -// Pairs of (language subtag, + or -) for finding out fast if common languages -// are LTR (minus) or RTL (plus). -static const char LANG_DIR_STRING[] = - "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-"; - -// Implemented here because this calls uloc_addLikelySubtags(). 
-U_CAPI UBool U_EXPORT2 -uloc_isRightToLeft(const char *locale) { - UErrorCode errorCode = U_ZERO_ERROR; - char script[8]; - int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode); - if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || - scriptLength == 0) { - // Fastpath: We know the likely scripts and their writing direction - // for some common languages. - errorCode = U_ZERO_ERROR; - char lang[8]; - int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode); - if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || - langLength == 0) { - return FALSE; - } - const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang); - if (langPtr != NULL) { - switch (langPtr[langLength]) { - case '-': return FALSE; - case '+': return TRUE; - default: break; // partial match of a longer code - } - } - // Otherwise, find the likely script. - errorCode = U_ZERO_ERROR; - char likely[ULOC_FULLNAME_CAPACITY]; - (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode); - if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { - return FALSE; - } - scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode); - if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || - scriptLength == 0) { - return FALSE; - } - } - UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); - return uscript_isRightToLeft(scriptCode); -} - -U_NAMESPACE_BEGIN - -UBool -Locale::isRightToLeft() const { - return uloc_isRightToLeft(getBaseName()); -} - -U_NAMESPACE_END - -// The following must at least allow for rg key value (6) plus terminator (1). 
-#define ULOC_RG_BUFLEN 8 - -U_CAPI int32_t U_EXPORT2 -ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, - char *region, int32_t regionCapacity, UErrorCode* status) { - if (U_FAILURE(*status)) { - return 0; - } - char rgBuf[ULOC_RG_BUFLEN]; - UErrorCode rgStatus = U_ZERO_ERROR; - - // First check for rg keyword value - int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus); - if (U_FAILURE(rgStatus) || rgLen != 6) { - rgLen = 0; - } else { - // rgBuf guaranteed to be zero terminated here, with text len 6 - char *rgPtr = rgBuf; - for (; *rgPtr!= 0; rgPtr++) { - *rgPtr = uprv_toupper(*rgPtr); - } - rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0; - } - - if (rgLen == 0) { - // No valid rg keyword value, try for unicode_region_subtag - rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status); - if (U_FAILURE(*status)) { - rgLen = 0; - } else if (rgLen == 0 && inferRegion) { - // no unicode_region_subtag but inferRegion TRUE, try likely subtags - char locBuf[ULOC_FULLNAME_CAPACITY]; - rgStatus = U_ZERO_ERROR; - (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus); - if (U_SUCCESS(rgStatus)) { - rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status); - if (U_FAILURE(*status)) { - rgLen = 0; - } - } - } - } - - rgBuf[rgLen] = 0; - uprv_strncpy(region, rgBuf, regionCapacity); - return u_terminateChars(region, regionCapacity, rgLen, status); -} diff --git a/deps/node/deps/icu-small/source/common/locmap.cpp b/deps/node/deps/icu-small/source/common/locmap.cpp deleted file mode 100644 index a3cf2d5e..00000000 --- a/deps/node/deps/icu-small/source/common/locmap.cpp +++ /dev/null @@ -1,1320 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ********************************************************************** - * Copyright (C) 1996-2016, International Business Machines - * Corporation and others. 
All Rights Reserved. - ********************************************************************** - * - * Provides functionality for mapping between - * LCID and Posix IDs or ICU locale to codepage - * - * Note: All classes and code in this file are - * intended for internal use only. - * - * Methods of interest: - * unsigned long convertToLCID(const char*); - * const char* convertToPosix(unsigned long); - * - * Kathleen Wilson, 4/30/96 - * - * Date Name Description - * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added - * setId() method and safety check against - * MAX_ID_LENGTH. - * 04/23/99 stephen Added C wrapper for convertToPosix. - * 09/18/00 george Removed the memory leaks. - * 08/23/01 george Convert to C - */ - -#include "locmap.h" -#include "cstring.h" -#include "cmemory.h" -#include "unicode/uloc.h" - -#if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500) -/* - * TODO: It seems like we should widen this to - * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW) - * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin) - * but those use gcc and won't have defined(_MSC_VER). - * We might need to #include some Windows header and test for some version macro from there. - * Or call some Windows function and see what it returns. - */ -#define USE_WINDOWS_LCID_MAPPING_API -#include -#include -#endif - -/* - * Note: - * The mapping from Win32 locale ID numbers to POSIX locale strings should - * be the faster one. 
- * - * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx - * [MS-LCID] Windows Language Code Identifier (LCID) Reference - */ - -/* -//////////////////////////////////////////////// -// -// Internal Classes for LCID <--> POSIX Mapping -// -///////////////////////////////////////////////// -*/ - -typedef struct ILcidPosixElement -{ - const uint32_t hostID; - const char * const posixID; -} ILcidPosixElement; - -typedef struct ILcidPosixMap -{ - const uint32_t numRegions; - const struct ILcidPosixElement* const regionMaps; -} ILcidPosixMap; - - -/* -///////////////////////////////////////////////// -// -// Easy macros to make the LCID <--> POSIX Mapping -// -///////////////////////////////////////////////// -*/ - -/** - * The standard one language/one country mapping for LCID. - * The first element must be the language, and the following - * elements are the language with the country. - * @param hostID LCID in host format such as 0x044d - * @param languageID posix ID of just the language such as 'de' - * @param posixID posix ID of the language_TERRITORY such as 'de_CH' - */ -#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \ -static const ILcidPosixElement locmap_ ## languageID [] = { \ - {LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \ - {hostID, #posixID}, \ -}; - -/** - * Define a subtable by ID - * @param id the POSIX ID, either a language or language_TERRITORY - */ -#define ILCID_POSIX_SUBTABLE(id) \ -static const ILcidPosixElement locmap_ ## id [] = - - -/** - * Create the map for the posixID. This macro supposes that the language string - * name is the same as the global variable name, and that the first element - * in the ILcidPosixElement is just the language. - * @param _posixID the full POSIX ID for this entry. 
- */ -#define ILCID_POSIX_MAP(_posixID) \ - {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID} - -/* -//////////////////////////////////////////// -// -// Create the table of LCID to POSIX Mapping -// None of it should be dynamically created. -// -// Keep static locale variables inside the function so that -// it can be created properly during static init. -// -// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier -// (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx -// -// Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be -// maintained for support of older Windows version. -// Update: Windows 7 (091130) -// -// Note: Microsoft assign a different LCID if a locale has a sorting variant. POSIX IDs below may contain -// @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is -// called from uloc_getLCID(), keywords other than collation are already removed. If we really need -// to support other keywords in this mapping data, we must update the implementation. -//////////////////////////////////////////// -*/ - -// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as -// LocaleNameToLCID and LCIDToLocaleName provide 90% of these. 
- -ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA) - -ILCID_POSIX_SUBTABLE(ar) { - {0x01, "ar"}, - {0x3801, "ar_AE"}, - {0x3c01, "ar_BH"}, - {0x1401, "ar_DZ"}, - {0x0c01, "ar_EG"}, - {0x0801, "ar_IQ"}, - {0x2c01, "ar_JO"}, - {0x3401, "ar_KW"}, - {0x3001, "ar_LB"}, - {0x1001, "ar_LY"}, - {0x1801, "ar_MA"}, - {0x1801, "ar_MO"}, - {0x2001, "ar_OM"}, - {0x4001, "ar_QA"}, - {0x0401, "ar_SA"}, - {0x2801, "ar_SY"}, - {0x1c01, "ar_TN"}, - {0x2401, "ar_YE"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN) -ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET) -ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL) - -ILCID_POSIX_SUBTABLE(az) { - {0x2c, "az"}, - {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */ - {0x742c, "az_Cyrl"}, /* Cyrillic based */ - {0x042c, "az_Latn_AZ"}, /* Latin based */ - {0x782c, "az_Latn"}, /* Latin based */ - {0x042c, "az_AZ"} /* Latin based */ -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU) -ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY) - -/*ILCID_POSIX_SUBTABLE(ber) { - {0x5f, "ber"}, - {0x045f, "ber_Arab_DZ"}, - {0x045f, "ber_Arab"}, - {0x085f, "ber_Latn_DZ"}, - {0x085f, "ber_Latn"} -};*/ - -ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG) - -ILCID_POSIX_SUBTABLE(bin) { - {0x66, "bin"}, - {0x0466, "bin_NG"} -}; - -ILCID_POSIX_SUBTABLE(bn) { - {0x45, "bn"}, - {0x0845, "bn_BD"}, - {0x0445, "bn_IN"} -}; - -ILCID_POSIX_SUBTABLE(bo) { - {0x51, "bo"}, - {0x0851, "bo_BT"}, - {0x0451, "bo_CN"}, - {0x0c51, "dz_BT"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR) - -ILCID_POSIX_SUBTABLE(ca) { - {0x03, "ca"}, - {0x0403, "ca_ES"}, - {0x0803, "ca_ES_VALENCIA"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR) - -ILCID_POSIX_SUBTABLE(chr) { - {0x05c, "chr"}, - {0x7c5c, "chr_Cher"}, - {0x045c, "chr_Cher_US"}, - {0x045c, "chr_US"} -}; - -// ICU has chosen different names for these. 
-ILCID_POSIX_SUBTABLE(ckb) { - {0x92, "ckb"}, - {0x7c92, "ckb_Arab"}, - {0x0492, "ckb_Arab_IQ"} -}; - -/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */ -ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ) - -ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB) -ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK) - -// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names -ILCID_POSIX_SUBTABLE(de) { - {0x07, "de"}, - {0x0c07, "de_AT"}, - {0x0807, "de_CH"}, - {0x0407, "de_DE"}, - {0x1407, "de_LI"}, - {0x1007, "de_LU"}, - {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/ - {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/ -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV) -ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR) - -// Windows uses an empty string for 'invariant' -ILCID_POSIX_SUBTABLE(en) { - {0x09, "en"}, - {0x0c09, "en_AU"}, - {0x2809, "en_BZ"}, - {0x1009, "en_CA"}, - {0x0809, "en_GB"}, - {0x3c09, "en_HK"}, - {0x3809, "en_ID"}, - {0x1809, "en_IE"}, - {0x4009, "en_IN"}, - {0x2009, "en_JM"}, - {0x4409, "en_MY"}, - {0x1409, "en_NZ"}, - {0x3409, "en_PH"}, - {0x4809, "en_SG"}, - {0x2C09, "en_TT"}, - {0x0409, "en_US"}, - {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */ - {0x2409, "en_029"}, - {0x1c09, "en_ZA"}, - {0x3009, "en_ZW"}, - {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */ - {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ - {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ - {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ - {0x0409, "en_MP"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ - {0x0409, "en_UM"} /* Alias for en_US. Leave last. 
On Windows8+ This is 0x1000 or dynamically assigned */ -}; - -ILCID_POSIX_SUBTABLE(en_US_POSIX) { - {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */ -}; - -// Windows doesn't know POSIX or BCP47 Unicode traditional sort names -ILCID_POSIX_SUBTABLE(es) { - {0x0a, "es"}, - {0x2c0a, "es_AR"}, - {0x400a, "es_BO"}, - {0x340a, "es_CL"}, - {0x240a, "es_CO"}, - {0x140a, "es_CR"}, - {0x5c0a, "es_CU"}, - {0x1c0a, "es_DO"}, - {0x300a, "es_EC"}, - {0x0c0a, "es_ES"}, /*Modern sort.*/ - {0x100a, "es_GT"}, - {0x480a, "es_HN"}, - {0x080a, "es_MX"}, - {0x4c0a, "es_NI"}, - {0x180a, "es_PA"}, - {0x280a, "es_PE"}, - {0x500a, "es_PR"}, - {0x3c0a, "es_PY"}, - {0x440a, "es_SV"}, - {0x540a, "es_US"}, - {0x380a, "es_UY"}, - {0x200a, "es_VE"}, - {0x580a, "es_419"}, - {0x040a, "es_ES@collation=traditional"}, - {0x040a, "es@collation=traditional"} // Windows will treat this as es-ES@collation=traditional -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE) -ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES) - -/* ISO-639 doesn't distinguish between Persian and Dari.*/ -ILCID_POSIX_SUBTABLE(fa) { - {0x29, "fa"}, - {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */ - {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */ -}; - - -/* duplicate for roundtripping */ -ILCID_POSIX_SUBTABLE(fa_AF) { - {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */ - {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */ -}; - -ILCID_POSIX_SUBTABLE(ff) { - {0x67, "ff"}, - {0x7c67, "ff_Latn"}, - {0x0867, "ff_Latn_SN"}, - {0x0467, "ff_NG"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI) -ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH) -ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO) - -ILCID_POSIX_SUBTABLE(fr) { - {0x0c, "fr"}, - {0x080c, "fr_BE"}, - {0x0c0c, "fr_CA"}, - {0x240c, "fr_CD"}, - {0x240c, "fr_CG"}, - {0x100c, "fr_CH"}, - {0x300c, "fr_CI"}, - {0x2c0c, "fr_CM"}, - {0x040c, "fr_FR"}, - {0x3c0c, "fr_HT"}, - {0x140c, "fr_LU"}, - {0x380c, "fr_MA"}, - {0x180c, "fr_MC"}, - {0x340c, "fr_ML"}, - {0x200c, "fr_RE"}, - {0x280c, 
"fr_SN"}, - {0xe40c, "fr_015"}, - {0x1c0c, "fr_029"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG) - -ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL) - -ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */ - {0x3c, "ga"}, - {0x083c, "ga_IE"}, - {0x043c, "gd_GB"} -}; - -ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */ - {0x91, "gd"}, - {0x0491, "gd_GB"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES) -ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN) -ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY) -ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR) - -ILCID_POSIX_SUBTABLE(ha) { - {0x68, "ha"}, - {0x7c68, "ha_Latn"}, - {0x0468, "ha_Latn_NG"}, -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US) -ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL) -ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN) - -/* This LCID is really four different locales.*/ -ILCID_POSIX_SUBTABLE(hr) { - {0x1a, "hr"}, - {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */ - {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */ - {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */ - {0x781a, "bs"}, /* Bosnian */ - {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */ - {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */ - {0x101a, "hr_BA"}, /* Croatian in Bosnia */ - {0x041a, "hr_HR"}, /* Croatian*/ - {0x2c1a, "sr_Latn_ME"}, - {0x241a, "sr_Latn_RS"}, - {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */ - {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/ - {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */ - {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */ - {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/ - {0x301a, "sr_Cyrl_ME"}, - {0x281a, "sr_Cyrl_RS"}, - {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */ - {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. 
*/ -}; - -ILCID_POSIX_SUBTABLE(hsb) { - {0x2E, "hsb"}, - {0x042E, "hsb_DE"}, - {0x082E, "dsb_DE"}, - {0x7C2E, "dsb"}, -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU) -ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM) - -ILCID_POSIX_SUBTABLE(ibb) { - {0x69, "ibb"}, - {0x0469, "ibb_NG"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID) -ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG) -ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN) -ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS) - -ILCID_POSIX_SUBTABLE(it) { - {0x10, "it"}, - {0x0810, "it_CH"}, - {0x0410, "it_IT"} -}; - -ILCID_POSIX_SUBTABLE(iu) { - {0x5d, "iu"}, - {0x045d, "iu_Cans_CA"}, - {0x785d, "iu_Cans"}, - {0x085d, "iu_Latn_CA"}, - {0x7c5d, "iu_Latn"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/ -ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP) -ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE) -ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ) -ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL) -ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH) -ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN) - -ILCID_POSIX_SUBTABLE(ko) { - {0x12, "ko"}, - {0x0812, "ko_KP"}, - {0x0412, "ko_KR"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN) -ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG) - -ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */ - {0x60, "ks"}, - {0x0460, "ks_Arab_IN"}, - {0x0860, "ks_Deva_IN"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */ - -ILCID_POSIX_SUBTABLE(la) { - {0x76, "la"}, - {0x0476, "la_001"}, - {0x0476, "la_IT"} /*Left in for compatibility*/ -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU) -ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA) -ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT) -ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV) -ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ) -ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK) -ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN) - -ILCID_POSIX_SUBTABLE(mn) { - {0x50, "mn"}, - {0x0450, "mn_MN"}, - {0x7c50, 
"mn_Mong"}, - {0x0850, "mn_Mong_CN"}, - {0x0850, "mn_CN"}, - {0x7850, "mn_Cyrl"}, - {0x0c50, "mn_Mong_MN"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN) -ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA) -ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN) - -ILCID_POSIX_SUBTABLE(ms) { - {0x3e, "ms"}, - {0x083e, "ms_BN"}, /* Brunei Darussalam*/ - {0x043e, "ms_MY"} /* Malaysia*/ -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT) -ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM) - -ILCID_POSIX_SUBTABLE(ne) { - {0x61, "ne"}, - {0x0861, "ne_IN"}, /* India*/ - {0x0461, "ne_NP"} /* Nepal*/ -}; - -ILCID_POSIX_SUBTABLE(nl) { - {0x13, "nl"}, - {0x0813, "nl_BE"}, - {0x0413, "nl_NL"} -}; - -/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/ -// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here. -ILCID_POSIX_SUBTABLE(no) { - {0x14, "no"}, /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */ - {0x7c14, "nb"}, /* really nb */ - {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */ - {0x0414, "no_NO"}, /* really nb_NO */ - {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */ - {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. 
*/ - {0x0814, "no_NO_NY"}/* really nn_NO */ -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */ -ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR) - -ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */ - {0x72, "om"}, - {0x0472, "om_ET"}, - {0x0472, "gaz_ET"} -}; - -/* Declared as or_IN to get around compiler errors*/ -ILCID_POSIX_SUBTABLE(or_IN) { - {0x48, "or"}, - {0x0448, "or_IN"}, -}; - -ILCID_POSIX_SUBTABLE(pa) { - {0x46, "pa"}, - {0x0446, "pa_IN"}, - {0x0846, "pa_Arab_PK"}, - {0x0846, "pa_PK"} -}; - -ILCID_POSIX_SUBTABLE(pap) { - {0x79, "pap"}, - {0x0479, "pap_029"}, - {0x0479, "pap_AN"} /*Left in for compatibility*/ -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL) -ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF) - -ILCID_POSIX_SUBTABLE(pt) { - {0x16, "pt"}, - {0x0416, "pt_BR"}, - {0x0816, "pt_PT"} -}; - -ILCID_POSIX_SUBTABLE(qu) { - {0x6b, "qu"}, - {0x046b, "qu_BO"}, - {0x086b, "qu_EC"}, - {0x0C6b, "qu_PE"}, - {0x046b, "quz_BO"}, - {0x086b, "quz_EC"}, - {0x0C6b, "quz_PE"} -}; - -ILCID_POSIX_SUBTABLE(quc) { - {0x93, "quc"}, - {0x0493, "quc_CO"}, - /* - "quc_Latn_GT" is an exceptional case. Language ID of "quc" - is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be - under the group of "qut". "qut" is a retired ISO 639-3 language - code for West Central Quiche, and merged to "quc". - It looks Windows previously reserved "qut" for K'iche', but, - decided to use "quc" when adding a locale for K'iche' (Guatemala). - - This data structure used here assumes language ID bits in - LCID is unique for alphabetic language code. But this is not true - for "quc_Latn_GT". If we don't have the data below, LCID look up - by alphabetic locale ID (POSIX) will fail. The same entry is found - under "qut" below, which is required for reverse look up. - */ - {0x0486, "quc_Latn_GT"} -}; - -ILCID_POSIX_SUBTABLE(qut) { - {0x86, "qut"}, - {0x0486, "qut_GT"}, - /* - See the note in "quc" above. 
- */ - {0x0486, "quc_Latn_GT"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH) - -ILCID_POSIX_SUBTABLE(ro) { - {0x18, "ro"}, - {0x0418, "ro_RO"}, - {0x0818, "ro_MD"} -}; - -// TODO: This is almost certainly 'wrong'. 0 in Windows is a synonym for LOCALE_USER_DEFAULT. -// More likely this is a similar concept to the Windows 0x7f Invariant locale "" -// (Except that it's not invariant in ICU) -ILCID_POSIX_SUBTABLE(root) { - {0x00, "root"} -}; - -ILCID_POSIX_SUBTABLE(ru) { - {0x19, "ru"}, - {0x0419, "ru_RU"}, - {0x0819, "ru_MD"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW) -ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN) -ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU) - -ILCID_POSIX_SUBTABLE(sd) { - {0x59, "sd"}, - {0x0459, "sd_Deva_IN"}, - {0x0459, "sd_IN"}, - {0x0859, "sd_Arab_PK"}, - {0x0859, "sd_PK"}, - {0x7c59, "sd_Arab"} -}; - -ILCID_POSIX_SUBTABLE(se) { - {0x3b, "se"}, - {0x0c3b, "se_FI"}, - {0x043b, "se_NO"}, - {0x083b, "se_SE"}, - {0x783b, "sma"}, - {0x183b, "sma_NO"}, - {0x1c3b, "sma_SE"}, - {0x7c3b, "smj"}, - {0x703b, "smn"}, - {0x743b, "sms"}, - {0x103b, "smj_NO"}, - {0x143b, "smj_SE"}, - {0x243b, "smn_FI"}, - {0x203b, "sms_FI"}, -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK) -ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK) -ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI) - -ILCID_POSIX_SUBTABLE(so) { - {0x77, "so"}, - {0x0477, "so_SO"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL) -ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA) - -ILCID_POSIX_SUBTABLE(sv) { - {0x1d, "sv"}, - {0x081d, "sv_FI"}, - {0x041d, "sv_SE"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE) -ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY) - -ILCID_POSIX_SUBTABLE(ta) { - {0x49, "ta"}, - {0x0449, "ta_IN"}, - {0x0849, "ta_LK"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN) - -/* Cyrillic based by default */ -ILCID_POSIX_SUBTABLE(tg) { - {0x28, "tg"}, - {0x7c28, "tg_Cyrl"}, - {0x0428, "tg_Cyrl_TJ"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH) - 
-ILCID_POSIX_SUBTABLE(ti) { - {0x73, "ti"}, - {0x0873, "ti_ER"}, - {0x0473, "ti_ET"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM) - -ILCID_POSIX_SUBTABLE(tn) { - {0x32, "tn"}, - {0x0832, "tn_BW"}, - {0x0432, "tn_ZA"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR) -ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA) -ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU) - -ILCID_POSIX_SUBTABLE(tzm) { - {0x5f, "tzm"}, - {0x7c5f, "tzm_Latn"}, - {0x085f, "tzm_Latn_DZ"}, - {0x105f, "tzm_Tfng_MA"}, - {0x045f, "tzm_Arab_MA"}, - {0x045f, "tmz"} -}; - -ILCID_POSIX_SUBTABLE(ug) { - {0x80, "ug"}, - {0x0480, "ug_CN"}, - {0x0480, "ug_Arab_CN"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA) - -ILCID_POSIX_SUBTABLE(ur) { - {0x20, "ur"}, - {0x0820, "ur_IN"}, - {0x0420, "ur_PK"} -}; - -ILCID_POSIX_SUBTABLE(uz) { - {0x43, "uz"}, - {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */ - {0x7843, "uz_Cyrl"}, /* Cyrillic based */ - {0x0843, "uz_UZ"}, /* Cyrillic based */ - {0x0443, "uz_Latn_UZ"}, /* Latin based */ - {0x7c43, "uz_Latn"} /* Latin based */ -}; - -ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */ - {0x33, "ve"}, - {0x0433, "ve_ZA"}, - {0x0433, "ven_ZA"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN) -ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN) -ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA) - -ILCID_POSIX_SUBTABLE(yi) { - {0x003d, "yi"}, - {0x043d, "yi_001"} -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG) - -// Windows & ICU tend to different names for some of these -// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works. 
-ILCID_POSIX_SUBTABLE(zh) { - {0x0004, "zh_Hans"}, - {0x7804, "zh"}, - {0x0804, "zh_CN"}, - {0x0804, "zh_Hans_CN"}, - {0x0c04, "zh_Hant_HK"}, - {0x0c04, "zh_HK"}, - {0x1404, "zh_Hant_MO"}, - {0x1404, "zh_MO"}, - {0x1004, "zh_Hans_SG"}, - {0x1004, "zh_SG"}, - {0x0404, "zh_Hant_TW"}, - {0x7c04, "zh_Hant"}, - {0x0404, "zh_TW"}, - {0x30404,"zh_Hant_TW"}, /* Bopomofo order */ - {0x30404,"zh_TW"}, /* Bopomofo order */ - {0x20004,"zh@collation=stroke"}, - {0x20404,"zh_Hant@collation=stroke"}, - {0x20404,"zh_Hant_TW@collation=stroke"}, - {0x20404,"zh_TW@collation=stroke"}, - {0x20804,"zh_Hans@collation=stroke"}, - {0x20804,"zh_Hans_CN@collation=stroke"}, - {0x20804,"zh_CN@collation=stroke"} - // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804 -}; - -ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA) - -/* This must be static and grouped by LCID. */ -static const ILcidPosixMap gPosixIDmap[] = { - ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */ - ILCID_POSIX_MAP(am), /* am Amharic 0x5e */ - ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */ - ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */ - ILCID_POSIX_MAP(as), /* as Assamese 0x4d */ - ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */ - ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */ - ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */ -/* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */ - ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */ - ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */ - ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */ - ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */ - ILCID_POSIX_MAP(br), /* br Breton 0x7e */ - ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */ - ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */ - ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */ - ILCID_POSIX_MAP(co), /* co Corsican 0x83 */ - ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */ - ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */ - ILCID_POSIX_MAP(da), /* da Danish 0x06 */ - ILCID_POSIX_MAP(de), /* de German 0x07 */ - ILCID_POSIX_MAP(dv), /* dv Divehi 
0x65 */ - ILCID_POSIX_MAP(el), /* el Greek 0x08 */ - ILCID_POSIX_MAP(en), /* en English 0x09 */ - ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */ - ILCID_POSIX_MAP(es), /* es Spanish 0x0a */ - ILCID_POSIX_MAP(et), /* et Estonian 0x25 */ - ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */ - ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */ - ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */ - ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */ - ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */ - ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */ - ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */ - ILCID_POSIX_MAP(fr), /* fr French 0x0c */ - ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */ - ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */ - ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */ - ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */ - ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */ - ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */ - ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */ - ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */ - ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */ - ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */ - ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */ - ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */ - ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */ - ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */ - ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */ - ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */ - ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */ - ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */ - ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */ - ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */ - ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */ - ILCID_POSIX_MAP(it), /* it Italian 0x10 */ - ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */ - ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */ - ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */ - ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */ - ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */ - ILCID_POSIX_MAP(kl), /* kl 
Kalaallisut 0x6f */ - ILCID_POSIX_MAP(km), /* km Khmer 0x53 */ - ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */ - ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */ - ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */ - ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */ - ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */ - ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */ - ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */ - ILCID_POSIX_MAP(la), /* la Latin 0x76 */ - ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */ - ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */ - ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */ - ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */ - ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */ - ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */ - ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */ - ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */ - ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */ - ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */ - ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */ - ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */ - ILCID_POSIX_MAP(my), /* my Burmese 0x55 */ -/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */ - ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */ - ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */ -/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */ - ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */ - ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */ - ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */ - ILCID_POSIX_MAP(om), /* om Oromo 0x72 */ - ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */ - ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */ - ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */ - ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */ - ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */ - ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */ - ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */ - ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */ - ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */ - ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */ - ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */ - ILCID_POSIX_MAP(root), /* root 0x00 */ - 
ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */ - ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */ - ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */ - ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */ - ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */ - ILCID_POSIX_MAP(se), /* se Sami 0x3b */ -/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */ - ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */ - ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */ - ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */ - ILCID_POSIX_MAP(so), /* so Somali 0x77 */ - ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */ -/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */ - ILCID_POSIX_MAP(st), /* st Sutu 0x30 */ - ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */ - ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */ - ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */ - ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */ - ILCID_POSIX_MAP(te), /* te Telugu 0x4a */ - ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */ - ILCID_POSIX_MAP(th), /* th Thai 0x1e */ - ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */ - ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */ - ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */ - ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */ - ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */ - ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */ - ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */ - ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */ - ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */ - ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */ - ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */ - ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */ - ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */ - ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */ - ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */ - ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */ - ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */ - ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */ - ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */ -}; - -static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap); - -/** - * Do not call this function. It is called by hostID. 
- * The function is not private because this struct must stay as a C struct, - * and this is an internal class. - */ -static int32_t -idCmp(const char* id1, const char* id2) -{ - int32_t diffIdx = 0; - while (*id1 == *id2 && *id1 != 0) { - diffIdx++; - id1++; - id2++; - } - return diffIdx; -} - -/** - * Searches for a Windows LCID - * - * @param posixid the Posix style locale id. - * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has - * no equivalent Windows LCID. - * @return the LCID - */ -static uint32_t -getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status) -{ - int32_t bestIdx = 0; - int32_t bestIdxDiff = 0; - int32_t posixIDlen = (int32_t)uprv_strlen(posixID); - uint32_t idx; - - for (idx = 0; idx < this_0->numRegions; idx++ ) { - int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID); - if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) { - if (posixIDlen == sameChars) { - /* Exact match */ - return this_0->regionMaps[idx].hostID; - } - bestIdxDiff = sameChars; - bestIdx = idx; - } - } - /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */ - /* We also have to make sure that sid and si and similar string subsets don't match. */ - if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@') - && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0) - { - *status = U_USING_FALLBACK_WARNING; - return this_0->regionMaps[bestIdx].hostID; - } - - /*no match found */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return this_0->regionMaps->hostID; -} - -static const char* -getPosixID(const ILcidPosixMap *this_0, uint32_t hostID) -{ - uint32_t i; - for (i = 0; i < this_0->numRegions; i++) - { - if (this_0->regionMaps[i].hostID == hostID) - { - return this_0->regionMaps[i].posixID; - } - } - - /* If you get here, then no matching region was found, - so return the language id with the wild card region. 
*/ - return this_0->regionMaps[0].posixID; -} - -/* -////////////////////////////////////// -// -// LCID --> POSIX -// -///////////////////////////////////// -*/ -#ifdef USE_WINDOWS_LCID_MAPPING_API -/* - * Various language tags needs to be changed: - * quz -> qu - * prs -> fa - */ -#define FIX_LANGUAGE_ID_TAG(buffer, len) \ - if (len >= 3) { \ - if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {\ - buffer[2] = 0; \ - uprv_strcat(buffer, buffer+3); \ - } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {\ - buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0; \ - uprv_strcat(buffer, buffer+3); \ - } \ - } - -#endif -U_CAPI int32_t -uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status) -{ - uint16_t langID; - uint32_t localeIndex; - UBool bLookup = TRUE; - const char *pPosixID = NULL; - -#ifdef USE_WINDOWS_LCID_MAPPING_API - char locName[LOCALE_NAME_MAX_LENGTH] = {}; // ICU name can't be longer than Windows name - - // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and - // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for - // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot - // use the Windows API to resolve locale ID for this specific case. - if ((hostid & 0x3FF) != 0x92) { - int32_t tmpLen = 0; - UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH]; // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH - - // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names. - tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES); - if (tmpLen > 1) { - int32_t i = 0; - // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort. 
- bLookup = FALSE; - for (i = 0; i < UPRV_LENGTHOF(locName); i++) - { - locName[i] = (char)(windowsLocaleName[i]); - - // Windows locale name may contain sorting variant, such as "es-ES_tradnl". - // In such cases, we need special mapping data found in the hardcoded table - // in this source file. - if (windowsLocaleName[i] == L'_') - { - // Keep the base locale, without variant - // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.? - locName[i] = '\0'; - tmpLen = i; - bLookup = TRUE; - break; - } - else if (windowsLocaleName[i] == L'-') - { - // Windows names use -, ICU uses _ - locName[i] = '_'; - } - else if (windowsLocaleName[i] == L'\0') - { - // No point in doing more work than necessary - break; - } - } - // TODO: Need to understand this better, why isn't it an alias? - FIX_LANGUAGE_ID_TAG(locName, tmpLen); - pPosixID = locName; - } - } -#endif // USE_WINDOWS_LCID_MAPPING_API - - if (bLookup) { - const char *pCandidate = NULL; - langID = LANGUAGE_LCID(hostid); - - for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) { - if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) { - pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid); - break; - } - } - - /* On Windows, when locale name has a variant, we still look up the hardcoded table. - If a match in the hardcoded table is longer than the Windows locale name without - variant, we use the one as the result */ - if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) { - pPosixID = pCandidate; - } - } - - if (pPosixID) { - int32_t resLen = static_cast(uprv_strlen(pPosixID)); - int32_t copyLen = resLen <= posixIDCapacity ? 
resLen : posixIDCapacity; - uprv_memcpy(posixID, pPosixID, copyLen); - if (resLen < posixIDCapacity) { - posixID[resLen] = 0; - if (*status == U_STRING_NOT_TERMINATED_WARNING) { - *status = U_ZERO_ERROR; - } - } else if (resLen == posixIDCapacity) { - *status = U_STRING_NOT_TERMINATED_WARNING; - } else { - *status = U_BUFFER_OVERFLOW_ERROR; - } - return resLen; - } - - /* no match found */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return -1; -} - -/* -////////////////////////////////////// -// -// POSIX --> LCID -// This should only be called from uloc_getLCID. -// The locale ID must be in canonical form. -// -///////////////////////////////////// -*/ -U_CAPI uint32_t -uprv_convertToLCIDPlatform(const char* localeID) -{ - // The purpose of this function is to leverage native platform name->lcid - // conversion functionality when available. -#ifdef USE_WINDOWS_LCID_MAPPING_API - DWORD nameLCIDFlags = 0; - UErrorCode myStatus = U_ZERO_ERROR; - - // First check for a Windows name->LCID match, fall through to catch - // ICU special cases, but Windows may know it already. -#if LOCALE_ALLOW_NEUTRAL_NAMES - nameLCIDFlags = LOCALE_ALLOW_NEUTRAL_NAMES; -#endif /* LOCALE_ALLOW_NEUTRAL_NAMES */ - - int32_t len; - char collVal[ULOC_KEYWORDS_CAPACITY] = {}; - char baseName[ULOC_FULLNAME_CAPACITY] = {}; - const char * mylocaleID = localeID; - - // Check any for keywords. - if (uprv_strchr(localeID, '@')) - { - len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, &myStatus); - if (U_SUCCESS(myStatus) && len > 0) - { - // If it contains the keyword collation, return 0 so that the LCID lookup table will be used. - return 0; - } - else - { - // If the locale ID contains keywords other than collation, just use the base name. 
- len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, &myStatus); - - if (U_SUCCESS(myStatus) && len > 0) - { - baseName[len] = 0; - mylocaleID = baseName; - } - } - } - - char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {}; - // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form - (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus); - - if (U_SUCCESS(myStatus)) - { - // Need it to be UTF-16, not 8-bit - wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {}; - int32_t i; - for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++) - { - if (asciiBCP47Tag[i] == '\0') - { - break; - } - else - { - // Copy the character - bcp47Tag[i] = static_cast(asciiBCP47Tag[i]); - } - } - - if (i < (UPRV_LENGTHOF(bcp47Tag) - 1)) - { - // Ensure it's null terminated - bcp47Tag[i] = L'\0'; - LCID lcid = LocaleNameToLCID(bcp47Tag, nameLCIDFlags); - if (lcid > 0) - { - // Found LCID from windows, return that one, unless its completely ambiguous - // LOCALE_USER_DEFAULT and transients are OK because they will round trip - // for this process. - if (lcid != LOCALE_CUSTOM_UNSPECIFIED) - { - return lcid; - } - } - } - } -#else - (void)localeID; // Suppress unused variable warning. -#endif /* USE_WINDOWS_LCID_MAPPING_API */ - - // No found, or not implemented on platforms without native name->lcid conversion - return 0; -} - -U_CAPI uint32_t -uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status) -{ - // This function does the table lookup when native platform name->lcid conversion isn't available, - // or for locales that don't follow patterns the platform expects. - uint32_t low = 0; - uint32_t high = gLocaleCount; - uint32_t mid; - uint32_t oldmid = 0; - int32_t compVal; - - uint32_t value = 0; - uint32_t fallbackValue = (uint32_t)-1; - UErrorCode myStatus; - uint32_t idx; - - /* Check for incomplete id. 
*/ - if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) { - return 0; - } - - /*Binary search for the map entry for normal cases */ - - while (high > low) /*binary search*/{ - - mid = (high+low) >> 1; /*Finds median*/ - - if (mid == oldmid) - break; - - compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID); - if (compVal < 0){ - high = mid; - } - else if (compVal > 0){ - low = mid; - } - else /*we found it*/{ - return getHostID(&gPosixIDmap[mid], posixID, status); - } - oldmid = mid; - } - - /* - * Sometimes we can't do a binary search on posixID because some LCIDs - * go to different locales. We hit one of those special cases. - */ - for (idx = 0; idx < gLocaleCount; idx++ ) { - myStatus = U_ZERO_ERROR; - value = getHostID(&gPosixIDmap[idx], posixID, &myStatus); - if (myStatus == U_ZERO_ERROR) { - return value; - } - else if (myStatus == U_USING_FALLBACK_WARNING) { - fallbackValue = value; - } - } - - if (fallbackValue != (uint32_t)-1) { - *status = U_USING_FALLBACK_WARNING; - return fallbackValue; - } - - /* no match found */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; /* return international (root) */ -} diff --git a/deps/node/deps/icu-small/source/common/locmap.h b/deps/node/deps/icu-small/source/common/locmap.h deleted file mode 100644 index 2d7a3d37..00000000 --- a/deps/node/deps/icu-small/source/common/locmap.h +++ /dev/null @@ -1,39 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1996-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File locmap.h : Locale Mapping Classes -* -* -* Created by: Helena Shih -* -* Modification History: -* -* Date Name Description -* 3/11/97 aliu Added setId(). 
-* 4/20/99 Madhu Added T_convertToPosix() -* 09/18/00 george Removed the memory leaks. -* 08/23/01 george Convert to C -*============================================================================ -*/ - -#ifndef LOCMAP_H -#define LOCMAP_H - -#include "unicode/utypes.h" - -#define LANGUAGE_LCID(hostID) (uint16_t)(0x03FF & hostID) - -U_CAPI int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status); - -/* Don't call these functions directly. Use uloc_getLCID instead. */ -U_CAPI uint32_t uprv_convertToLCIDPlatform(const char *localeID); // Leverage platform conversion if possible -U_CAPI uint32_t uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status); - -#endif /* LOCMAP_H */ diff --git a/deps/node/deps/icu-small/source/common/locresdata.cpp b/deps/node/deps/icu-small/source/common/locresdata.cpp deleted file mode 100644 index f890411c..00000000 --- a/deps/node/deps/icu-small/source/common/locresdata.cpp +++ /dev/null @@ -1,225 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1997-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: loclikely.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010feb25 -* created by: Markus W. Scherer -* -* Code for miscellaneous locale-related resource bundle data access, -* separated out from other .cpp files -* that then do not depend on resource bundle code and this data. 
-*/ - -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "unicode/uloc.h" -#include "unicode/ures.h" -#include "cstring.h" -#include "ulocimp.h" -#include "uresimp.h" - -/* - * Lookup a resource bundle table item with fallback on the table level. - * Regular resource bundle lookups perform fallback to parent locale bundles - * and eventually the root bundle, but only for top-level items. - * This function takes the name of a top-level table and of an item in that table - * and performs a lookup of both, falling back until a bundle contains a table - * with this item. - * - * Note: Only the opening of entire bundles falls back through the default locale - * before root. Once a bundle is open, item lookups do not go through the - * default locale because that would result in a mix of languages that is - * unpredictable to the programmer and most likely useless. - */ -U_CAPI const UChar * U_EXPORT2 -uloc_getTableStringWithFallback(const char *path, const char *locale, - const char *tableKey, const char *subTableKey, - const char *itemKey, - int32_t *pLength, - UErrorCode *pErrorCode) -{ -/* char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/ - UResourceBundle *rb=NULL, table, subTable; - const UChar *item=NULL; - UErrorCode errorCode; - char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0}; - - /* - * open the bundle for the current locale - * this falls back through the locale's chain to root - */ - errorCode=U_ZERO_ERROR; - rb=ures_open(path, locale, &errorCode); - - if(U_FAILURE(errorCode)) { - /* total failure, not even root could be opened */ - *pErrorCode=errorCode; - return NULL; - } else if(errorCode==U_USING_DEFAULT_WARNING || - (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING) - ) { - /* set the "strongest" error code (success->fallback->default->failure) */ - *pErrorCode=errorCode; - } - - for(;;){ - ures_initStackObject(&table); - ures_initStackObject(&subTable); - ures_getByKeyWithFallback(rb, tableKey, &table, 
&errorCode); - - if (subTableKey != NULL) { - /* - ures_getByKeyWithFallback(&table,subTableKey, &subTable, &errorCode); - item = ures_getStringByKeyWithFallback(&subTable, itemKey, pLength, &errorCode); - if(U_FAILURE(errorCode)){ - *pErrorCode = errorCode; - } - - break;*/ - - ures_getByKeyWithFallback(&table,subTableKey, &table, &errorCode); - } - if(U_SUCCESS(errorCode)){ - item = ures_getStringByKeyWithFallback(&table, itemKey, pLength, &errorCode); - if(U_FAILURE(errorCode)){ - const char* replacement = NULL; - *pErrorCode = errorCode; /*save the errorCode*/ - errorCode = U_ZERO_ERROR; - /* may be a deprecated code */ - if(uprv_strcmp(tableKey, "Countries")==0){ - replacement = uloc_getCurrentCountryID(itemKey); - }else if(uprv_strcmp(tableKey, "Languages")==0){ - replacement = uloc_getCurrentLanguageID(itemKey); - } - /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself is replacement is not found*/ - if(replacement!=NULL && itemKey != replacement){ - item = ures_getStringByKeyWithFallback(&table, replacement, pLength, &errorCode); - if(U_SUCCESS(errorCode)){ - *pErrorCode = errorCode; - break; - } - } - }else{ - break; - } - } - - if(U_FAILURE(errorCode)){ - - /* still can't figure out ?.. try the fallback mechanism */ - int32_t len = 0; - const UChar* fallbackLocale = NULL; - *pErrorCode = errorCode; - errorCode = U_ZERO_ERROR; - - fallbackLocale = ures_getStringByKeyWithFallback(&table, "Fallback", &len, &errorCode); - if(U_FAILURE(errorCode)){ - *pErrorCode = errorCode; - break; - } - - u_UCharsToChars(fallbackLocale, explicitFallbackName, len); - - /* guard against recursive fallback */ - if(uprv_strcmp(explicitFallbackName, locale)==0){ - *pErrorCode = U_INTERNAL_PROGRAM_ERROR; - break; - } - ures_close(rb); - rb = ures_open(path, explicitFallbackName, &errorCode); - if(U_FAILURE(errorCode)){ - *pErrorCode = errorCode; - break; - } - /* succeeded in opening the fallback bundle .. 
continue and try to fetch the item */ - }else{ - break; - } - } - /* done with the locale string - ready to close table and rb */ - ures_close(&subTable); - ures_close(&table); - ures_close(rb); - return item; -} - -static ULayoutType -_uloc_getOrientationHelper(const char* localeId, - const char* key, - UErrorCode *status) -{ - ULayoutType result = ULOC_LAYOUT_UNKNOWN; - - if (!U_FAILURE(*status)) { - int32_t length = 0; - char localeBuffer[ULOC_FULLNAME_CAPACITY]; - - uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status); - - if (!U_FAILURE(*status)) { - const UChar* const value = - uloc_getTableStringWithFallback( - NULL, - localeBuffer, - "layout", - NULL, - key, - &length, - status); - - if (!U_FAILURE(*status) && length != 0) { - switch(value[0]) - { - case 0x0062: /* 'b' */ - result = ULOC_LAYOUT_BTT; - break; - case 0x006C: /* 'l' */ - result = ULOC_LAYOUT_LTR; - break; - case 0x0072: /* 'r' */ - result = ULOC_LAYOUT_RTL; - break; - case 0x0074: /* 't' */ - result = ULOC_LAYOUT_TTB; - break; - default: - *status = U_INTERNAL_PROGRAM_ERROR; - break; - } - } - } - } - - return result; -} - -U_CAPI ULayoutType U_EXPORT2 -uloc_getCharacterOrientation(const char* localeId, - UErrorCode *status) -{ - return _uloc_getOrientationHelper(localeId, "characters", status); -} - -/** - * Get the layout line orientation for the specified locale. - * - * @param localeID locale name - * @param status Error status - * @return an enum indicating the layout orientation for lines. - */ -U_CAPI ULayoutType U_EXPORT2 -uloc_getLineOrientation(const char* localeId, - UErrorCode *status) -{ - return _uloc_getOrientationHelper(localeId, "lines", status); -} diff --git a/deps/node/deps/icu-small/source/common/locutil.cpp b/deps/node/deps/icu-small/source/common/locutil.cpp deleted file mode 100644 index 02d2be50..00000000 --- a/deps/node/deps/icu-small/source/common/locutil.cpp +++ /dev/null @@ -1,273 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* - ******************************************************************************* - * Copyright (C) 2002-2014, International Business Machines Corporation and - * others. All Rights Reserved. - ******************************************************************************* - */ -#include "unicode/utypes.h" - -#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION - -#include "unicode/resbund.h" -#include "cmemory.h" -#include "ustrfmt.h" -#include "locutil.h" -#include "charstr.h" -#include "ucln_cmn.h" -#include "uassert.h" -#include "umutex.h" - -// see LocaleUtility::getAvailableLocaleNames -static icu::UInitOnce LocaleUtilityInitOnce = U_INITONCE_INITIALIZER; -static icu::Hashtable * LocaleUtility_cache = NULL; - -#define UNDERSCORE_CHAR ((UChar)0x005f) -#define AT_SIGN_CHAR ((UChar)64) -#define PERIOD_CHAR ((UChar)46) - -/* - ****************************************************************** - */ - -/** - * Release all static memory held by Locale Utility. - */ -U_CDECL_BEGIN -static UBool U_CALLCONV service_cleanup(void) { - if (LocaleUtility_cache) { - delete LocaleUtility_cache; - LocaleUtility_cache = NULL; - } - return TRUE; -} - - -static void U_CALLCONV locale_utility_init(UErrorCode &status) { - using namespace icu; - U_ASSERT(LocaleUtility_cache == NULL); - ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup); - LocaleUtility_cache = new Hashtable(status); - if (U_FAILURE(status)) { - delete LocaleUtility_cache; - LocaleUtility_cache = NULL; - return; - } - if (LocaleUtility_cache == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - LocaleUtility_cache->setValueDeleter(uhash_deleteHashtable); -} - -U_CDECL_END - -U_NAMESPACE_BEGIN - -UnicodeString& -LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result) -{ - if (id == NULL) { - result.setToBogus(); - } else { - // Fix case only (no other changes) up to the first '@' or '.' 
or - // end of string, whichever comes first. In 3.0 I changed this to - // stop at first '@' or '.'. It used to run out to the end of - // string. My fix makes the tests pass but is probably - // structurally incorrect. See below. [alan 3.0] - - // TODO: Doug, you might want to revise this... - result = *id; - int32_t i = 0; - int32_t end = result.indexOf(AT_SIGN_CHAR); - int32_t n = result.indexOf(PERIOD_CHAR); - if (n >= 0 && n < end) { - end = n; - } - if (end < 0) { - end = result.length(); - } - n = result.indexOf(UNDERSCORE_CHAR); - if (n < 0) { - n = end; - } - for (; i < n; ++i) { - UChar c = result.charAt(i); - if (c >= 0x0041 && c <= 0x005a) { - c += 0x20; - result.setCharAt(i, c); - } - } - for (n = end; i < n; ++i) { - UChar c = result.charAt(i); - if (c >= 0x0061 && c <= 0x007a) { - c -= 0x20; - result.setCharAt(i, c); - } - } - } - return result; - -#if 0 - // This code does a proper full level 2 canonicalization of id. - // It's nasty to go from UChar to char to char to UChar -- but - // that's what you have to do to use the uloc_canonicalize - // function on UnicodeStrings. - - // I ended up doing the alternate fix (see above) not for - // performance reasons, although performance will certainly be - // better, but because doing a full level 2 canonicalization - // causes some tests to fail. [alan 3.0] - - // TODO: Doug, you might want to revisit this... - result.setToBogus(); - if (id != 0) { - int32_t buflen = id->length() + 8; // space for NUL - char* buf = (char*) uprv_malloc(buflen); - char* canon = (buf == 0) ? 
0 : (char*) uprv_malloc(buflen); - if (buf != 0 && canon != 0) { - U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen); - UErrorCode ec = U_ZERO_ERROR; - uloc_canonicalize(buf, canon, buflen, &ec); - if (U_SUCCESS(ec)) { - result = UnicodeString(canon); - } - } - uprv_free(buf); - uprv_free(canon); - } - return result; -#endif -} - -Locale& -LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result) -{ - enum { BUFLEN = 128 }; // larger than ever needed - - if (id.isBogus() || id.length() >= BUFLEN) { - result.setToBogus(); - } else { - /* - * We need to convert from a UnicodeString to char * in order to - * create a Locale. - * - * Problem: Locale ID strings may contain '@' which is a variant - * character and cannot be handled by invariant-character conversion. - * - * Hack: Since ICU code can handle locale IDs with multiple encodings - * of '@' (at least for EBCDIC; it's not known to be a problem for - * ASCII-based systems), - * we use regular invariant-character conversion for everything else - * and manually convert U+0040 into a compiler-char-constant '@'. - * While this compilation-time constant may not match the runtime - * encoding of '@', it should be one of the encodings which ICU - * recognizes. - * - * There should be only at most one '@' in a locale ID. 
- */ - char buffer[BUFLEN]; - int32_t prev, i; - prev = 0; - for(;;) { - i = id.indexOf((UChar)0x40, prev); - if(i < 0) { - // no @ between prev and the rest of the string - id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV); - break; // done - } else { - // normal invariant-character conversion for text between @s - id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV); - // manually "convert" U+0040 at id[i] into '@' at buffer[i] - buffer[i] = '@'; - prev = i + 1; - } - } - result = Locale::createFromName(buffer); - } - return result; -} - -UnicodeString& -LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result) -{ - if (locale.isBogus()) { - result.setToBogus(); - } else { - result.append(UnicodeString(locale.getName(), -1, US_INV)); - } - return result; -} - -const Hashtable* -LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID) -{ - // LocaleUtility_cache is a hash-of-hashes. The top-level keys - // are path strings ('bundleID') passed to - // ures_openAvailableLocales. The top-level values are - // second-level hashes. The second-level keys are result strings - // from ures_openAvailableLocales. The second-level values are - // garbage ((void*)1 or other random pointer). - - UErrorCode status = U_ZERO_ERROR; - umtx_initOnce(LocaleUtilityInitOnce, locale_utility_init, status); - Hashtable *cache = LocaleUtility_cache; - if (cache == NULL) { - // Catastrophic failure. - return NULL; - } - - Hashtable* htp; - umtx_lock(NULL); - htp = (Hashtable*) cache->get(bundleID); - umtx_unlock(NULL); - - if (htp == NULL) { - htp = new Hashtable(status); - if (htp && U_SUCCESS(status)) { - CharString cbundleID; - cbundleID.appendInvariantChars(bundleID, status); - const char* path = cbundleID.isEmpty() ? 
NULL : cbundleID.data(); - UEnumeration *uenum = ures_openAvailableLocales(path, &status); - for (;;) { - const UChar* id = uenum_unext(uenum, NULL, &status); - if (id == NULL) { - break; - } - htp->put(UnicodeString(id), (void*)htp, status); - } - uenum_close(uenum); - if (U_FAILURE(status)) { - delete htp; - return NULL; - } - umtx_lock(NULL); - Hashtable *t = static_cast(cache->get(bundleID)); - if (t != NULL) { - // Another thread raced through this code, creating the cache entry first. - // Discard ours and return theirs. - umtx_unlock(NULL); - delete htp; - htp = t; - } else { - cache->put(bundleID, (void*)htp, status); - umtx_unlock(NULL); - } - } - } - return htp; -} - -UBool -LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child) -{ - return child.indexOf(root) == 0 && - (child.length() == root.length() || - child.charAt(root.length()) == UNDERSCORE_CHAR); -} - -U_NAMESPACE_END - -/* !UCONFIG_NO_SERVICE */ -#endif diff --git a/deps/node/deps/icu-small/source/common/locutil.h b/deps/node/deps/icu-small/source/common/locutil.h deleted file mode 100644 index 31bfffd7..00000000 --- a/deps/node/deps/icu-small/source/common/locutil.h +++ /dev/null @@ -1,39 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** - ******************************************************************************* - * Copyright (C) 2002-2005, International Business Machines Corporation and * - * others. All Rights Reserved. 
* - ******************************************************************************* - * - ******************************************************************************* - */ -#ifndef LOCUTIL_H -#define LOCUTIL_H - -#include "unicode/utypes.h" -#include "hash.h" - -#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION - - -U_NAMESPACE_BEGIN - -// temporary utility functions, till I know where to find them -// in header so tests can also access them - -class U_COMMON_API LocaleUtility { -public: - static UnicodeString& canonicalLocaleString(const UnicodeString* id, UnicodeString& result); - static Locale& initLocaleFromName(const UnicodeString& id, Locale& result); - static UnicodeString& initNameFromLocale(const Locale& locale, UnicodeString& result); - static const Hashtable* getAvailableLocaleNames(const UnicodeString& bundleID); - static UBool isFallbackOf(const UnicodeString& root, const UnicodeString& child); -}; - -U_NAMESPACE_END - - -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/messageimpl.h b/deps/node/deps/icu-small/source/common/messageimpl.h deleted file mode 100644 index dc7a6edd..00000000 --- a/deps/node/deps/icu-small/source/common/messageimpl.h +++ /dev/null @@ -1,65 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: messageimpl.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011apr04 -* created by: Markus W. 
Scherer -*/ - -#ifndef __MESSAGEIMPL_H__ -#define __MESSAGEIMPL_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/messagepattern.h" - -U_NAMESPACE_BEGIN - -/** - * Helper functions for use of MessagePattern. - * In Java, these are package-private methods in MessagePattern itself. - * In C++, they are declared here and implemented in messagepattern.cpp. - */ -class U_COMMON_API MessageImpl { -public: - /** - * @return TRUE if getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED - */ - static UBool jdkAposMode(const MessagePattern &msgPattern) { - return msgPattern.getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED; - } - - /** - * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes - * according to JDK pattern behavior. - */ - static void appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit, - UnicodeString &sb); - - /** - * Appends the sub-message to the result string. - * Omits SKIP_SYNTAX and appends whole arguments using appendReducedApostrophes(). - */ - static UnicodeString &appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern, - int32_t msgStart, - UnicodeString &result); - -private: - MessageImpl(); // no constructor: all static methods -}; - -U_NAMESPACE_END - -#endif // !UCONFIG_NO_FORMATTING - -#endif // __MESSAGEIMPL_H__ diff --git a/deps/node/deps/icu-small/source/common/messagepattern.cpp b/deps/node/deps/icu-small/source/common/messagepattern.cpp deleted file mode 100644 index 2f79780b..00000000 --- a/deps/node/deps/icu-small/source/common/messagepattern.cpp +++ /dev/null @@ -1,1233 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011-2012, International Business Machines -* Corporation and others. All Rights Reserved. 
-******************************************************************************* -* file name: messagepattern.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011mar14 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/messagepattern.h" -#include "unicode/unistr.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "cstring.h" -#include "messageimpl.h" -#include "patternprops.h" -#include "putilimp.h" -#include "uassert.h" - -U_NAMESPACE_BEGIN - -// Unicode character/code point constants ---------------------------------- *** - -static const UChar u_pound=0x23; -static const UChar u_apos=0x27; -static const UChar u_plus=0x2B; -static const UChar u_comma=0x2C; -static const UChar u_minus=0x2D; -static const UChar u_dot=0x2E; -static const UChar u_colon=0x3A; -static const UChar u_lessThan=0x3C; -static const UChar u_equal=0x3D; -static const UChar u_A=0x41; -static const UChar u_C=0x43; -static const UChar u_D=0x44; -static const UChar u_E=0x45; -static const UChar u_H=0x48; -static const UChar u_I=0x49; -static const UChar u_L=0x4C; -static const UChar u_N=0x4E; -static const UChar u_O=0x4F; -static const UChar u_P=0x50; -static const UChar u_R=0x52; -static const UChar u_S=0x53; -static const UChar u_T=0x54; -static const UChar u_U=0x55; -static const UChar u_Z=0x5A; -static const UChar u_a=0x61; -static const UChar u_c=0x63; -static const UChar u_d=0x64; -static const UChar u_e=0x65; -static const UChar u_f=0x66; -static const UChar u_h=0x68; -static const UChar u_i=0x69; -static const UChar u_l=0x6C; -static const UChar u_n=0x6E; -static const UChar u_o=0x6F; -static const UChar u_p=0x70; -static const UChar u_r=0x72; -static const UChar u_s=0x73; -static const UChar u_t=0x74; -static const UChar u_u=0x75; -static const UChar u_z=0x7A; -static const UChar u_leftCurlyBrace=0x7B; -static const UChar u_pipe=0x7C; -static const UChar 
u_rightCurlyBrace=0x7D; -static const UChar u_lessOrEqual=0x2264; // U+2264 is <= - -static const UChar kOffsetColon[]={ // "offset:" - u_o, u_f, u_f, u_s, u_e, u_t, u_colon -}; - -static const UChar kOther[]={ // "other" - u_o, u_t, u_h, u_e, u_r -}; - -// MessagePatternList ------------------------------------------------------ *** - -template -class MessagePatternList : public UMemory { -public: - MessagePatternList() {} - void copyFrom(const MessagePatternList &other, - int32_t length, - UErrorCode &errorCode); - UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode); - UBool equals(const MessagePatternList &other, int32_t length) const { - for(int32_t i=0; i a; -}; - -template -void -MessagePatternList::copyFrom( - const MessagePatternList &other, - int32_t length, - UErrorCode &errorCode) { - if(U_SUCCESS(errorCode) && length>0) { - if(length>a.getCapacity() && NULL==a.resize(length)) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - uprv_memcpy(a.getAlias(), other.a.getAlias(), (size_t)length*sizeof(T)); - } -} - -template -UBool -MessagePatternList::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return FALSE; - } - if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=NULL) { - return TRUE; - } - errorCode=U_MEMORY_ALLOCATION_ERROR; - return FALSE; -} - -// MessagePatternList specializations -------------------------------------- *** - -class MessagePatternDoubleList : public MessagePatternList { -}; - -class MessagePatternPartsList : public MessagePatternList { -}; - -// MessagePattern constructors etc. 
---------------------------------------- *** - -MessagePattern::MessagePattern(UErrorCode &errorCode) - : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE), - partsList(NULL), parts(NULL), partsLength(0), - numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), - hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { - init(errorCode); -} - -MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode) - : aposMode(mode), - partsList(NULL), parts(NULL), partsLength(0), - numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), - hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { - init(errorCode); -} - -MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) - : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE), - partsList(NULL), parts(NULL), partsLength(0), - numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), - hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { - if(init(errorCode)) { - parse(pattern, parseError, errorCode); - } -} - -UBool -MessagePattern::init(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return FALSE; - } - partsList=new MessagePatternPartsList(); - if(partsList==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - parts=partsList->a.getAlias(); - return TRUE; -} - -MessagePattern::MessagePattern(const MessagePattern &other) - : UObject(other), aposMode(other.aposMode), msg(other.msg), - partsList(NULL), parts(NULL), partsLength(0), - numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), - hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers), - needsAutoQuoting(other.needsAutoQuoting) { - UErrorCode errorCode=U_ZERO_ERROR; - if(!copyStorage(other, errorCode)) { - clear(); - } -} - -MessagePattern & -MessagePattern::operator=(const MessagePattern &other) { - if(this==&other) { - return *this; - } - 
aposMode=other.aposMode; - msg=other.msg; - hasArgNames=other.hasArgNames; - hasArgNumbers=other.hasArgNumbers; - needsAutoQuoting=other.needsAutoQuoting; - UErrorCode errorCode=U_ZERO_ERROR; - if(!copyStorage(other, errorCode)) { - clear(); - } - return *this; -} - -UBool -MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return FALSE; - } - parts=NULL; - partsLength=0; - numericValues=NULL; - numericValuesLength=0; - if(partsList==NULL) { - partsList=new MessagePatternPartsList(); - if(partsList==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - parts=partsList->a.getAlias(); - } - if(other.partsLength>0) { - partsList->copyFrom(*other.partsList, other.partsLength, errorCode); - if(U_FAILURE(errorCode)) { - return FALSE; - } - parts=partsList->a.getAlias(); - partsLength=other.partsLength; - } - if(other.numericValuesLength>0) { - if(numericValuesList==NULL) { - numericValuesList=new MessagePatternDoubleList(); - if(numericValuesList==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - numericValues=numericValuesList->a.getAlias(); - } - numericValuesList->copyFrom( - *other.numericValuesList, other.numericValuesLength, errorCode); - if(U_FAILURE(errorCode)) { - return FALSE; - } - numericValues=numericValuesList->a.getAlias(); - numericValuesLength=other.numericValuesLength; - } - return TRUE; -} - -MessagePattern::~MessagePattern() { - delete partsList; - delete numericValuesList; -} - -// MessagePattern API ------------------------------------------------------ *** - -MessagePattern & -MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) { - preParse(pattern, parseError, errorCode); - parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode); - postParse(); - return *this; -} - -MessagePattern & -MessagePattern::parseChoiceStyle(const UnicodeString &pattern, - UParseError *parseError, UErrorCode &errorCode) 
{ - preParse(pattern, parseError, errorCode); - parseChoiceStyle(0, 0, parseError, errorCode); - postParse(); - return *this; -} - -MessagePattern & -MessagePattern::parsePluralStyle(const UnicodeString &pattern, - UParseError *parseError, UErrorCode &errorCode) { - preParse(pattern, parseError, errorCode); - parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode); - postParse(); - return *this; -} - -MessagePattern & -MessagePattern::parseSelectStyle(const UnicodeString &pattern, - UParseError *parseError, UErrorCode &errorCode) { - preParse(pattern, parseError, errorCode); - parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode); - postParse(); - return *this; -} - -void -MessagePattern::clear() { - // Mostly the same as preParse(). - msg.remove(); - hasArgNames=hasArgNumbers=FALSE; - needsAutoQuoting=FALSE; - partsLength=0; - numericValuesLength=0; -} - -UBool -MessagePattern::operator==(const MessagePattern &other) const { - if(this==&other) { - return TRUE; - } - return - aposMode==other.aposMode && - msg==other.msg && - // parts.equals(o.parts) - partsLength==other.partsLength && - (partsLength==0 || partsList->equals(*other.partsList, partsLength)); - // No need to compare numericValues if msg and parts are the same. -} - -int32_t -MessagePattern::hashCode() const { - int32_t hash=(aposMode*37+msg.hashCode())*37+partsLength; - for(int32_t i=0; i0;) { - const Part &part=getPart(--i); - if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) { - modified.insert(part.index, (UChar)part.value); - } - } - return modified; -} - -double -MessagePattern::getNumericValue(const Part &part) const { - UMessagePatternPartType type=part.type; - if(type==UMSGPAT_PART_TYPE_ARG_INT) { - return part.value; - } else if(type==UMSGPAT_PART_TYPE_ARG_DOUBLE) { - return numericValues[part.value]; - } else { - return UMSGPAT_NO_NUMERIC_VALUE; - } -} - -/** - * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 
- * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) - * @return the "offset:" value. - * @draft ICU 4.8 - */ -double -MessagePattern::getPluralOffset(int32_t pluralStart) const { - const Part &part=getPart(pluralStart); - if(Part::hasNumericValue(part.type)) { - return getNumericValue(part); - } else { - return 0; - } -} - -// MessagePattern::Part ---------------------------------------------------- *** - -UBool -MessagePattern::Part::operator==(const Part &other) const { - if(this==&other) { - return TRUE; - } - return - type==other.type && - index==other.index && - length==other.length && - value==other.value && - limitPartIndex==other.limitPartIndex; -} - -// MessagePattern parser --------------------------------------------------- *** - -void -MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - if(parseError!=NULL) { - parseError->line=0; - parseError->offset=0; - parseError->preContext[0]=0; - parseError->postContext[0]=0; - } - msg=pattern; - hasArgNames=hasArgNumbers=FALSE; - needsAutoQuoting=FALSE; - partsLength=0; - numericValuesLength=0; -} - -void -MessagePattern::postParse() { - if(partsList!=NULL) { - parts=partsList->a.getAlias(); - } - if(numericValuesList!=NULL) { - numericValues=numericValuesList->a.getAlias(); - } -} - -int32_t -MessagePattern::parseMessage(int32_t index, int32_t msgStartLength, - int32_t nestingLevel, UMessagePatternArgType parentType, - UParseError *parseError, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return 0; - } - if(nestingLevel>Part::MAX_VALUE) { - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - int32_t msgStart=partsLength; - addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode); - index+=msgStartLength; - for(;;) { // while(index=msg.length()) { - break; - } - UChar c=msg.charAt(index++); - if(c==u_apos) { - 
if(index==msg.length()) { - // The apostrophe is the last character in the pattern. - // Add a Part for auto-quoting. - addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, - u_apos, errorCode); // value=char to be inserted - needsAutoQuoting=TRUE; - } else { - c=msg.charAt(index); - if(c==u_apos) { - // double apostrophe, skip the second one - addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode); - } else if( - aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED || - c==u_leftCurlyBrace || c==u_rightCurlyBrace || - (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) || - (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) - ) { - // skip the quote-starting apostrophe - addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode); - // find the end of the quoted literal text - for(;;) { - index=msg.indexOf(u_apos, index+1); - if(index>=0) { - if(/*(index+1)0 && c==u_rightCurlyBrace) || - (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) { - // Finish the message before the terminator. - // In a choice style, report the "}" substring only for the following ARG_LIMIT, - // not for this MSG_LIMIT. - int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1; - addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength, - nestingLevel, errorCode); - if(parentType==UMSGPAT_ARG_TYPE_CHOICE) { - // Let the choice style parser see the '}' or '|'. - return index-1; - } else { - // continue parsing after the '}' - return index; - } - } // else: c is part of literal text - } - if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) { - setParseError(parseError, 0); // Unmatched '{' braces in message. 
- errorCode=U_UNMATCHED_BRACES; - return 0; - } - addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode); - return index; -} - -int32_t -MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, - UParseError *parseError, UErrorCode &errorCode) { - int32_t argStart=partsLength; - UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE; - addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode); - if(U_FAILURE(errorCode)) { - return 0; - } - int32_t nameIndex=index=skipWhiteSpace(index+argStartLength); - if(index==msg.length()) { - setParseError(parseError, 0); // Unmatched '{' braces in message. - errorCode=U_UNMATCHED_BRACES; - return 0; - } - // parse argument name or number - index=skipIdentifier(index); - int32_t number=parseArgNumber(nameIndex, index); - if(number>=0) { - int32_t length=index-nameIndex; - if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) { - setParseError(parseError, nameIndex); // Argument number too large. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - hasArgNumbers=TRUE; - addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode); - } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) { - int32_t length=index-nameIndex; - if(length>Part::MAX_LENGTH) { - setParseError(parseError, nameIndex); // Argument name too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - hasArgNames=TRUE; - addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode); - } else { // number<-1 (ARG_NAME_NOT_VALID) - setParseError(parseError, nameIndex); // Bad argument syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - index=skipWhiteSpace(index); - if(index==msg.length()) { - setParseError(parseError, 0); // Unmatched '{' braces in message. 
- errorCode=U_UNMATCHED_BRACES; - return 0; - } - UChar c=msg.charAt(index); - if(c==u_rightCurlyBrace) { - // all done - } else if(c!=u_comma) { - setParseError(parseError, nameIndex); // Bad argument syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } else /* ',' */ { - // parse argument type: case-sensitive a-zA-Z - int32_t typeIndex=index=skipWhiteSpace(index+1); - while(indexPart::MAX_LENGTH) { - setParseError(parseError, nameIndex); // Argument type name too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - argType=UMSGPAT_ARG_TYPE_SIMPLE; - if(length==6) { - // case-insensitive comparisons for complex-type names - if(isChoice(typeIndex)) { - argType=UMSGPAT_ARG_TYPE_CHOICE; - } else if(isPlural(typeIndex)) { - argType=UMSGPAT_ARG_TYPE_PLURAL; - } else if(isSelect(typeIndex)) { - argType=UMSGPAT_ARG_TYPE_SELECT; - } - } else if(length==13) { - if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) { - argType=UMSGPAT_ARG_TYPE_SELECTORDINAL; - } - } - // change the ARG_START type from NONE to argType - partsList->a[argStart].value=(int16_t)argType; - if(argType==UMSGPAT_ARG_TYPE_SIMPLE) { - addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode); - } - // look for an argument style (pattern) - if(c==u_rightCurlyBrace) { - if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) { - setParseError(parseError, nameIndex); // No style field for complex argument. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - } else /* ',' */ { - ++index; - if(argType==UMSGPAT_ARG_TYPE_SIMPLE) { - index=parseSimpleStyle(index, parseError, errorCode); - } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) { - index=parseChoiceStyle(index, nestingLevel, parseError, errorCode); - } else { - index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode); - } - } - } - // Argument parsing stopped on the '}'. 
- addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode); - return index+1; -} - -int32_t -MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return 0; - } - int32_t start=index; - int32_t nestedBraces=0; - while(index0) { - --nestedBraces; - } else { - int32_t length=--index-start; - if(length>Part::MAX_LENGTH) { - setParseError(parseError, start); // Argument style text too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode); - return index; - } - } // c is part of literal text - } - setParseError(parseError, 0); // Unmatched '{' braces in message. - errorCode=U_UNMATCHED_BRACES; - return 0; -} - -int32_t -MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel, - UParseError *parseError, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return 0; - } - int32_t start=index; - index=skipWhiteSpace(index); - if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) { - setParseError(parseError, 0); // Missing choice argument pattern. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - for(;;) { - // The choice argument style contains |-separated (number, separator, message) triples. - // Parse the number. - int32_t numberIndex=index; - index=skipDouble(index); - int32_t length=index-numberIndex; - if(length==0) { - setParseError(parseError, start); // Bad choice pattern syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - if(length>Part::MAX_LENGTH) { - setParseError(parseError, numberIndex); // Choice number too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - parseDouble(numberIndex, index, TRUE, parseError, errorCode); // adds ARG_INT or ARG_DOUBLE - if(U_FAILURE(errorCode)) { - return 0; - } - // Parse the separator. 
- index=skipWhiteSpace(index); - if(index==msg.length()) { - setParseError(parseError, start); // Bad choice pattern syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - UChar c=msg.charAt(index); - if(!(c==u_pound || c==u_lessThan || c==u_lessOrEqual)) { // U+2264 is <= - setParseError(parseError, start); // Expected choice separator (#<\u2264) instead of c. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode); - // Parse the message fragment. - index=parseMessage(++index, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE, parseError, errorCode); - if(U_FAILURE(errorCode)) { - return 0; - } - // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length(). - if(index==msg.length()) { - return index; - } - if(msg.charAt(index)==u_rightCurlyBrace) { - if(!inMessageFormatPattern(nestingLevel)) { - setParseError(parseError, start); // Bad choice pattern syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - return index; - } // else the terminator is '|' - index=skipWhiteSpace(index+1); - } -} - -int32_t -MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType, - int32_t index, int32_t nestingLevel, - UParseError *parseError, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return 0; - } - int32_t start=index; - UBool isEmpty=TRUE; - UBool hasOther=FALSE; - for(;;) { - // First, collect the selector looking for a small set of terminators. - // It would be a little faster to consider the syntax of each possible - // token right here, but that makes the code too complicated. - index=skipWhiteSpace(index); - UBool eos=index==msg.length(); - if(eos || msg.charAt(index)==u_rightCurlyBrace) { - if(eos==inMessageFormatPattern(nestingLevel)) { - setParseError(parseError, start); // Bad plural/select pattern syntax. 
- errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - if(!hasOther) { - setParseError(parseError, 0); // Missing 'other' keyword in plural/select pattern. - errorCode=U_DEFAULT_KEYWORD_MISSING; - return 0; - } - return index; - } - int32_t selectorIndex=index; - if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && msg.charAt(selectorIndex)==u_equal) { - // explicit-value plural selector: =double - index=skipDouble(index+1); - int32_t length=index-selectorIndex; - if(length==1) { - setParseError(parseError, start); // Bad plural/select pattern syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - if(length>Part::MAX_LENGTH) { - setParseError(parseError, selectorIndex); // Argument selector too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode); - parseDouble(selectorIndex+1, index, FALSE, - parseError, errorCode); // adds ARG_INT or ARG_DOUBLE - } else { - index=skipIdentifier(index); - int32_t length=index-selectorIndex; - if(length==0) { - setParseError(parseError, start); // Bad plural/select pattern syntax. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - // Note: The ':' in "offset:" is just beyond the skipIdentifier() range. - if( UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && length==6 && indexPart::MAX_LENGTH) { - setParseError(parseError, valueIndex); // Plural offset value too long. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - parseDouble(valueIndex, index, FALSE, - parseError, errorCode); // adds ARG_INT or ARG_DOUBLE - if(U_FAILURE(errorCode)) { - return 0; - } - isEmpty=FALSE; - continue; // no message fragment after the offset - } else { - // normal selector word - if(length>Part::MAX_LENGTH) { - setParseError(parseError, selectorIndex); // Argument selector too long. 
- errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode); - if(0==msg.compare(selectorIndex, length, kOther, 0, 5)) { - hasOther=TRUE; - } - } - } - if(U_FAILURE(errorCode)) { - return 0; - } - - // parse the message fragment following the selector - index=skipWhiteSpace(index); - if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) { - setParseError(parseError, selectorIndex); // No message fragment after plural/select selector. - errorCode=U_PATTERN_SYNTAX_ERROR; - return 0; - } - index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode); - if(U_FAILURE(errorCode)) { - return 0; - } - isEmpty=FALSE; - } -} - -int32_t -MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) { - // If the identifier contains only ASCII digits, then it is an argument _number_ - // and must not have leading zeros (except "0" itself). - // Otherwise it is an argument _name_. - if(start>=limit) { - return UMSGPAT_ARG_NAME_NOT_VALID; - } - int32_t number; - // Defer numeric errors until we know there are only digits. - UBool badNumber; - UChar c=s.charAt(start++); - if(c==0x30) { - if(start==limit) { - return 0; - } else { - number=0; - badNumber=TRUE; // leading zero - } - } else if(0x31<=c && c<=0x39) { - number=c-0x30; - badNumber=FALSE; - } else { - return UMSGPAT_ARG_NAME_NOT_NUMBER; - } - while(start=INT32_MAX/10) { - badNumber=TRUE; // overflow - } - number=number*10+(c-0x30); - } else { - return UMSGPAT_ARG_NAME_NOT_NUMBER; - } - } - // There are only ASCII digits. 
- if(badNumber) { - return UMSGPAT_ARG_NAME_NOT_VALID; - } else { - return number; - } -} - -void -MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity, - UParseError *parseError, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - U_ASSERT(start(Part::MAX_VALUE+isNegative)) { - break; // not a small-enough integer - } - if(index==limit) { - addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start, - isNegative!=0 ? -value : value, errorCode); - return; - } - c=msg.charAt(index++); - } - // Let Double.parseDouble() throw a NumberFormatException. - char numberChars[128]; - int32_t capacity=(int32_t)sizeof(numberChars); - int32_t length=limit-start; - if(length>=capacity) { - break; // number too long - } - msg.extract(start, length, numberChars, capacity, US_INV); - if((int32_t)uprv_strlen(numberChars)0x39 && c!=u_e && c!=u_E && c!=0x221e)) { - break; - } - ++index; - } - return index; -} - -UBool -MessagePattern::isArgTypeChar(UChar32 c) { - return (u_a<=c && c<=u_z) || (u_A<=c && c<=u_Z); -} - -UBool -MessagePattern::isChoice(int32_t index) { - UChar c; - return - ((c=msg.charAt(index++))==u_c || c==u_C) && - ((c=msg.charAt(index++))==u_h || c==u_H) && - ((c=msg.charAt(index++))==u_o || c==u_O) && - ((c=msg.charAt(index++))==u_i || c==u_I) && - ((c=msg.charAt(index++))==u_c || c==u_C) && - ((c=msg.charAt(index))==u_e || c==u_E); -} - -UBool -MessagePattern::isPlural(int32_t index) { - UChar c; - return - ((c=msg.charAt(index++))==u_p || c==u_P) && - ((c=msg.charAt(index++))==u_l || c==u_L) && - ((c=msg.charAt(index++))==u_u || c==u_U) && - ((c=msg.charAt(index++))==u_r || c==u_R) && - ((c=msg.charAt(index++))==u_a || c==u_A) && - ((c=msg.charAt(index))==u_l || c==u_L); -} - -UBool -MessagePattern::isSelect(int32_t index) { - UChar c; - return - ((c=msg.charAt(index++))==u_s || c==u_S) && - ((c=msg.charAt(index++))==u_e || c==u_E) && - ((c=msg.charAt(index++))==u_l || c==u_L) && - ((c=msg.charAt(index++))==u_e || c==u_E) && - 
((c=msg.charAt(index++))==u_c || c==u_C) && - ((c=msg.charAt(index))==u_t || c==u_T); -} - -UBool -MessagePattern::isOrdinal(int32_t index) { - UChar c; - return - ((c=msg.charAt(index++))==u_o || c==u_O) && - ((c=msg.charAt(index++))==u_r || c==u_R) && - ((c=msg.charAt(index++))==u_d || c==u_D) && - ((c=msg.charAt(index++))==u_i || c==u_I) && - ((c=msg.charAt(index++))==u_n || c==u_N) && - ((c=msg.charAt(index++))==u_a || c==u_A) && - ((c=msg.charAt(index))==u_l || c==u_L); -} - -UBool -MessagePattern::inMessageFormatPattern(int32_t nestingLevel) { - return nestingLevel>0 || partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START; -} - -UBool -MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) { - return - nestingLevel==1 && - parentType==UMSGPAT_ARG_TYPE_CHOICE && - partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START; -} - -void -MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length, - int32_t value, UErrorCode &errorCode) { - if(partsList->ensureCapacityForOneMore(partsLength, errorCode)) { - Part &part=partsList->a[partsLength++]; - part.type=type; - part.index=index; - part.length=(uint16_t)length; - part.value=(int16_t)value; - part.limitPartIndex=0; - } -} - -void -MessagePattern::addLimitPart(int32_t start, - UMessagePatternPartType type, int32_t index, int32_t length, - int32_t value, UErrorCode &errorCode) { - partsList->a[start].limitPartIndex=partsLength; - addPart(type, index, length, value, errorCode); -} - -void -MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length, - UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - int32_t numericIndex=numericValuesLength; - if(numericValuesList==NULL) { - numericValuesList=new MessagePatternDoubleList(); - if(numericValuesList==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - } else if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) { - return; - } else 
{ - if(numericIndex>Part::MAX_VALUE) { - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - } - numericValuesList->a[numericValuesLength++]=numericValue; - addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, numericIndex, errorCode); -} - -void -MessagePattern::setParseError(UParseError *parseError, int32_t index) { - if(parseError==NULL) { - return; - } - parseError->offset=index; - - // Set preContext to some of msg before index. - // Avoid splitting a surrogate pair. - int32_t length=index; - if(length>=U_PARSE_CONTEXT_LEN) { - length=U_PARSE_CONTEXT_LEN-1; - if(length>0 && U16_IS_TRAIL(msg[index-length])) { - --length; - } - } - msg.extract(index-length, length, parseError->preContext); - parseError->preContext[length]=0; - - // Set postContext to some of msg starting at index. - length=msg.length()-index; - if(length>=U_PARSE_CONTEXT_LEN) { - length=U_PARSE_CONTEXT_LEN-1; - if(length>0 && U16_IS_LEAD(msg[index+length-1])) { - --length; - } - } - msg.extract(index, length, parseError->postContext); - parseError->postContext[length]=0; -} - -// MessageImpl ------------------------------------------------------------- *** - -void -MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit, - UnicodeString &sb) { - int32_t doubleApos=-1; - for(;;) { - int32_t i=s.indexOf(u_apos, start); - if(i<0 || i>=limit) { - sb.append(s, start, limit-start); - break; - } - if(i==doubleApos) { - // Double apostrophe at start-1 and start==i, append one. - sb.append(u_apos); - ++start; - doubleApos=-1; - } else { - // Append text between apostrophes and skip this one. - sb.append(s, start, i-start); - doubleApos=start=i+1; - } - } -} - -// Ported from second half of ICU4J SelectFormat.format(String). 
-UnicodeString & -MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern, - int32_t msgStart, - UnicodeString &result) { - const UnicodeString &msgString=msgPattern.getPatternString(); - int32_t prevIndex=msgPattern.getPart(msgStart).getLimit(); - for(int32_t i=msgStart;;) { - const MessagePattern::Part &part=msgPattern.getPart(++i); - UMessagePatternPartType type=part.getType(); - int32_t index=part.getIndex(); - if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) { - return result.append(msgString, prevIndex, index-prevIndex); - } else if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX) { - result.append(msgString, prevIndex, index-prevIndex); - prevIndex=part.getLimit(); - } else if(type==UMSGPAT_PART_TYPE_ARG_START) { - result.append(msgString, prevIndex, index-prevIndex); - prevIndex=index; - i=msgPattern.getLimitPartIndex(i); - index=msgPattern.getPart(i).getLimit(); - appendReducedApostrophes(msgString, prevIndex, index, result); - prevIndex=index; - } - } -} - -U_NAMESPACE_END - -#endif // !UCONFIG_NO_FORMATTING diff --git a/deps/node/deps/icu-small/source/common/msvcres.h b/deps/node/deps/icu-small/source/common/msvcres.h deleted file mode 100644 index d6581b27..00000000 --- a/deps/node/deps/icu-small/source/common/msvcres.h +++ /dev/null @@ -1,25 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -//{{NO_DEPENDENCIES}} -// Copyright (c) 2003-2010 International Business Machines -// Corporation and others. All Rights Reserved. -// -// Used by common.rc and other .rc files. -//Do not edit with Microsoft Developer Studio because it will modify this -//header the wrong way. This is here to prevent Visual Studio .NET from -//unnessarily building the resource files when it's not needed. -// - -/* -These are defined before unicode/uversion.h in order to prevent -STLPort's broken stddef.h from being used when rc.exe parses this file. 
-*/ -#define _STLP_OUTERMOST_HEADER_ID 0 -#define _STLP_WINCE 1 - -#include "unicode/uversion.h" - -#define ICU_WEBSITE "http://icu-project.org" -#define ICU_COMPANY "The ICU Project" -#define ICU_PRODUCT_PREFIX "ICU" -#define ICU_PRODUCT "International Components for Unicode" diff --git a/deps/node/deps/icu-small/source/common/mutex.h b/deps/node/deps/icu-small/source/common/mutex.h deleted file mode 100644 index 5223397b..00000000 --- a/deps/node/deps/icu-small/source/common/mutex.h +++ /dev/null @@ -1,79 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -*/ -//---------------------------------------------------------------------------- -// File: mutex.h -// -// Lightweight C++ wrapper for umtx_ C mutex functions -// -// Author: Alan Liu 1/31/97 -// History: -// 06/04/97 helena Updated setImplementation as per feedback from 5/21 drop. -// 04/07/1999 srl refocused as a thin wrapper -// -//---------------------------------------------------------------------------- -#ifndef MUTEX_H -#define MUTEX_H - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "umutex.h" - -U_NAMESPACE_BEGIN - -//---------------------------------------------------------------------------- -// Code within that accesses shared static or global data should -// should instantiate a Mutex object while doing so. You should make your own -// private mutex where possible. 
- -// For example: -// -// UMutex myMutex = U_MUTEX_INITIALIZER; -// -// void Function(int arg1, int arg2) -// { -// static Object* foo; // Shared read-write object -// Mutex mutex(&myMutex); // or no args for the global lock -// foo->Method(); -// // When 'mutex' goes out of scope and gets destroyed here, the lock is released -// } -// -// Note: Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function -// returning a Mutex. This is a common mistake which silently slips through the -// compiler!! -// - -class U_COMMON_API Mutex : public UMemory { -public: - inline Mutex(UMutex *mutex = NULL); - inline ~Mutex(); - -private: - UMutex *fMutex; - - Mutex(const Mutex &other); // forbid copying of this class - Mutex &operator=(const Mutex &other); // forbid copying of this class -}; - -inline Mutex::Mutex(UMutex *mutex) - : fMutex(mutex) -{ - umtx_lock(fMutex); -} - -inline Mutex::~Mutex() -{ - umtx_unlock(fMutex); -} - -U_NAMESPACE_END - -#endif //_MUTEX_ -//eof diff --git a/deps/node/deps/icu-small/source/common/norm2_nfc_data.h b/deps/node/deps/icu-small/source/common/norm2_nfc_data.h deleted file mode 100644 index 82a68097..00000000 --- a/deps/node/deps/icu-small/source/common/norm2_nfc_data.h +++ /dev/null @@ -1,1139 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// -// Copyright (C) 1999-2016, International Business Machines -// Corporation and others. All Rights Reserved. 
-// -// file name: norm2_nfc_data.h -// -// machine-generated by: icu/source/tools/gennorm2/n2builder.cpp - - -#ifdef INCLUDED_FROM_NORMALIZER2_CPP - -static const UVersionInfo norm2_nfc_data_formatVersion={4,0,0,0}; -static const UVersionInfo norm2_nfc_data_dataVersion={0xb,0,0,0}; - -static const int32_t norm2_nfc_data_indexes[Normalizer2Impl::IX_COUNT]={ -0x50,0x4ab0,0x8708,0x8808,0x8808,0x8808,0x8808,0x8808,0xc0,0x300,0xadc,0x29d0,0x3c56,0xfc00,0x1282,0x3b8c, -0x3c24,0x3c56,0x300,0 -}; - -static const uint16_t norm2_nfc_data_trieIndex[1690]={ -0,0x40,0x7b,0xbb,0xfb,0x13a,0x17a,0x1b2,0x1f2,0x226,0x254,0x226,0x294,0x2d4,0x313,0x353, -0x393,0x3d2,0x40f,0x44e,0x226,0x226,0x488,0x4c8,0x4f8,0x530,0x226,0x570,0x59f,0x5de,0x226,0x5f3, -0x631,0x65f,0x226,0x68c,0x6cc,0x709,0x729,0x768,0x7a7,0x7e4,0x803,0x840,0x729,0x879,0x8a7,0x8e6, -0x226,0x920,0x937,0x977,0x98e,0x9cd,0x226,0xa03,0xa23,0xa5e,0xa6a,0xaa4,0xacc,0xb09,0xb49,0xb83, -0xb9e,0x226,0xbd9,0x226,0xc19,0xc38,0xc6e,0xcab,0x226,0x226,0x226,0x226,0x226,0xcce,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xcfa,0x226,0x226,0xd2f, -0x226,0x226,0xd4d,0x226,0xd77,0x226,0x226,0x226,0xdb3,0xdd3,0xe13,0x226,0xe51,0xe91,0xec5,0xef1, -0x808,0x226,0x226,0xf25,0x226,0x226,0x226,0xf65,0xfa5,0xfe5,0x1025,0x1065,0x10a5,0x10e5,0x1125,0x1165, -0x11a5,0x226,0x226,0x11d5,0x1206,0x226,0x1236,0x1269,0x12a6,0x12e5,0x1325,0x135b,0x1389,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x13b4,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0xcbc,0x226,0x13d1,0x226,0x1411,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x1451,0x148b,0x14c9,0x1509,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1548,0x1586,0x15a6,0x226,0x226,0x226,0x226, -0x15e0,0x226,0x226,0x161c,0x164e,0x167c,0x80c,0x168f,0x226,0x226,0x169f,0x16df,0x226,0x226,0x226,0x13e3, -0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727, -0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737, -0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b, -0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f, -0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f, -0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723, -0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733, -0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727, -0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737, 
-0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b, -0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x176b,0x226, -0x17ab,0x17e6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x1826,0x1866,0x18a6,0x18e6,0x1926,0x1966,0x19a6,0x19e6,0x1a09,0x1a49,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1a69,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x61f,0x62e,0x644,0x663,0x678,0x678,0x678,0x67c,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbd9,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x40c, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1a9c,0x226,0x226,0x1aac,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0xdc5,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1abc,0x226,0x226,0x226,0x226,0x226,0x226, 
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ac6,0x54f, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7eb,0x226,0x226,0x9ba,0x226,0x1ad6, -0x1ae3,0x1aef,0x226,0x226,0x226,0x226,0x414,0x226,0x1afa,0x1b0a,0x226,0x226,0x226,0x7e0,0x226,0x226, -0x226,0x226,0x1b1a,0x226,0x226,0x226,0x1b25,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x1b2c,0x226,0x226,0x226,0x226,0x1b37,0x1b46,0x8f6,0x1b54,0x412,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x1b62,0x798,0x226,0x226,0x226,0x226,0x226,0x1b72,0x1b81,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x8d6,0x1b89,0x1b99,0x226,0x226,0x226,0x9ba, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ba3,0x226,0x226,0x226,0x226,0x226,0x226,0x7e6,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ba0,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7ed,0x7ea,0x226,0x226,0x226,0x226,0x7e8, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x9ba,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbd3,0x226,0x226,0x226,0x226,0x7ea,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bb3,0x226,0x226,0x226, -0xebe,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bb8,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x1bc7,0x1bd7,0x1be5,0x1bf2,0x226,0x1bfe,0x1c0c,0x1c1c,0x226,0x226,0x226,0x226, -0xce9,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c2c,0x1c34,0x1c42,0x226, 
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c52,0x226,0x226,0x226, -0x226,0x226,0x226,0x1c5e,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c6e, -0x1c7e,0x1c8e,0x1c9e,0x1cae,0x1cbe,0x1cce,0x1cde,0x1cee,0x1cfe,0x1d0e,0x1d1e,0x1d2e,0x1d3e,0x1d4e,0x1d5e,0x1d6e, -0x1d7e,0x1d8e,0x1d9e,0x1dae,0x1dbe,0x1dce,0x1dde,0x1dee,0x1dfe,0x1e0e,0x1e1e,0x1e2e,0x1e3e,0x1e4e,0x1e5e,0x1e6e, -0x1e7e,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x408, -0x428,0xc4,0xc4,0xc4,0x448,0x457,0x46a,0x486,0x4a3,0x4bf,0x4dc,0x4f9,0x516,0x533,0xc4,0xc4, -0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, -0xc4,0xc4,0xc4,0x54d,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, -0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, -0xc4,0xc4,0x564,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x56f,0x58c,0xc4,0xc4,0xc4, -0xc4,0xc4,0xc4,0x5ac,0xc4,0xc4,0xc4,0x5bf,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, -0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, -0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5df,0x5ff -}; - -static const uint16_t norm2_nfc_data_trieData[7822]={ -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,4,8,0xc,1, -1,0x10,0x50,0x5c,0x70,0x88,0xcc,0xd0,0xec,0x108,0x144,0x148,0x15c,0x174,0x180,0x1a4, -0x1e4,1,0x1ec,0x20c,0x228,0x244,0x290,0x298,0x2b0,0x2b8,0x2dc,1,1,1,1,1, -1,0x2f4,0x334,0x340,0x354,0x36c,0x3b0,0x3b4,0x3d0,0x3f0,0x428,0x430,0x444,0x45c,0x468,0x48c, -0x4cc,1,0x4d4,0x4f4,0x510,0x530,0x57c,0x584,0x5a0,0x5a8,0x5d0,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0x5e8,1,1,1,1,1,1,1,1,1,1,1,1, 
-1,1,1,1,1,1,1,1,1,1,1,0x1284,0x128a,0xade,0x1290,0xaf4, -0xafe,0x5f4,0xb08,0x1296,0x129c,0xb12,0x12a2,0x12a8,0x12ae,0x12b4,0xb28,1,0x12ba,0x12c0,0x12c6,0xb32, -0xb48,0xb5a,1,0x5fc,0x12cc,0x12d2,0x12d8,0xb64,0x12de,1,1,0x12e4,0x12ea,0xb7a,0x12f0,0xb90, -0xb9a,0x600,0xba4,0x12f6,0x12fc,0xbae,0x1302,0x1308,0x130e,0x1314,0xbc4,1,0x131a,0x1320,0x1326,0xbce, -0xbe4,0xbf6,1,0x608,0x132c,0x1332,0x1338,0xc00,0x133e,1,0x1344,0x134a,0x1350,0xc16,0xc2c,0x1357, -0x135d,0x1362,0x1368,0x136e,0x1374,0x137a,0x1380,0x1386,0x138c,0x1392,0x1398,1,1,0xc42,0xc50,0x139e, -0x13a4,0x13aa,0x13b0,0x13b7,0x13bd,0x13c2,0x13c8,0x13ce,0x13d4,0x13da,0x13e0,0x13e6,0x13ec,0x13f3,0x13f9,0x13fe, -0x1404,1,1,0x140a,0x1410,0x1416,0x141c,0x1422,0x1428,0x142f,0x1435,0x143a,1,1,1,0x1441, -0x1447,0x144d,0x1453,1,0x1458,0x145e,0x1465,0x146b,0x1470,0x1476,1,1,1,0x147c,0x1482,0x1489, -0x148f,0x1494,0x149a,1,1,1,0xc5e,0xc6c,0x14a0,0x14a6,0x14ac,0x14b2,1,1,0x14b8,0x14be, -0x14c5,0x14cb,0x14d0,0x14d6,0xc7a,0xc84,0x14dc,0x14e2,0x14e9,0x14ef,0xc8e,0xc98,0x14f5,0x14fb,0x1500,0x1506, -1,1,0xca2,0xcac,0xcb6,0xcc0,0x150c,0x1512,0x1518,0x151e,0x1524,0x152a,0x1531,0x1537,0x153c,0x1542, -0x1548,0x154e,0x1554,0x155a,0x1560,0x1566,0x156c,0x1572,0x1578,0x60c,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0xcca,0xce4,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xcfe,0xd18,1,1,1,1,1, -1,0x610,1,1,1,1,1,1,1,1,1,1,1,1,1,0x157e, -0x1584,0x158a,0x1590,0x1596,0x159c,0x15a2,0x15a8,0x15b0,0x15ba,0x15c4,0x15ce,0x15d8,0x15e2,0x15ec,0x15f6,1, -0x1600,0x160a,0x1614,0x161e,0x1627,0x162d,1,1,0x1632,0x1638,0x163e,0x1644,0xd32,0xd3c,0x164d,0x1657, -0x165f,0x1665,0x166b,1,1,1,0x1670,0x1676,1,1,0x167c,0x1682,0x168a,0x1694,0x169d,0x16a3, -0x16a9,0x16af,0x16b4,0x16ba,0x16c0,0x16c6,0x16cc,0x16d2,0x16d8,0x16de,0x16e4,0x16ea,0x16f0,0x16f6,0x16fc,0x1702, -0x1708,0x170e,0x1714,0x171a,0x1720,0x1726,0x172c,0x1732,0x1738,0x173e,0x1744,0x174a,0x1750,0x1756,1,1, -0x175c,0x1762,1,1,1,1,1,1,0xd46,0xd50,0xd5a,0xd64,0x176a,0x1774,0x177e,0x1788, 
-0xd6e,0xd78,0x1792,0x179c,0x17a4,0x17aa,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0x614,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xffcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc, -0xfdcc,0xffcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xfdcc,0xffd0,0xffb8,0xffb8,0xffb8,0xffb8,0xffd0,0xfdb0, -0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xff94,0xff94,0xfdb8,0xfdb8,0xfdb8,0xfdb8,0xfd94,0xfd94,0xffb8,0xffb8,0xffb8, -0xffb8,0xfdb8,0xfdb8,0xffb8,0xfdb8,0xfdb8,0xffb8,0xffb8,0xfe02,0xfe02,0xfe02,0xfe02,0xfc02,0xffb8,0xffb8,0xffb8, -0xffb8,0xffcc,0xffcc,0xffcc,0x3c26,0x3c2c,0xfdcc,0x3c32,0x3c38,0xfde0,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc, -0xffcc,0xffb8,0xffb8,1,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffd0,0xffb8,0xffb8,0xffcc, -0xffd2,0xffd4,0xffd4,0xffd2,0xffd4,0xffd4,0xffd2,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,0x29d1,1,1,1,1,1,1,1, -1,1,0x29d5,1,1,1,1,1,0x17b1,0x17b7,0x29d9,0x17bd,0x17c3,0x17c9,1,0x17cf, -1,0x17d5,0x17db,0x17e3,0x618,1,1,1,0x634,1,0x644,1,0x658,1,1,1, -1,1,0x674,1,0x684,1,1,1,0x688,1,1,1,0x6a0,0x17eb,0x17f1,0xd82, -0x17f7,0xd8c,0x17fd,0x1805,0x6b4,1,1,1,0x6d4,1,0x6e4,1,0x6fc,1,1,1, -1,1,0x71c,1,0x72c,1,1,1,0x734,1,1,1,0x754,0xd96,0xda8,0x180d, -0x1813,0xdba,1,1,1,0x76c,0x1819,0x181f,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0x1825,0x182b,1,0x1831,1,1,0x774,0x1837,1,1,1,1,0x183d, -0x1843,0x1849,1,0x778,1,1,0x780,1,0x784,0x790,0x798,0x79c,0x184f,0x7ac,1,1, -1,0x7b0,1,1,1,1,0x7b4,1,1,1,0x7c4,1,1,1,0x7c8,1, -0x7cc,1,1,0x7d0,1,1,0x7d8,1,0x7dc,0x7e8,0x7f0,0x7f4,0x1855,0x804,1,1, -1,0x808,1,1,1,0x80c,1,1,1,0x81c,1,1,1,0x820,1,0x824, -1,1,0x185b,0x1861,1,0x1867,1,1,0x828,0x186d,1,1,1,1,0x1873,0x1879, -0x187f,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
-1,1,1,1,1,1,0x82c,0x830,0x1885,0x188b,1,1,1,1,1,1, -1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x1891, -0x1897,1,1,1,1,1,1,1,1,1,1,1,1,1,0x189d,0x18a3, -0x18a9,0x18af,1,1,0x18b5,0x18bb,0x834,0x838,0x18c1,0x18c7,0x18cd,0x18d3,0x18d9,0x18df,1,1, -0x18e5,0x18eb,0x18f1,0x18f7,0x18fd,0x1903,0x83c,0x840,0x1909,0x190f,0x1915,0x191b,0x1921,0x1927,0x192d,0x1933, -0x1939,0x193f,0x1945,0x194b,1,1,0x1951,0x1957,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc, -0xffcc,0xffcc,0xffbc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8, -0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffbc,0xffc8,0xffcc,0xfe14,0xfe16,0xfe18,0xfe1a,0xfe1c,0xfe1e,0xfe20,0xfe22, -0xfe24,0xfe26,0xfe26,0xfe28,0xfe2a,0xfe2c,1,0xfe2e,1,0xfe30,0xfe32,1,0xffcc,0xffb8,1,0xfe24, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xfe3c,0xfe3e,0xfe40,1,1,1,1,1,1,1,0x195c,0x1962,0x1969,0x196f,0x1975,0x844, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0x850,1,0x854,0xfe36,0xfe38,0xfe3a,0xfe3c,0xfe3e, -0xfe40,0xfe42,0xfe44,0xfdcc,0xfdcc,0xfdb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xfe46,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0x197b,0x858,0x1981,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0x85c,0x1987,1,0x860,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc, -0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,0xffcc,0xffcc,1,0xffb8,0xffcc,0xffcc,0xffb8,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xfe48,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc, -0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffb8,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc, -0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc,1,1,1,1,1,1,1, 
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1, -0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffb8, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffb8, -0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xfe36,0xfe38,0xfe3a,0xffcc, -0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0x864,0x198d,1,1,1,1,1,1,0x868,0x1993,1,0x86c, -0x1999,1,1,1,1,1,1,1,0xfc0e,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xfe12,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,1,1, -1,0x29dc,0x29e2,0x29e8,0x29ee,0x29f4,0x29fa,0x2a00,0x2a06,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xfe0e,1,0xfc00,1,1,1,1,1,1,1,0x870, -1,1,1,0x199f,0x19a5,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00, -1,1,1,1,0x2a0c,0x2a12,1,0x2a18,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xffcc,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0x2a1e,1,1,0x2a24,1,1, -1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1, -1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,0x2a2a,0x2a30,0x2a36, -1,1,0x2a3c,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
-0x878,0x19ab,1,1,0x19b1,0x19b7,0xfe12,1,1,1,1,1,1,1,1,0xfc00, -0xfc00,1,1,1,1,0x2a42,0x2a48,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0x884,1,0x19bd,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xfc00,1,1,1,1,1,1,0x888,0x890,1,1, -0x19c3,0x19c9,0x19cf,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0x894,1,0x19d5,1,1,1,1,0xfe12,1,1, -1,1,1,1,1,0xfea8,0xfcb6,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0xfe0e,1,1,0x898,0x19db,1,0xfc00,1,1,1,0x89c,0x19e1,0x19e7, -1,0xdc4,0x19ef,1,0xfe12,1,1,1,1,1,1,1,0xfc00,0xfc00,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,0xfc00,1,1,1, -1,1,1,0x8a8,0x8b0,1,1,0x19f7,0x19fd,0x1a03,0xfe12,1,1,1,1,1, -1,1,1,1,0xfc00,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xfc12,1,1, -1,1,0xfc00,1,1,1,1,1,1,1,1,1,0x8b4,0x1a09,1,0xdce, -0x1a11,0x1a19,0xfc00,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0xfece,0xfece,0xfe12,1,1, -1,1,1,1,1,1,0xfed6,0xfed6,0xfed6,0xfed6,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0xfeec,0xfeec,1,1,1,1,1,1,1,1,0xfef4,0xfef4,0xfef4,0xfef4, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0xffb8,1,0xffb8,1,0xffb0,1,1,1,1,1,1,0x2a4f,1,1,1, -1,1,1,1,1,1,0x2a55,1,1,1,1,0x2a5b,1,1,1,1, -0x2a61,1,1,1,1,0x2a67,1,1,1,1,1,1,1,1,1,1, -1,1,0x2a6d,1,1,1,1,1,1,1,0xff02,0xff04,0x3c40,0xff08,0x3c48,0x2a72, -1,0x2a78,1,0xff04,0xff04,0xff04,0xff04,1,1,0xff04,0x3c50,0xffcc,0xffcc,0xfe12,1,0xffcc, 
-0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x2a7f,1,1,1, -1,1,1,1,1,1,0x2a85,1,1,1,1,0x2a8b,1,1,1,1, -0x2a91,1,1,1,1,0x2a97,1,1,1,1,1,1,1,1,1,1, -1,1,0x2a9d,1,1,1,1,1,1,0xffb8,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0x8c0,0x1a1f,1,1,1,1,1,1,1,0xfc00,1,1,1, -1,1,1,1,1,0xfe0e,1,0xfe12,0xfe12,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xffb8,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00, -0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00, -0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0xfe12,1,1,1,1,1,1,1,1,1,1,0xffcc,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xffc8,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xffbc,0xffcc,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -1,1,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc, -0xffb8,1,1,1,1,1,0x8c4,0x1a25,0x8c8,0x1a2b,0x8cc,0x1a31,0x8d0,0x1a37,0x8d4,0x1a3d, -1,1,0x8d8,0x1a43,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xfe0e,0xfc00,1,1,1,1,0x8dc,0x1a49,0x8e0,0x1a4f,0x8e4, -0x8e8,0x1a55,0x1a5b,0x8ec,0x1a61,0xfe12,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12, -0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0xfe0e,1,1,1,1,1,1,1,1, -1,1,1,0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc, -0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1, -1,1,0xffb8,1,1,1,1,1,1,0xffcc,1,1,1,0xffcc,0xffcc,1, -1,1,1,1,1,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8, -0xffcc,0xffcc,0xffd4,0xffac,0xffb8,0xff94,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffd0,0xffc8,0xffc8,0xffb8,1, -0xffcc,0xffd2,0xffb8,0xffcc,0xffb8,0x1a66,0x1a6c,0x1a72,0x1a78,0x1a7f,0x1a85,0x1a8b,0x1a91,0x1a99,0x1aa3,0x1aaa, -0x1ab0,0x1ab6,0x1abc,0x1ac2,0x1ac8,0x1acf,0x1ad5,0x1ada,0x1ae0,0x1ae8,0x1af2,0x1afc,0x1b06,0x1b0e,0x1b14,0x1b1a, -0x1b20,0x1b29,0x1b33,0x1b3b,0x1b41,0x1b46,0x1b4c,0x1b52,0x1b58,0x1b5e,0x1b64,0x1b6a,0x1b70,0x1b77,0x1b7d,0x1b82, -0x1b88,0x1b8e,0x1b94,0x1b9c,0x1ba6,0x1bae,0x1bb4,0x1bba,0x1bc0,0x1bc6,0x1bcc,0xdd8,0xde2,0x1bd4,0x1bde,0x1be6, 
-0x1bec,0x1bf2,0x1bf8,0x1bfe,0x1c04,0x1c0a,0x1c10,0x1c17,0x1c1d,0x1c22,0x1c28,0x1c2e,0x1c34,0x1c3a,0x1c40,0x1c46, -0x1c4c,0x1c54,0x1c5e,0x1c68,0x1c72,0x1c7c,0x1c86,0x1c90,0x1c9a,0x1ca3,0x1ca9,0x1caf,0x1cb5,0x1cba,0x1cc0,0xdec, -0xdf6,0x1cc8,0x1cd2,0x1cda,0x1ce0,0x1ce6,0x1cec,0xe00,0xe0a,0x1cf4,0x1cfe,0x1d08,0x1d12,0x1d1c,0x1d26,0x1d2e, -0x1d34,0x1d3a,0x1d40,0x1d46,0x1d4c,0x1d52,0x1d58,0x1d5e,0x1d64,0x1d6a,0x1d70,0x1d76,0x1d7c,0x1d84,0x1d8e,0x1d98, -0x1da2,0x1daa,0x1db0,0x1db7,0x1dbd,0x1dc2,0x1dc8,0x1dce,0x1dd4,0x1dda,0x1de0,0x1de6,0x1dec,0x1df3,0x1df9,0x1dff, -0x1e05,0x1e0b,0x1e11,0x1e16,0x1e1c,0x1e22,0x1e28,0x1e2f,0x1e35,0x1e3b,0x1e41,0x1e46,0x1e4c,0x1e52,0x1e58,1, -0x1e5f,1,1,1,1,0xe14,0xe22,0x1e64,0x1e6a,0x1e72,0x1e7c,0x1e86,0x1e90,0x1e9a,0x1ea4,0x1eae, -0x1eb8,0x1ec2,0x1ecc,0x1ed6,0x1ee0,0x1eea,0x1ef4,0x1efe,0x1f08,0x1f12,0x1f1c,0x1f26,0x1f30,0xe30,0xe3a,0x1f38, -0x1f3e,0x1f44,0x1f4a,0x1f52,0x1f5c,0x1f66,0x1f70,0x1f7a,0x1f84,0x1f8e,0x1f98,0x1fa2,0x1fac,0x1fb4,0x1fba,0x1fc0, -0x1fc6,0xe44,0xe4e,0x1fcc,0x1fd2,0x1fda,0x1fe4,0x1fee,0x1ff8,0x2002,0x200c,0x2016,0x2020,0x202a,0x2034,0x203e, -0x2048,0x2052,0x205c,0x2066,0x2070,0x207a,0x2084,0x208e,0x2098,0x20a0,0x20a6,0x20ac,0x20b2,0x20ba,0x20c4,0x20ce, -0x20d8,0x20e2,0x20ec,0x20f6,0x2100,0x210a,0x2114,0x211c,0x2122,0x2129,0x212f,0x2134,0x213a,0x2140,0x2146,1, -1,1,1,1,1,0xe58,0xe6e,0xe86,0xe94,0xea2,0xeb0,0xebe,0xecc,0xed8,0xeee,0xf06, -0xf14,0xf22,0xf30,0xf3e,0xf4c,0xf58,0xf66,0x214f,0x2159,0x2163,0x216d,1,1,0xf74,0xf82,0x2177, -0x2181,0x218b,0x2195,1,1,0xf90,0xfa6,0xfbe,0xfcc,0xfda,0xfe8,0xff6,0x1004,0x1010,0x1026,0x103e, -0x104c,0x105a,0x1068,0x1076,0x1084,0x1090,0x10a2,0x219f,0x21a9,0x21b3,0x21bd,0x21c7,0x21d1,0x10b4,0x10c6,0x21db, -0x21e5,0x21ef,0x21f9,0x2203,0x220d,0x10d8,0x10e6,0x2217,0x2221,0x222b,0x2235,1,1,0x10f4,0x1102,0x223f, -0x2249,0x2253,0x225d,1,1,0x1110,0x1122,0x2267,0x2271,0x227b,0x2285,0x228f,0x2299,1,0x1134,1, 
-0x22a3,1,0x22ad,1,0x22b7,0x1146,0x115c,0x1174,0x1182,0x1190,0x119e,0x11ac,0x11ba,0x11c6,0x11dc,0x11f4, -0x1202,0x1210,0x121e,0x122c,0x123a,0x1246,0x3b8e,0x22bf,0x3b96,0x1250,0x3b9e,0x22c5,0x3ba6,0x22cb,0x3bae,0x22d1, -0x3bb6,0x125a,0x3bbe,1,1,0x22d8,0x22e2,0x22f1,0x2301,0x2311,0x2321,0x2331,0x2341,0x234c,0x2356,0x2365, -0x2375,0x2385,0x2395,0x23a5,0x23b5,0x23c0,0x23ca,0x23d9,0x23e9,0x23f9,0x2409,0x2419,0x2429,0x2434,0x243e,0x244d, -0x245d,0x246d,0x247d,0x248d,0x249d,0x24a8,0x24b2,0x24c1,0x24d1,0x24e1,0x24f1,0x2501,0x2511,0x251c,0x2526,0x2535, -0x2545,0x2555,0x2565,0x2575,0x2585,0x258f,0x2595,0x259d,0x25a4,0x25ad,1,0x1264,0x25b7,0x25bf,0x25c5,0x25cb, -0x3bc6,0x25d0,1,0x2aa2,0x8f0,1,0x25d7,0x25df,0x25e6,0x25ef,1,0x126e,0x25f9,0x2601,0x3bce,0x2607, -0x3bd6,0x260c,0x2613,0x2619,0x261f,0x2625,0x262b,0x2633,0x3be0,1,1,0x263b,0x2643,0x264b,0x2651,0x2657, -0x3bea,1,0x265d,0x2663,0x2669,0x266f,0x2675,0x267d,0x3bf4,0x2685,0x268b,0x2691,0x2699,0x26a1,0x26a7,0x26ad, -0x3bfe,0x26b3,0x26b9,0x3c06,0x2aa7,1,1,0x26c1,0x26c8,0x26d1,1,0x1278,0x26db,0x26e3,0x3c0e,0x26e9, -0x3c16,0x26ee,0x2aab,0x8fc,1,0xfa09,0xfa09,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xffcc,0xffcc,0xfe02,0xfe02,0xffcc,0xffcc,0xffcc,0xffcc,0xfe02,0xfe02,0xfe02, -0xffcc,0xffcc,1,1,1,1,0xffcc,1,1,1,0xfe02,0xfe02,0xffcc,0xffb8,0xffcc,0xfe02, -0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0x2aae,1,1,1, -0x2ab2,0x3c1e,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0x908,1,0x90c,1,0x910,1,1,1,1,1, -0x26f5,0x26fb,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0x2701,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0x2707,0x270d,0x2713,0x914,1,0x918,1,0x91c,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0x920,0x2719,1,1,1,0x924,0x271f, -1,0x928,0x2725,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0x92c,0x272b,0x930,0x2731,1,1,1, 
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0x934,1,1,1,0x2737,1,0x938,0x273d,0x93c,1,0x2743,0x940,0x2749,1, -1,1,0x944,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0x274f,0x948,0x2755,1,0x94c,0x950,1,1,1,1,1, -1,1,0x275b,0x2761,0x2767,0x276d,0x2773,0x954,0x958,0x2779,0x277f,0x95c,0x960,0x2785,0x278b,0x964, -0x968,0x96c,0x970,1,1,0x2791,0x2797,0x974,0x978,0x279d,0x27a3,0x97c,0x980,0x27a9,0x27af,1, -1,1,1,1,1,1,0x984,0x988,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0x98c,1,1,1,1,1,0x990,0x994,1, -0x998,0x27b5,0x27bb,0x27c1,0x27c7,1,1,0x99c,0x9a0,0x9a4,0x9a8,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0x27cd,0x27d3,0x27d9,0x27df,1, -1,1,1,1,1,0x27e5,0x27eb,0x27f1,0x27f7,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0x2ab7,0x2abb,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0x2abf,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0xffb4,0xffc8,0xffd0,0xffbc,0xffc0, -0xffc0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0x9ac,1,1,1,1,0x9b0,0x27fd,0x9b4,0x2803,0x9b8,0x2809,0x9bc,0x280f,0x9c0,0x2815, -0x9c4,0x281b,0x9c8,0x2821,0x9cc,0x2827,0x9d0,0x282d,0x9d4,0x2833,0x9d8,0x2839,0x9dc,0x283f,1,0x9e0, -0x2845,0x9e4,0x284b,0x9e8,0x2851,1,1,1,1,1,0x9ec,0x2857,0x285d,0x9f4,0x2863,0x2869, -0x9fc,0x286f,0x2875,0xa04,0x287b,0x2881,0xa0c,0x2887,0x288d,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0x2893,1,1, -1,1,0xfc10,0xfc10,1,1,0xa14,0x2899,1,1,1,1,1,1,1,0xa18, -1,1,1,1,0xa1c,0x289f,0xa20,0x28a5,0xa24,0x28ab,0xa28,0x28b1,0xa2c,0x28b7,0xa30,0x28bd, 
-0xa34,0x28c3,0xa38,0x28c9,0xa3c,0x28cf,0xa40,0x28d5,0xa44,0x28db,0xa48,0x28e1,1,0xa4c,0x28e7,0xa50, -0x28ed,0xa54,0x28f3,1,1,1,1,1,0xa58,0x28f9,0x28ff,0xa60,0x2905,0x290b,0xa68,0x2911, -0x2917,0xa70,0x291d,0x2923,0xa78,0x2929,0x292f,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xa80,0xa84,0xa88,0xa8c,1,0x2935,1,1, -0x293b,0x2941,0x2947,0x294d,1,1,0xa90,0x2953,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0xffcc,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc, -1,0xffcc,0xffcc,0xffb8,1,1,0xffcc,0xffcc,1,1,1,1,1,0xffcc,0xffcc,1, -0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,0xadc, -0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, -0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0xadc,0x1283,0x1283,0x1283,0x1283, -0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, 
-0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0xadc,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, -0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, -0x1283,0x1283,0x1283,0xadc,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, -0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0x3c56,1,0x3c56,0x3c56,0x3c56, -0x3c56,0x3c56,0x3c56,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0x3c56,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0x3c56,1,1,1,1,0x3c56, -1,1,1,0x3c56,1,0x3c56,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0x3b87,1,0x2ac5,0x2ac9,0x2acd,0x2ad1,0x2ad5,0x2ad9,0x2add,0x2ae1,0x2ae1,0x2ae5, -0x2ae9,0x2aed,0x2af1,0x2af5,0x2af9,0x2afd,0x2b01,0x2b05,0x2b09,0x2b0d,0x2b11,0x2b15,0x2b19,0x2b1d,0x2b21,0x2b25, -0x2b29,0x2b2d,0x2b31,0x2b35,0x2b39,0x2b3d,0x2b41,0x2b45,0x2b49,0x2b4d,0x2b51,0x2b55,0x2b59,0x2b5d,0x2b61,0x2b65, -0x2b69,0x2b6d,0x2b71,0x2b75,0x2b79,0x2b7d,0x2b81,0x2b85,0x2b89,0x2b8d,0x2b91,0x2b95,0x2b99,0x2b9d,0x2ba1,0x2ba5, -0x2ba9,0x2bad,0x2bb1,0x2bb5,0x2bb9,0x2bbd,0x2bc1,0x2bc5,0x2bc9,0x2bcd,0x2bd1,0x2bd5,0x2bd9,0x2bdd,0x2be1,0x2be5, -0x2be9,0x2bed,0x2bf1,0x2bf5,0x2bf9,0x2bfd,0x2c01,0x2c05,0x2c09,0x2c0d,0x2c11,0x2c15,0x2c19,0x2c1d,0x2c21,0x2c25, -0x2c29,0x2c2d,0x2b11,0x2c31,0x2c35,0x2c39,0x2c3d,0x2c41,0x2c45,0x2c49,0x2c4d,0x2c51,0x2c55,0x2c59,0x2c5d,0x2c61, -0x2c65,0x2c69,0x2c6d,0x2c71,0x2c75,0x2c79,0x2c7d,0x2c81,0x2c85,0x2c89,0x2c8d,0x2c91,0x2c95,0x2c99,0x2c9d,0x2ca1, -0x2ca5,0x2ca9,0x2cad,0x2cb1,0x2cb5,0x2cb9,0x2cbd,0x2cc1,0x2cc5,0x2cc9,0x2ccd,0x2cd1,0x2cd5,0x2cd9,0x2cdd,0x2ce1, -0x2ce5,0x2ce9,0x2ced,0x2cf1,0x2cf5,0x2cf9,0x2cfd,0x2d01,0x2d05,0x2d09,0x2d0d,0x2d11,0x2d15,0x2d19,0x2d1d,0x2d21, 
-0x2d25,0x2d29,0x2d2d,0x2d31,0x2d35,0x2d39,0x2d3d,0x2c79,0x2d41,0x2d45,0x2d49,0x2d4d,0x2d51,0x2d55,0x2d59,0x2d5d, -0x2c39,0x2d61,0x2d65,0x2d69,0x2d6d,0x2d71,0x2d75,0x2d79,0x2d7d,0x2d81,0x2d85,0x2d89,0x2d8d,0x2d91,0x2d95,0x2d99, -0x2d9d,0x2da1,0x2da5,0x2da9,0x2dad,0x2b11,0x2db1,0x2db5,0x2db9,0x2dbd,0x2dc1,0x2dc5,0x2dc9,0x2dcd,0x2dd1,0x2dd5, -0x2dd9,0x2ddd,0x2de1,0x2de5,0x2de9,0x2ded,0x2df1,0x2df5,0x2df9,0x2dfd,0x2e01,0x2e05,0x2e09,0x2e0d,0x2e11,0x2e15, -0x2e19,0x2c41,0x2e1d,0x2e21,0x2e25,0x2e29,0x2e2d,0x2e31,0x2e35,0x2e39,0x2e3d,0x2e41,0x2e45,0x2e49,0x2e4d,0x2e51, -0x2e55,0x2e59,0x2e5d,0x2e61,0x2e65,0x2e69,0x2e6d,0x2e71,0x2e75,0x2e79,0x2e7d,0x2e81,0x2e85,0x2e89,0x2e8d,0x2e91, -0x2e95,0x2e99,0x2e9d,0x2ea1,0x2ea5,0x2ea9,0x2ead,0x2eb1,0x2eb5,0x2eb9,0x2ebd,0x2ec1,0x2ec5,0x2ec9,0x2ecd,0x2ed1, -0x2ed5,0x2ed9,0x2edd,0x2ee1,1,1,0x2ee5,1,0x2ee9,1,1,0x2eed,0x2ef1,0x2ef5,0x2ef9,0x2efd, -0x2f01,0x2f05,0x2f09,0x2f0d,0x2f11,1,0x2f15,1,0x2f19,1,1,0x2f1d,0x2f21,1,1,1, -0x2f25,0x2f29,0x2f2d,0x2f31,0x2f35,0x2f39,0x2f3d,0x2f41,0x2f45,0x2f49,0x2f4d,0x2f51,0x2f55,0x2f59,0x2f5d,0x2f61, -0x2f65,0x2f69,0x2f6d,0x2f71,0x2f75,0x2f79,0x2f7d,0x2f81,0x2f85,0x2f89,0x2f8d,0x2f91,0x2f95,0x2f99,0x2f9d,0x2fa1, -0x2fa5,0x2fa9,0x2fad,0x2fb1,0x2fb5,0x2fb9,0x2fbd,0x2fc1,0x2fc5,0x2fc9,0x2fcd,0x2fd1,0x2fd5,0x2d15,0x2fd9,0x2fdd, -0x2fe1,0x2fe5,0x2fe9,0x2fed,0x2fed,0x2ff1,0x2ff5,0x2ff9,0x2ffd,0x3001,0x3005,0x3009,0x300d,0x2f1d,0x3011,0x3015, -0x3019,0x301d,0x3021,0x3027,1,1,0x302b,0x302f,0x3033,0x3037,0x303b,0x303f,0x3043,0x3047,0x2f55,0x304b, -0x304f,0x3053,0x2ee5,0x3057,0x305b,0x305f,0x3063,0x3067,0x306b,0x306f,0x3073,0x3077,0x307b,0x307f,0x3083,0x2f79, -0x3087,0x2f7d,0x308b,0x308f,0x3093,0x3097,0x309b,0x2ee9,0x2b65,0x309f,0x30a3,0x30a7,0x2c7d,0x2dd9,0x30ab,0x30af, -0x2f99,0x30b3,0x2f9d,0x30b7,0x30bb,0x30bf,0x2ef1,0x30c3,0x30c7,0x30cb,0x30cf,0x30d3,0x2ef5,0x30d7,0x30db,0x30df, -0x30e3,0x30e7,0x30eb,0x2fd5,0x30ef,0x30f3,0x2d15,0x30f7,0x2fe5,0x30fb,0x30ff,0x3103,0x3107,0x310b,0x2ff9,0x310f, 
-0x2f19,0x3113,0x2ffd,0x2c31,0x3117,0x3001,0x311b,0x3009,0x311f,0x3123,0x3127,0x312b,0x312f,0x3011,0x2f09,0x3133, -0x3015,0x3137,0x3019,0x313b,0x2ae1,0x313f,0x3145,0x314b,0x3151,0x3155,0x3159,0x315d,0x3163,0x3169,0x316f,0x3173, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0x3176,0xfe34,0x317c,1,1,1,1,1,1,1, -1,1,1,0x3182,0x3188,0x3190,0x319a,0x31a2,0x31a8,0x31ae,0x31b4,0x31ba,0x31c0,0x31c6,0x31cc,0x31d2, -1,0x31d8,0x31de,0x31e4,0x31ea,0x31f0,1,0x31f6,1,0x31fc,0x3202,1,0x3208,0x320e,1,0x3214, -0x321a,0x3220,0x3226,0x322c,0x3232,0x3238,0x323e,0x3244,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xffb8,1,0xffcc,1,1,1,1, -1,1,1,1,0xffcc,0xfe02,0xffb8,1,1,1,1,0xfe12,1,1,1,1, -0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc, -0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,0xa94, -0x2959,0xa9a,0x2963,1,1,1,1,1,0xaa0,1,1,1,1,1,0x296d,1, -1,1,1,1,1,1,1,1,0xfe12,0xfc0e,1,1,1,1,1,1, -1,0xfc00,1,1,1,1,1,1,0x2977,0x2981,1,0xaa6,0xaac,0xfe12,0xfe12,1, -1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1, -1,1,1,1,0xfe0e,1,1,1,1,1,0xfe12,0xfe0e,1,1,1,1, -1,1,1,1,1,0xfe0e,0xfe12,1,1,1,1,1,1,1,1,1, -1,1,0xfe0e,0xfe0e,1,0xfc00,1,1,1,1,1,1,1,0xab2,1,1, -1,0x298b,0x2995,0xfe12,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,1,1,1,0xfe12,1,1,1,0xfe0e,1,1,1,1,1,1,1, -1,1,0xfc00,1,1,1,1,1,1,1,1,0xabe,0xfc00,0x299f,0x29a9,0xfc00, -0x29b3,1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,1,1, -1,0xad0,0xad6,0x29bd,0x29c7,1,1,1,0xfe12,0xfe0e,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xfe12,0xfe0e,1,1,1,1,1, -1,1,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xfe02,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0x324a,0x3254,0x3268,0x3280,0x3298,0x32b0,0x32c8,0xffb0,0xffb0,0xfe02,0xfe02, 
-0xfe02,1,1,1,0xffc4,0xffb0,0xffb0,0xffb0,1,1,1,1,1,1,1,1, -0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, -1,1,1,1,1,1,1,0x32d6,0x32e0,0x32f4,0x330c,0x3324,0x333c,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1, -1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1, -1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,1,1,1,1,1,0x334b,0x334f, -0x3353,0x3357,0x335d,0x2f3d,0x3361,0x3365,0x3369,0x336d,0x2f41,0x3371,0x3375,0x3379,0x2f45,0x337f,0x3383,0x3387, -0x338b,0x3391,0x3395,0x3399,0x339d,0x33a3,0x33a7,0x33ab,0x33af,0x302f,0x33b3,0x33b9,0x33bd,0x33c1,0x33c5,0x33c9, -0x33cd,0x33d1,0x33d5,0x3043,0x2f49,0x2f4d,0x3047,0x33d9,0x33dd,0x2c49,0x33e1,0x2f51,0x33e5,0x33e9,0x33ed,0x33f1, -0x33f1,0x33f1,0x33f5,0x33fb,0x33ff,0x3403,0x3407,0x340d,0x3411,0x3415,0x3419,0x341d,0x3421,0x3425,0x3429,0x342d, -0x3431,0x3435,0x3439,0x343d,0x343d,0x304f,0x3441,0x3445,0x3449,0x344d,0x2f59,0x3451,0x3455,0x3459,0x2ead,0x345d, -0x3461,0x3465,0x3469,0x346d,0x3471,0x3475,0x3479,0x347d,0x3483,0x3487,0x348b,0x348f,0x3493,0x3497,0x349b,0x34a1, -0x34a7,0x34ab,0x34af,0x34b3,0x34b7,0x34bb,0x34bf,0x34c3,0x34c7,0x34c7,0x34cb,0x34d1,0x34d5,0x2c39,0x34d9,0x34dd, -0x34e3,0x34e7,0x34eb,0x34ef,0x34f3,0x34f7,0x2f6d,0x34fb,0x34ff,0x3503,0x3509,0x350d,0x3513,0x3517,0x351b,0x351f, -0x3523,0x3527,0x352b,0x352f,0x3533,0x3537,0x353b,0x353f,0x3545,0x3549,0x354d,0x3551,0x2b61,0x3555,0x355b,0x355f, -0x355f,0x3565,0x3569,0x3569,0x356d,0x3571,0x3577,0x357d,0x3581,0x3585,0x3589,0x358d,0x3591,0x3595,0x3599,0x359d, -0x35a1,0x2f71,0x35a5,0x35ab,0x35af,0x35b3,0x307f,0x35b3,0x35b7,0x2f79,0x35bb,0x35bf,0x35c3,0x35c7,0x2f7d,0x2af5, 
-0x35cb,0x35cf,0x35d3,0x35d7,0x35db,0x35df,0x35e3,0x35e9,0x35ed,0x35f1,0x35f5,0x35f9,0x35fd,0x3603,0x3607,0x360b, -0x360f,0x3613,0x3617,0x361b,0x361f,0x3623,0x2f81,0x3627,0x362b,0x3631,0x3635,0x3639,0x363d,0x2f89,0x3641,0x3645, -0x3649,0x364d,0x3651,0x3655,0x3659,0x365d,0x2b65,0x309f,0x3661,0x3665,0x3669,0x366d,0x3673,0x3677,0x367b,0x367f, -0x2f8d,0x3683,0x3689,0x368d,0x3691,0x3151,0x3695,0x3699,0x369d,0x36a1,0x36a5,0x36ab,0x36af,0x36b3,0x36b7,0x36bd, -0x36c1,0x36c5,0x36c9,0x2c7d,0x36cd,0x36d1,0x36d7,0x36dd,0x36e3,0x36e7,0x36ed,0x36f1,0x36f5,0x36f9,0x36fd,0x2f91, -0x2dd9,0x3701,0x3705,0x3709,0x370d,0x3713,0x3717,0x371b,0x371f,0x30af,0x3723,0x3727,0x372d,0x3731,0x3735,0x373b, -0x3741,0x3745,0x30b3,0x3749,0x374d,0x3751,0x3755,0x3759,0x375d,0x3761,0x3767,0x376b,0x3771,0x3775,0x377b,0x30bb, -0x377f,0x3783,0x3789,0x378d,0x3791,0x3797,0x379d,0x37a1,0x37a5,0x37a9,0x37ad,0x37ad,0x37b1,0x37b5,0x30c3,0x37b9, -0x37bd,0x37c1,0x37c5,0x37c9,0x37cf,0x37d3,0x2c45,0x37d9,0x37df,0x37e3,0x37e9,0x37ef,0x37f5,0x37f9,0x30db,0x37fd, -0x3803,0x3809,0x380f,0x3815,0x3819,0x3819,0x30df,0x3159,0x381d,0x3821,0x3825,0x3829,0x382f,0x2bad,0x30e7,0x3833, -0x3837,0x2fbd,0x383d,0x3843,0x2f05,0x3849,0x384d,0x2fcd,0x3851,0x3855,0x3859,0x385f,0x385f,0x3865,0x3869,0x386d, -0x3873,0x3877,0x387b,0x387f,0x3885,0x3889,0x388d,0x3891,0x3895,0x3899,0x389f,0x38a3,0x38a7,0x38ab,0x38af,0x38b3, -0x38b7,0x38bd,0x38c3,0x38c7,0x38cd,0x38d1,0x38d7,0x38db,0x2fe5,0x38df,0x38e5,0x38eb,0x38ef,0x38f5,0x38f9,0x38ff, -0x3903,0x3907,0x390b,0x390f,0x3913,0x3917,0x391d,0x3923,0x3929,0x3565,0x392f,0x3933,0x3937,0x393b,0x393f,0x3943, -0x3947,0x394b,0x394f,0x3953,0x3957,0x395b,0x2c8d,0x3961,0x3965,0x3969,0x396d,0x3971,0x3975,0x2ff1,0x3979,0x397d, -0x3981,0x3985,0x3989,0x398f,0x3995,0x399b,0x399f,0x39a3,0x39a7,0x39ab,0x39b1,0x39b5,0x39bb,0x39bf,0x39c3,0x39c9, -0x39cf,0x39d3,0x2b99,0x39d7,0x39db,0x39df,0x39e3,0x39e7,0x39eb,0x3103,0x39ef,0x39f3,0x39f7,0x39fb,0x39ff,0x3a03, 
-0x3a07,0x3a0b,0x3a0f,0x3a13,0x3a19,0x3a1d,0x3a21,0x3a25,0x3a29,0x3a2d,0x3a33,0x3a39,0x3a3d,0x3a41,0x3117,0x311b, -0x3a45,0x3a49,0x3a4f,0x3a53,0x3a57,0x3a5b,0x3a5f,0x3a65,0x3a6b,0x3a6f,0x3a73,0x3a77,0x3a7d,0x311f,0x3a81,0x3a87, -0x3a8d,0x3a91,0x3a95,0x3a99,0x3a9f,0x3aa3,0x3aa7,0x3aab,0x3aaf,0x3ab3,0x3ab7,0x3abb,0x3ac1,0x3ac5,0x3ac9,0x3acd, -0x3ad3,0x3ad7,0x3adb,0x3adf,0x3ae3,0x3ae9,0x3aef,0x3af3,0x3af7,0x3afb,0x3b01,0x3b05,0x3137,0x3137,0x3b0b,0x3b0f, -0x3b15,0x3b19,0x3b1d,0x3b21,0x3b25,0x3b29,0x3b2d,0x3b31,0x313b,0x3b37,0x3b3b,0x3b3f,0x3b43,0x3b47,0x3b4b,0x3b51, -0x3b55,0x3b5b,0x3b61,0x3b67,0x3b6b,0x3b6f,0x3b73,0x3b77,0x3b7b,0x3b7f,0x3b83,0x3b87,1,1 -}; - -static const UCPTrie norm2_nfc_data_trie={ - norm2_nfc_data_trieIndex, - { norm2_nfc_data_trieData }, - 1690, 7822, - 0x2fc00, 0x30, - 0, 0, - 0, 0, - 0xc4, 0x226, - 0x1, -}; - -static const uint16_t norm2_nfc_data_extraData[7724]={ -0xffff,0xffff,0x8670,0x44dc,0x8670,0x44c0,0x8670,0x44de,0x600,0x180,0x602,0x182,0x604,0x185,0x606,0x186, -0x608,0x200,0x60c,0x205,0x60e,0x44d,0x610,0x189,0x612,0x3d44,0x614,0x18b,0x618,0x39a,0x61e,0x400, -0x622,0x404,0x646,0x3d41,0x64a,0x3c00,0x8650,0x208,0x60e,0x3c04,0x646,0x3c08,0x8662,0x3c0c,0x602,0x20c, -0x604,0x210,0x60e,0x214,0x618,0x218,0x864e,0x18f,0x60e,0x3c14,0x618,0x21c,0x646,0x3c18,0x64e,0x3c20, -0x65a,0x3c24,0x8662,0x3c1c,0x600,0x190,0x602,0x192,0x604,0x195,0x606,0x3d78,0x608,0x225,0x60c,0x228, -0x60e,0x22c,0x610,0x196,0x612,0x3d74,0x618,0x234,0x61e,0x408,0x622,0x40c,0x646,0x3d71,0x64e,0x451, -0x650,0x230,0x65a,0x3c30,0x8660,0x3c34,0x860e,0x3c3c,0x602,0x3e8,0x604,0x238,0x608,0x3c40,0x60c,0x23c, -0x60e,0x240,0x618,0x3cc,0x864e,0x244,0x604,0x248,0x60e,0x3c44,0x610,0x3c4c,0x618,0x43c,0x646,0x3c48, -0x64e,0x3c50,0x865c,0x3c54,0x600,0x198,0x602,0x19a,0x604,0x19c,0x606,0x250,0x608,0x254,0x60c,0x258, -0x60e,0x260,0x610,0x19f,0x612,0x3d90,0x618,0x39e,0x61e,0x410,0x622,0x414,0x646,0x3d94,0x650,0x25c, 
-0x8660,0x3c58,0x8604,0x268,0x602,0x3c60,0x618,0x3d0,0x646,0x3c64,0x64e,0x26c,0x8662,0x3c68,0x602,0x272, -0x618,0x27a,0x646,0x3c6d,0x64e,0x276,0x65a,0x3c78,0x8662,0x3c74,0x602,0x3c7c,0x60e,0x3c80,0x8646,0x3c84, -0x600,0x3f0,0x602,0x286,0x606,0x1a2,0x60e,0x3c88,0x618,0x28e,0x646,0x3c8c,0x64e,0x28a,0x65a,0x3c94, -0x8662,0x3c90,0x600,0x1a4,0x602,0x1a6,0x604,0x1a9,0x606,0x1ab,0x608,0x299,0x60c,0x29c,0x60e,0x45d, -0x610,0x1ad,0x612,0x3d9c,0x616,0x2a0,0x618,0x3a2,0x61e,0x418,0x622,0x41c,0x636,0x341,0x646,0x3d99, -0x8650,0x3d5,0x602,0x3ca8,0x860e,0x3cac,0x602,0x2a8,0x60e,0x3cb0,0x618,0x2b0,0x61e,0x420,0x622,0x424, -0x646,0x3cb5,0x64e,0x2ac,0x8662,0x3cbc,0x602,0x2b5,0x604,0x2b8,0x60e,0x3cc0,0x618,0x2c1,0x646,0x3cc5, -0x64c,0x430,0x864e,0x2bc,0x60e,0x3cd4,0x618,0x2c8,0x646,0x3cd8,0x64c,0x434,0x64e,0x2c4,0x65a,0x3ce0, -0x8662,0x3cdc,0x600,0x1b2,0x602,0x1b4,0x604,0x1b6,0x606,0x2d1,0x608,0x2d5,0x60c,0x2d8,0x610,0x1b9, -0x612,0x3dcc,0x614,0x2dc,0x616,0x2e0,0x618,0x3a6,0x61e,0x428,0x622,0x42c,0x636,0x35f,0x646,0x3dc8, -0x648,0x3ce4,0x650,0x2e4,0x65a,0x3cec,0x8660,0x3ce8,0x606,0x3cf8,0x8646,0x3cfc,0x600,0x3d00,0x602,0x3d04, -0x604,0x2e8,0x60e,0x3d0c,0x610,0x3d08,0x8646,0x3d10,0x60e,0x3d14,0x8610,0x3d18,0x600,0x3de4,0x602,0x1ba, -0x604,0x2ec,0x606,0x3df0,0x608,0x464,0x60e,0x3d1c,0x610,0x2f0,0x612,0x3dec,0x8646,0x3de8,0x602,0x2f2, -0x604,0x3d20,0x60e,0x2f6,0x618,0x2fa,0x646,0x3d24,0x8662,0x3d28,0x600,0x1c0,0x602,0x1c2,0x604,0x1c5, -0x606,0x1c6,0x608,0x202,0x60c,0x207,0x60e,0x44f,0x610,0x1c9,0x612,0x3d46,0x614,0x1cb,0x618,0x39c, -0x61e,0x402,0x622,0x406,0x646,0x3d43,0x64a,0x3c02,0x8650,0x20a,0x60e,0x3c06,0x646,0x3c0a,0x8662,0x3c0e, -0x602,0x20e,0x604,0x212,0x60e,0x216,0x618,0x21a,0x864e,0x1cf,0x60e,0x3c16,0x618,0x21e,0x646,0x3c1a, -0x64e,0x3c22,0x65a,0x3c26,0x8662,0x3c1e,0x600,0x1d0,0x602,0x1d2,0x604,0x1d5,0x606,0x3d7a,0x608,0x227, -0x60c,0x22a,0x60e,0x22e,0x610,0x1d6,0x612,0x3d76,0x618,0x236,0x61e,0x40a,0x622,0x40e,0x646,0x3d73, 
-0x64e,0x453,0x650,0x232,0x65a,0x3c32,0x8660,0x3c36,0x860e,0x3c3e,0x602,0x3ea,0x604,0x23a,0x608,0x3c42, -0x60c,0x23e,0x60e,0x242,0x618,0x3ce,0x864e,0x246,0x604,0x24a,0x60e,0x3c46,0x610,0x3c4e,0x618,0x43e, -0x646,0x3c4a,0x64e,0x3c52,0x65c,0x3c56,0x8662,0x3d2c,0x600,0x1d8,0x602,0x1da,0x604,0x1dc,0x606,0x252, -0x608,0x256,0x60c,0x25a,0x610,0x1df,0x612,0x3d92,0x618,0x3a0,0x61e,0x412,0x622,0x416,0x646,0x3d96, -0x650,0x25e,0x8660,0x3c5a,0x604,0x26a,0x8618,0x3e0,0x602,0x3c62,0x618,0x3d2,0x646,0x3c66,0x64e,0x26e, -0x8662,0x3c6a,0x602,0x274,0x618,0x27c,0x646,0x3c6f,0x64e,0x278,0x65a,0x3c7a,0x8662,0x3c76,0x602,0x3c7e, -0x60e,0x3c82,0x8646,0x3c86,0x600,0x3f2,0x602,0x288,0x606,0x1e2,0x60e,0x3c8a,0x618,0x290,0x646,0x3c8e, -0x64e,0x28c,0x65a,0x3c96,0x8662,0x3c92,0x600,0x1e4,0x602,0x1e6,0x604,0x1e9,0x606,0x1eb,0x608,0x29b, -0x60c,0x29e,0x60e,0x45f,0x610,0x1ed,0x612,0x3d9e,0x616,0x2a2,0x618,0x3a4,0x61e,0x41a,0x622,0x41e, -0x636,0x343,0x646,0x3d9b,0x8650,0x3d7,0x602,0x3caa,0x860e,0x3cae,0x602,0x2aa,0x60e,0x3cb2,0x618,0x2b2, -0x61e,0x422,0x622,0x426,0x646,0x3cb7,0x64e,0x2ae,0x8662,0x3cbe,0x602,0x2b7,0x604,0x2ba,0x60e,0x3cc2, -0x618,0x2c3,0x646,0x3cc7,0x64c,0x432,0x864e,0x2be,0x60e,0x3cd6,0x610,0x3d2e,0x618,0x2ca,0x646,0x3cda, -0x64c,0x436,0x64e,0x2c6,0x65a,0x3ce2,0x8662,0x3cde,0x600,0x1f2,0x602,0x1f4,0x604,0x1f6,0x606,0x2d3, -0x608,0x2d7,0x60c,0x2da,0x610,0x1f9,0x612,0x3dce,0x614,0x2de,0x616,0x2e2,0x618,0x3a8,0x61e,0x42a, -0x622,0x42e,0x636,0x361,0x646,0x3dca,0x648,0x3ce6,0x650,0x2e6,0x65a,0x3cee,0x8660,0x3cea,0x606,0x3cfa, -0x8646,0x3cfe,0x600,0x3d02,0x602,0x3d06,0x604,0x2ea,0x60e,0x3d0e,0x610,0x3d0a,0x614,0x3d30,0x8646,0x3d12, -0x60e,0x3d16,0x8610,0x3d1a,0x600,0x3de6,0x602,0x1fa,0x604,0x2ee,0x606,0x3df2,0x608,0x466,0x60e,0x3d1e, -0x610,0x1fe,0x612,0x3dee,0x614,0x3d32,0x8646,0x3dea,0x602,0x2f4,0x604,0x3d22,0x60e,0x2f8,0x618,0x2fc, -0x646,0x3d26,0x8662,0x3d2a,0x600,0x3fda,0x602,0x70a,0x8684,0x3f82,0x602,0x3f8,0x8608,0x3c4,0x8602,0x3fc, 
-0x602,0x3fa,0x8608,0x3c6,0x8602,0x3fe,0x860e,0x3d36,0x8618,0x3dc,0x8618,0x3de,0x600,0x3f74,0x602,0x70c, -0x608,0x3f72,0x60c,0x3f70,0x626,0x3e11,0x628,0x3e13,0x868a,0x3f78,0x600,0x3f90,0x602,0x710,0x626,0x3e31, -0x8628,0x3e33,0x600,0x3f94,0x602,0x712,0x626,0x3e51,0x628,0x3e53,0x868a,0x3f98,0x600,0x3fb4,0x602,0x714, -0x608,0x3fb2,0x60c,0x3fb0,0x610,0x754,0x626,0x3e71,0x8628,0x3e73,0x600,0x3ff0,0x602,0x718,0x626,0x3e91, -0x8628,0x3e93,0x8628,0x3fd8,0x600,0x3fd4,0x602,0x71c,0x608,0x3fd2,0x60c,0x3fd0,0x610,0x756,0x8628,0x3eb3, -0x600,0x3ff4,0x602,0x71e,0x626,0x3ed1,0x628,0x3ed3,0x868a,0x3ff8,0x600,0x3ee1,0x602,0x759,0x608,0x3f62, -0x60c,0x3f60,0x626,0x3e01,0x628,0x3e03,0x684,0x3f6d,0x868a,0x3f66,0x600,0x3ee4,0x602,0x75a,0x626,0x3e21, -0x8628,0x3e23,0x600,0x3ee9,0x602,0x75d,0x626,0x3e41,0x628,0x3e43,0x684,0x3f8d,0x868a,0x3f86,0x600,0x3eec, -0x602,0x75e,0x608,0x3fa2,0x60c,0x3fa0,0x610,0x795,0x626,0x3e61,0x628,0x3e63,0x8684,0x3fac,0x600,0x3ef0, -0x602,0x798,0x626,0x3e81,0x8628,0x3e83,0x626,0x3fc8,0x8628,0x3fca,0x600,0x3ef4,0x602,0x79a,0x608,0x3fc2, -0x60c,0x3fc0,0x610,0x797,0x626,0x3ea1,0x628,0x3ea3,0x8684,0x3fcc,0x600,0x3ef9,0x602,0x79d,0x626,0x3ec1, -0x628,0x3ec3,0x684,0x3fed,0x868a,0x3fe6,0x602,0x7a6,0x8610,0x7a8,0x8610,0x80e,0x60c,0x9a0,0x8610,0x9a4, -0x8602,0x806,0x600,0x800,0x60c,0x9ac,0x8610,0x802,0x60c,0x982,0x8610,0x9b8,0x8610,0x9bc,0x600,0x81a, -0x608,0x9c4,0x60c,0x832,0x8610,0x9c8,0x8602,0x818,0x8610,0x9cc,0x608,0x9dc,0x60c,0x81c,0x610,0x9e0, -0x8616,0x9e4,0x8610,0x9e8,0x8610,0x9f0,0x8610,0x9d8,0x60c,0x9a2,0x8610,0x9a6,0x8602,0x8a6,0x600,0x8a0, -0x60c,0x9ae,0x8610,0x8a2,0x60c,0x984,0x8610,0x9ba,0x8610,0x9be,0x600,0x8ba,0x608,0x9c6,0x60c,0x872, -0x8610,0x9ca,0x8602,0x8b8,0x8610,0x9ce,0x608,0x9de,0x60c,0x8bc,0x610,0x9e2,0x8616,0x9e6,0x8610,0x9ea, -0x8610,0x9f2,0x8610,0x9da,0x8610,0x8ae,0x861e,0x8ec,0x861e,0x8ee,0x8610,0x9b4,0x8610,0x9b6,0x8610,0x9d4, -0x8610,0x9d6,0xca6,0xc44,0xca8,0xc46,0x8caa,0xc4a,0x8ca8,0xc48,0x8ca8,0xc4c,0x8ca8,0xd84,0x8ca8,0xda6, 
-0x8ca8,0xd80,0x9278,0x1252,0x9278,0x1262,0x9278,0x1268,0x137c,0x1396,0x93ae,0x1398,0x167c,0x1696,0x16ac,0x1690, -0x96ae,0x1698,0x97ae,0x1728,0x177c,0x1794,0x97ae,0x1798,0x977c,0x1796,0x98ac,0x1890,0x99aa,0x1980,0x1984,0x1995, -0x19aa,0x198e,0x99ac,0x1990,0x1a7c,0x1a94,0x9aae,0x1a98,0x9a7c,0x1a96,0x1b94,0x1bb4,0x1b9e,0x1bb9,0x9bbe,0x1bbc, -0xa05c,0x204c,0xb66a,0x360c,0xb66a,0x3610,0xb66a,0x3614,0xb66a,0x3618,0xb66a,0x361c,0xb66a,0x3624,0xb66a,0x3676, -0xb66a,0x367a,0xb66a,0x3680,0xb66a,0x3682,0xb66a,0x3686,0x600,0x3f9a,0x602,0x3f9c,0x8684,0x3f9e,0x600,0x3fba, -0x602,0x3fbc,0x8684,0x3fbe,0x8670,0x4334,0x8670,0x4336,0x8670,0x435c,0x8670,0x439a,0x8670,0x439e,0x8670,0x439c, -0x8670,0x4408,0x8670,0x4412,0x8670,0x4418,0x8670,0x4448,0x8670,0x444c,0x8670,0x4482,0x8670,0x4488,0x8670,0x448e, -0x8670,0x4492,0x8670,0x44da,0x8670,0x44c4,0x8670,0x44e0,0x8670,0x44e2,0x8670,0x44e8,0x8670,0x44ea,0x8670,0x44f0, -0x8670,0x44f2,0x8670,0x4500,0x8670,0x4502,0x8670,0x45c0,0x8670,0x45c2,0x8670,0x4508,0x8670,0x450a,0x8670,0x4510, -0x8670,0x4512,0x8670,0x45c4,0x8670,0x45c6,0x8670,0x4558,0x8670,0x455a,0x8670,0x455c,0x8670,0x455e,0x8670,0x45d4, -0x8670,0x45d6,0x8670,0x45d8,0x8670,0x45da,0xe132,0x6128,0xe132,0x6098,0xe132,0x609c,0xe132,0x60a0,0xe132,0x60a4, -0xe132,0x60a8,0xe132,0x60ac,0xe132,0x60b0,0xe132,0x60b4,0xe132,0x60b8,0xe132,0x60bc,0xe132,0x60c0,0xe132,0x60c4, -0xe132,0x60ca,0xe132,0x60ce,0xe132,0x60d2,0x6132,0x60e0,0xe134,0x60e2,0x6132,0x60e6,0xe134,0x60e8,0x6132,0x60ec, -0xe134,0x60ee,0x6132,0x60f2,0xe134,0x60f4,0x6132,0x60f8,0xe134,0x60fa,0xe132,0x613c,0xe132,0x61e8,0xe132,0x6158, -0xe132,0x615c,0xe132,0x6160,0xe132,0x6164,0xe132,0x6168,0xe132,0x616c,0xe132,0x6170,0xe132,0x6174,0xe132,0x6178, -0xe132,0x617c,0xe132,0x6180,0xe132,0x6184,0xe132,0x618a,0xe132,0x618e,0xe132,0x6192,0x6132,0x61a0,0xe134,0x61a2, -0x6132,0x61a6,0xe134,0x61a8,0x6132,0x61ac,0xe134,0x61ae,0x6132,0x61b2,0xe134,0x61b4,0x6132,0x61b8,0xe134,0x61ba, 
-0xe132,0x61ee,0xe132,0x61f0,0xe132,0x61f2,0xe132,0x61f4,0xe132,0x61fc,0xb489,0x2e82,0x2134,0xb489,0x2e82,0x2138, -0xb489,0x2e82,0x2156,0xb489,0x49c2,0x225c,0xb489,0x49c2,0x225e,0x3489,0xcf82,0x2696,0xb489,0xd5c2,0x2698,0x348b, -0x2c02,0x2978,0x348b,0x2e82,0x2976,0xb48b,0x2f42,0x297c,0xb48b,0x6bc2,0x2b74,0xb48b,0x6bc2,0x2b76,2,0xe602, -0x41,0x302,0x600,0x3d4c,0x602,0x3d48,0x606,0x3d54,0x8612,0x3d50,0xe602,0x41,0x308,0x8608,0x3bc,0xe602, -0x41,0x30a,0x8602,0x3f4,0xca02,0x43,0x327,0x8602,0x3c10,0xe602,0x45,0x302,0x600,0x3d80,0x602,0x3d7c, -0x606,0x3d88,0x8612,0x3d84,0xe602,0x49,0x308,0x8602,0x3c5c,0xe602,0x4f,0x302,0x600,0x3da4,0x602,0x3da0, -0x606,0x3dac,0x8612,0x3da8,0xe602,0x4f,0x303,0x602,0x3c98,0x608,0x458,0x8610,0x3c9c,0xe602,0x4f,0x308, -0x8608,0x454,0xe602,0x55,0x308,0x600,0x3b6,0x602,0x3ae,0x608,0x3aa,0x8618,0x3b2,0xe602,0x61,0x302, -0x600,0x3d4e,0x602,0x3d4a,0x606,0x3d56,0x8612,0x3d52,0xe602,0x61,0x308,0x8608,0x3be,0xe602,0x61,0x30a, -0x8602,0x3f6,0xca02,0x63,0x327,0x8602,0x3c12,0xe602,0x65,0x302,0x600,0x3d82,0x602,0x3d7e,0x606,0x3d8a, -0x8612,0x3d86,0xe602,0x69,0x308,0x8602,0x3c5e,0xe602,0x6f,0x302,0x600,0x3da6,0x602,0x3da2,0x606,0x3dae, -0x8612,0x3daa,0xe602,0x6f,0x303,0x602,0x3c9a,0x608,0x45a,0x8610,0x3c9e,0xe602,0x6f,0x308,0x8608,0x456, -0xe602,0x75,0x308,0x600,0x3b8,0x602,0x3b0,0x608,0x3ac,0x8618,0x3b4,0xe602,0x41,0x306,0x600,0x3d60, -0x602,0x3d5c,0x606,0x3d68,0x8612,0x3d64,0xe602,0x61,0x306,0x600,0x3d62,0x602,0x3d5e,0x606,0x3d6a,0x8612, -0x3d66,0xe602,0x45,0x304,0x600,0x3c28,0x8602,0x3c2c,0xe602,0x65,0x304,0x600,0x3c2a,0x8602,0x3c2e,0xe602, -0x4f,0x304,0x600,0x3ca0,0x8602,0x3ca4,0xe602,0x6f,0x304,0x600,0x3ca2,0x8602,0x3ca6,0xe602,0x53,0x301, -0x860e,0x3cc8,0xe602,0x73,0x301,0x860e,0x3cca,0xe602,0x53,0x30c,0x860e,0x3ccc,0xe602,0x73,0x30c,0x860e, -0x3cce,0xe602,0x55,0x303,0x8602,0x3cf0,0xe602,0x75,0x303,0x8602,0x3cf2,0xe602,0x55,0x304,0x8610,0x3cf4, -0xe602,0x75,0x304,0x8610,0x3cf6,0xd802,0x4f,0x31b,0x600,0x3db8,0x602,0x3db4,0x606,0x3dc0,0x612,0x3dbc, 
-0x8646,0x3dc4,0xd802,0x6f,0x31b,0x600,0x3dba,0x602,0x3db6,0x606,0x3dc2,0x612,0x3dbe,0x8646,0x3dc6,0xd802, -0x55,0x31b,0x600,0x3dd4,0x602,0x3dd0,0x606,0x3ddc,0x612,0x3dd8,0x8646,0x3de0,0xd802,0x75,0x31b,0x600, -0x3dd6,0x602,0x3dd2,0x606,0x3dde,0x612,0x3dda,0x8646,0x3de2,0xca02,0x4f,0x328,0x8608,0x3d8,0xca02,0x6f, -0x328,0x8608,0x3da,0xe602,0x41,0x307,0x8608,0x3c0,0xe602,0x61,0x307,0x8608,0x3c2,0xca02,0x45,0x327, -0x860c,0x3c38,0xca02,0x65,0x327,0x860c,0x3c3a,0xe602,0x4f,0x307,0x8608,0x460,0xe602,0x6f,0x307,0x8608, -0x462,0xe602,0x3b1,0x301,0x868a,0x3f68,0xe602,0x3b7,0x301,0x868a,0x3f88,0xe602,0x3b9,0x308,0x600,0x3fa4, -0x602,0x720,0x8684,0x3fae,0xe602,0x3c5,0x308,0x600,0x3fc4,0x602,0x760,0x8684,0x3fce,0xe602,0x3c9,0x301, -0x868a,0x3fe8,2,0xcc6,0xcc2,0x99aa,0x1996,2,0xdd9,0xdcf,0x9b94,0x1bba,0xdc02,0x4c,0x323,0x8608, -0x3c70,0xdc02,0x6c,0x323,0x8608,0x3c72,0xdc02,0x52,0x323,0x8608,0x3cb8,0xdc02,0x72,0x323,0x8608,0x3cba, -0xdc02,0x53,0x323,0x860e,0x3cd0,0xdc02,0x73,0x323,0x860e,0x3cd2,0xdc02,0x41,0x323,0x604,0x3d58,0x860c, -0x3d6c,0xdc02,0x61,0x323,0x604,0x3d5a,0x860c,0x3d6e,0xdc02,0x45,0x323,0x8604,0x3d8c,0xdc02,0x65,0x323, -0x8604,0x3d8e,0xdc02,0x4f,0x323,0x8604,0x3db0,0xdc02,0x6f,0x323,0x8604,0x3db2,0xe602,0x3b1,0x313,0x600, -0x3e05,0x602,0x3e09,0x684,0x3e0d,0x868a,0x3f00,0xe602,0x3b1,0x314,0x600,0x3e07,0x602,0x3e0b,0x684,0x3e0f, -0x868a,0x3f02,0x1f00,0xe643,0x3b1,0x313,0x300,0x868a,0x3f04,0x1f01,0xe643,0x3b1,0x314,0x300,0x868a,0x3f06, -0x1f00,0xe643,0x3b1,0x313,0x301,0x868a,0x3f08,0x1f01,0xe643,0x3b1,0x314,0x301,0x868a,0x3f0a,0x1f00,0xe643, -0x3b1,0x313,0x342,0x868a,0x3f0c,0x1f01,0xe643,0x3b1,0x314,0x342,0x868a,0x3f0e,0xe602,0x391,0x313,0x600, -0x3e15,0x602,0x3e19,0x684,0x3e1d,0x868a,0x3f10,0xe602,0x391,0x314,0x600,0x3e17,0x602,0x3e1b,0x684,0x3e1f, -0x868a,0x3f12,0x1f08,0xe643,0x391,0x313,0x300,0x868a,0x3f14,0x1f09,0xe643,0x391,0x314,0x300,0x868a,0x3f16, -0x1f08,0xe643,0x391,0x313,0x301,0x868a,0x3f18,0x1f09,0xe643,0x391,0x314,0x301,0x868a,0x3f1a,0x1f08,0xe643, 
-0x391,0x313,0x342,0x868a,0x3f1c,0x1f09,0xe643,0x391,0x314,0x342,0x868a,0x3f1e,0xe602,0x3b5,0x313,0x600, -0x3e24,0x8602,0x3e28,0xe602,0x3b5,0x314,0x600,0x3e26,0x8602,0x3e2a,0xe602,0x395,0x313,0x600,0x3e34,0x8602, -0x3e38,0xe602,0x395,0x314,0x600,0x3e36,0x8602,0x3e3a,0xe602,0x3b7,0x313,0x600,0x3e45,0x602,0x3e49,0x684, -0x3e4d,0x868a,0x3f20,0xe602,0x3b7,0x314,0x600,0x3e47,0x602,0x3e4b,0x684,0x3e4f,0x868a,0x3f22,0x1f20,0xe643, -0x3b7,0x313,0x300,0x868a,0x3f24,0x1f21,0xe643,0x3b7,0x314,0x300,0x868a,0x3f26,0x1f20,0xe643,0x3b7,0x313, -0x301,0x868a,0x3f28,0x1f21,0xe643,0x3b7,0x314,0x301,0x868a,0x3f2a,0x1f20,0xe643,0x3b7,0x313,0x342,0x868a, -0x3f2c,0x1f21,0xe643,0x3b7,0x314,0x342,0x868a,0x3f2e,0xe602,0x397,0x313,0x600,0x3e55,0x602,0x3e59,0x684, -0x3e5d,0x868a,0x3f30,0xe602,0x397,0x314,0x600,0x3e57,0x602,0x3e5b,0x684,0x3e5f,0x868a,0x3f32,0x1f28,0xe643, -0x397,0x313,0x300,0x868a,0x3f34,0x1f29,0xe643,0x397,0x314,0x300,0x868a,0x3f36,0x1f28,0xe643,0x397,0x313, -0x301,0x868a,0x3f38,0x1f29,0xe643,0x397,0x314,0x301,0x868a,0x3f3a,0x1f28,0xe643,0x397,0x313,0x342,0x868a, -0x3f3c,0x1f29,0xe643,0x397,0x314,0x342,0x868a,0x3f3e,0xe602,0x3b9,0x313,0x600,0x3e64,0x602,0x3e68,0x8684, -0x3e6c,0xe602,0x3b9,0x314,0x600,0x3e66,0x602,0x3e6a,0x8684,0x3e6e,0xe602,0x399,0x313,0x600,0x3e74,0x602, -0x3e78,0x8684,0x3e7c,0xe602,0x399,0x314,0x600,0x3e76,0x602,0x3e7a,0x8684,0x3e7e,0xe602,0x3bf,0x313,0x600, -0x3e84,0x8602,0x3e88,0xe602,0x3bf,0x314,0x600,0x3e86,0x8602,0x3e8a,0xe602,0x39f,0x313,0x600,0x3e94,0x8602, -0x3e98,0xe602,0x39f,0x314,0x600,0x3e96,0x8602,0x3e9a,0xe602,0x3c5,0x313,0x600,0x3ea4,0x602,0x3ea8,0x8684, -0x3eac,0xe602,0x3c5,0x314,0x600,0x3ea6,0x602,0x3eaa,0x8684,0x3eae,0xe602,0x3a5,0x314,0x600,0x3eb6,0x602, -0x3eba,0x8684,0x3ebe,0xe602,0x3c9,0x313,0x600,0x3ec5,0x602,0x3ec9,0x684,0x3ecd,0x868a,0x3f40,0xe602,0x3c9, -0x314,0x600,0x3ec7,0x602,0x3ecb,0x684,0x3ecf,0x868a,0x3f42,0x1f60,0xe643,0x3c9,0x313,0x300,0x868a,0x3f44, 
-0x1f61,0xe643,0x3c9,0x314,0x300,0x868a,0x3f46,0x1f60,0xe643,0x3c9,0x313,0x301,0x868a,0x3f48,0x1f61,0xe643, -0x3c9,0x314,0x301,0x868a,0x3f4a,0x1f60,0xe643,0x3c9,0x313,0x342,0x868a,0x3f4c,0x1f61,0xe643,0x3c9,0x314, -0x342,0x868a,0x3f4e,0xe602,0x3a9,0x313,0x600,0x3ed5,0x602,0x3ed9,0x684,0x3edd,0x868a,0x3f50,0xe602,0x3a9, -0x314,0x600,0x3ed7,0x602,0x3edb,0x684,0x3edf,0x868a,0x3f52,0x1f68,0xe643,0x3a9,0x313,0x300,0x868a,0x3f54, -0x1f69,0xe643,0x3a9,0x314,0x300,0x868a,0x3f56,0x1f68,0xe643,0x3a9,0x313,0x301,0x868a,0x3f58,0x1f69,0xe643, -0x3a9,0x314,0x301,0x868a,0x3f5a,0x1f68,0xe643,0x3a9,0x313,0x342,0x868a,0x3f5c,0x1f69,0xe643,0x3a9,0x314, -0x342,0x868a,0x3f5e,0xe602,0x3b1,0x300,0x868a,0x3f64,0xe602,0x3b7,0x300,0x868a,0x3f84,0xe602,0x3c9,0x300, -0x868a,0x3fe4,0xe602,0x3b1,0x342,0x868a,0x3f6e,0xe602,0x3b7,0x342,0x868a,0x3f8e,0xe602,0x3c9,0x342,0x868a, -0x3fee,3,0xe602,0x41,0x300,0xe602,0x41,0x301,0xe602,0x41,0x303,0xe602,0x45,0x300,0xe602,0x45, -0x301,0xe602,0x45,0x308,0xe602,0x49,0x300,0xe602,0x49,0x301,0xe602,0x49,0x302,0xe602,0x4e,0x303, -0xe602,0x4f,0x300,0xe602,0x4f,0x301,0xe602,0x55,0x300,0xe602,0x55,0x301,0xe602,0x55,0x302,0xe602, -0x59,0x301,0xe602,0x61,0x300,0xe602,0x61,0x301,0xe602,0x61,0x303,0xe602,0x65,0x300,0xe602,0x65, -0x301,0xe602,0x65,0x308,0xe602,0x69,0x300,0xe602,0x69,0x301,0xe602,0x69,0x302,0xe602,0x6e,0x303, -0xe602,0x6f,0x300,0xe602,0x6f,0x301,0xe602,0x75,0x300,0xe602,0x75,0x301,0xe602,0x75,0x302,0xe602, -0x79,0x301,0xe602,0x79,0x308,0xe602,0x41,0x304,0xe602,0x61,0x304,0xca02,0x41,0x328,0xca02,0x61, -0x328,0xe602,0x43,0x301,0xe602,0x63,0x301,0xe602,0x43,0x302,0xe602,0x63,0x302,0xe602,0x43,0x307, -0xe602,0x63,0x307,0xe602,0x43,0x30c,0xe602,0x63,0x30c,0xe602,0x44,0x30c,0xe602,0x64,0x30c,0xe602, -0x45,0x306,0xe602,0x65,0x306,0xe602,0x45,0x307,0xe602,0x65,0x307,0xca02,0x45,0x328,0xca02,0x65, -0x328,0xe602,0x45,0x30c,0xe602,0x65,0x30c,0xe602,0x47,0x302,0xe602,0x67,0x302,0xe602,0x47,0x306, 
-0xe602,0x67,0x306,0xe602,0x47,0x307,0xe602,0x67,0x307,0xca02,0x47,0x327,0xca02,0x67,0x327,0xe602, -0x48,0x302,0xe602,0x68,0x302,0xe602,0x49,0x303,0xe602,0x69,0x303,0xe602,0x49,0x304,0xe602,0x69, -0x304,0xe602,0x49,0x306,0xe602,0x69,0x306,0xca02,0x49,0x328,0xca02,0x69,0x328,0xe602,0x49,0x307, -0xe602,0x4a,0x302,0xe602,0x6a,0x302,0xca02,0x4b,0x327,0xca02,0x6b,0x327,0xe602,0x4c,0x301,0xe602, -0x6c,0x301,0xca02,0x4c,0x327,0xca02,0x6c,0x327,0xe602,0x4c,0x30c,0xe602,0x6c,0x30c,0xe602,0x4e, -0x301,0xe602,0x6e,0x301,0xca02,0x4e,0x327,0xca02,0x6e,0x327,0xe602,0x4e,0x30c,0xe602,0x6e,0x30c, -0xe602,0x4f,0x306,0xe602,0x6f,0x306,0xe602,0x4f,0x30b,0xe602,0x6f,0x30b,0xe602,0x52,0x301,0xe602, -0x72,0x301,0xca02,0x52,0x327,0xca02,0x72,0x327,0xe602,0x52,0x30c,0xe602,0x72,0x30c,0xe602,0x53, -0x302,0xe602,0x73,0x302,0xca02,0x53,0x327,0xca02,0x73,0x327,0xca02,0x54,0x327,0xca02,0x74,0x327, -0xe602,0x54,0x30c,0xe602,0x74,0x30c,0xe602,0x55,0x306,0xe602,0x75,0x306,0xe602,0x55,0x30a,0xe602, -0x75,0x30a,0xe602,0x55,0x30b,0xe602,0x75,0x30b,0xca02,0x55,0x328,0xca02,0x75,0x328,0xe602,0x57, -0x302,0xe602,0x77,0x302,0xe602,0x59,0x302,0xe602,0x79,0x302,0xe602,0x59,0x308,0xe602,0x5a,0x301, -0xe602,0x7a,0x301,0xe602,0x5a,0x307,0xe602,0x7a,0x307,0xe602,0x5a,0x30c,0xe602,0x7a,0x30c,0xe602, -0x41,0x30c,0xe602,0x61,0x30c,0xe602,0x49,0x30c,0xe602,0x69,0x30c,0xe602,0x4f,0x30c,0xe602,0x6f, -0x30c,0xe602,0x55,0x30c,0xe602,0x75,0x30c,0xdc,0xe643,0x55,0x308,0x304,0xfc,0xe643,0x75,0x308, -0x304,0xdc,0xe643,0x55,0x308,0x301,0xfc,0xe643,0x75,0x308,0x301,0xdc,0xe643,0x55,0x308,0x30c, -0xfc,0xe643,0x75,0x308,0x30c,0xdc,0xe643,0x55,0x308,0x300,0xfc,0xe643,0x75,0x308,0x300,0xc4, -0xe643,0x41,0x308,0x304,0xe4,0xe643,0x61,0x308,0x304,0x226,0xe643,0x41,0x307,0x304,0x227,0xe643, -0x61,0x307,0x304,0xe602,0xc6,0x304,0xe602,0xe6,0x304,0xe602,0x47,0x30c,0xe602,0x67,0x30c,0xe602, -0x4b,0x30c,0xe602,0x6b,0x30c,0x1ea,0xe643,0x4f,0x328,0x304,0x1eb,0xe643,0x6f,0x328,0x304,0xe602, 
-0x1b7,0x30c,0xe602,0x292,0x30c,0xe602,0x6a,0x30c,0xe602,0x47,0x301,0xe602,0x67,0x301,0xe602,0x4e, -0x300,0xe602,0x6e,0x300,0xc5,0xe643,0x41,0x30a,0x301,0xe5,0xe643,0x61,0x30a,0x301,0xe602,0xc6, -0x301,0xe602,0xe6,0x301,0xe602,0xd8,0x301,0xe602,0xf8,0x301,0xe602,0x41,0x30f,0xe602,0x61,0x30f, -0xe602,0x41,0x311,0xe602,0x61,0x311,0xe602,0x45,0x30f,0xe602,0x65,0x30f,0xe602,0x45,0x311,0xe602, -0x65,0x311,0xe602,0x49,0x30f,0xe602,0x69,0x30f,0xe602,0x49,0x311,0xe602,0x69,0x311,0xe602,0x4f, -0x30f,0xe602,0x6f,0x30f,0xe602,0x4f,0x311,0xe602,0x6f,0x311,0xe602,0x52,0x30f,0xe602,0x72,0x30f, -0xe602,0x52,0x311,0xe602,0x72,0x311,0xe602,0x55,0x30f,0xe602,0x75,0x30f,0xe602,0x55,0x311,0xe602, -0x75,0x311,0xdc02,0x53,0x326,0xdc02,0x73,0x326,0xdc02,0x54,0x326,0xdc02,0x74,0x326,0xe602,0x48, -0x30c,0xe602,0x68,0x30c,0xd6,0xe643,0x4f,0x308,0x304,0xf6,0xe643,0x6f,0x308,0x304,0xd5,0xe643, -0x4f,0x303,0x304,0xf5,0xe643,0x6f,0x303,0x304,0x22e,0xe643,0x4f,0x307,0x304,0x22f,0xe643,0x6f, -0x307,0x304,0xe602,0x59,0x304,0xe602,0x79,0x304,0xe602,0xa8,0x301,0xe602,0x391,0x301,0xe602,0x395, -0x301,0xe602,0x397,0x301,0xe602,0x399,0x301,0xe602,0x39f,0x301,0xe602,0x3a5,0x301,0xe602,0x3a9,0x301, -0x3ca,0xe643,0x3b9,0x308,0x301,0xe602,0x399,0x308,0xe602,0x3a5,0x308,0xe602,0x3b5,0x301,0xe602,0x3b9, -0x301,0x3cb,0xe643,0x3c5,0x308,0x301,0xe602,0x3bf,0x301,0xe602,0x3c5,0x301,0xe602,0x3d2,0x301,0xe602, -0x3d2,0x308,0xe602,0x415,0x300,0xe602,0x415,0x308,0xe602,0x413,0x301,0xe602,0x406,0x308,0xe602,0x41a, -0x301,0xe602,0x418,0x300,0xe602,0x423,0x306,0xe602,0x418,0x306,0xe602,0x438,0x306,0xe602,0x435,0x300, -0xe602,0x435,0x308,0xe602,0x433,0x301,0xe602,0x456,0x308,0xe602,0x43a,0x301,0xe602,0x438,0x300,0xe602, -0x443,0x306,0xe602,0x474,0x30f,0xe602,0x475,0x30f,0xe602,0x416,0x306,0xe602,0x436,0x306,0xe602,0x410, -0x306,0xe602,0x430,0x306,0xe602,0x410,0x308,0xe602,0x430,0x308,0xe602,0x415,0x306,0xe602,0x435,0x306, 
-0xe602,0x4d8,0x308,0xe602,0x4d9,0x308,0xe602,0x416,0x308,0xe602,0x436,0x308,0xe602,0x417,0x308,0xe602, -0x437,0x308,0xe602,0x418,0x304,0xe602,0x438,0x304,0xe602,0x418,0x308,0xe602,0x438,0x308,0xe602,0x41e, -0x308,0xe602,0x43e,0x308,0xe602,0x4e8,0x308,0xe602,0x4e9,0x308,0xe602,0x42d,0x308,0xe602,0x44d,0x308, -0xe602,0x423,0x304,0xe602,0x443,0x304,0xe602,0x423,0x308,0xe602,0x443,0x308,0xe602,0x423,0x30b,0xe602, -0x443,0x30b,0xe602,0x427,0x308,0xe602,0x447,0x308,0xe602,0x42b,0x308,0xe602,0x44b,0x308,0xe602,0x627, -0x653,0xe602,0x627,0x654,0xe602,0x648,0x654,0xdc02,0x627,0x655,0xe602,0x64a,0x654,0xe602,0x6d5,0x654, -0xe602,0x6c1,0x654,0xe602,0x6d2,0x654,0x702,0x928,0x93c,0x702,0x930,0x93c,0x702,0x933,0x93c,2, -0x9c7,0x9be,2,0x9c7,0x9d7,2,0xb47,0xb56,2,0xb47,0xb3e,2,0xb47,0xb57,2,0xb92, -0xbd7,2,0xbc6,0xbbe,2,0xbc7,0xbbe,2,0xbc6,0xbd7,0x5b02,0xc46,0xc56,2,0xcbf,0xcd5, -2,0xcc6,0xcd5,2,0xcc6,0xcd6,0xcca,0x43,0xcc6,0xcc2,0xcd5,2,0xd46,0xd3e,2,0xd47, -0xd3e,2,0xd46,0xd57,0x902,0xdd9,0xdca,0xddc,0x943,0xdd9,0xdcf,0xdca,2,0xdd9,0xddf,2, -0x1025,0x102e,2,0x1b05,0x1b35,2,0x1b07,0x1b35,2,0x1b09,0x1b35,2,0x1b0b,0x1b35,2,0x1b0d, -0x1b35,2,0x1b11,0x1b35,2,0x1b3a,0x1b35,2,0x1b3c,0x1b35,2,0x1b3e,0x1b35,2,0x1b3f,0x1b35, -2,0x1b42,0x1b35,0xdc02,0x41,0x325,0xdc02,0x61,0x325,0xe602,0x42,0x307,0xe602,0x62,0x307,0xdc02, -0x42,0x323,0xdc02,0x62,0x323,0xdc02,0x42,0x331,0xdc02,0x62,0x331,0xc7,0xe643,0x43,0x327,0x301, -0xe7,0xe643,0x63,0x327,0x301,0xe602,0x44,0x307,0xe602,0x64,0x307,0xdc02,0x44,0x323,0xdc02,0x64, -0x323,0xdc02,0x44,0x331,0xdc02,0x64,0x331,0xca02,0x44,0x327,0xca02,0x64,0x327,0xdc02,0x44,0x32d, -0xdc02,0x64,0x32d,0x112,0xe643,0x45,0x304,0x300,0x113,0xe643,0x65,0x304,0x300,0x112,0xe643,0x45, -0x304,0x301,0x113,0xe643,0x65,0x304,0x301,0xdc02,0x45,0x32d,0xdc02,0x65,0x32d,0xdc02,0x45,0x330, -0xdc02,0x65,0x330,0x228,0xe643,0x45,0x327,0x306,0x229,0xe643,0x65,0x327,0x306,0xe602,0x46,0x307, 
-0xe602,0x66,0x307,0xe602,0x47,0x304,0xe602,0x67,0x304,0xe602,0x48,0x307,0xe602,0x68,0x307,0xdc02, -0x48,0x323,0xdc02,0x68,0x323,0xe602,0x48,0x308,0xe602,0x68,0x308,0xca02,0x48,0x327,0xca02,0x68, -0x327,0xdc02,0x48,0x32e,0xdc02,0x68,0x32e,0xdc02,0x49,0x330,0xdc02,0x69,0x330,0xcf,0xe643,0x49, -0x308,0x301,0xef,0xe643,0x69,0x308,0x301,0xe602,0x4b,0x301,0xe602,0x6b,0x301,0xdc02,0x4b,0x323, -0xdc02,0x6b,0x323,0xdc02,0x4b,0x331,0xdc02,0x6b,0x331,0x1e36,0xe643,0x4c,0x323,0x304,0x1e37,0xe643, -0x6c,0x323,0x304,0xdc02,0x4c,0x331,0xdc02,0x6c,0x331,0xdc02,0x4c,0x32d,0xdc02,0x6c,0x32d,0xe602, -0x4d,0x301,0xe602,0x6d,0x301,0xe602,0x4d,0x307,0xe602,0x6d,0x307,0xdc02,0x4d,0x323,0xdc02,0x6d, -0x323,0xe602,0x4e,0x307,0xe602,0x6e,0x307,0xdc02,0x4e,0x323,0xdc02,0x6e,0x323,0xdc02,0x4e,0x331, -0xdc02,0x6e,0x331,0xdc02,0x4e,0x32d,0xdc02,0x6e,0x32d,0xd5,0xe643,0x4f,0x303,0x301,0xf5,0xe643, -0x6f,0x303,0x301,0xd5,0xe643,0x4f,0x303,0x308,0xf5,0xe643,0x6f,0x303,0x308,0x14c,0xe643,0x4f, -0x304,0x300,0x14d,0xe643,0x6f,0x304,0x300,0x14c,0xe643,0x4f,0x304,0x301,0x14d,0xe643,0x6f,0x304, -0x301,0xe602,0x50,0x301,0xe602,0x70,0x301,0xe602,0x50,0x307,0xe602,0x70,0x307,0xe602,0x52,0x307, -0xe602,0x72,0x307,0x1e5a,0xe643,0x52,0x323,0x304,0x1e5b,0xe643,0x72,0x323,0x304,0xdc02,0x52,0x331, -0xdc02,0x72,0x331,0xe602,0x53,0x307,0xe602,0x73,0x307,0x15a,0xe643,0x53,0x301,0x307,0x15b,0xe643, -0x73,0x301,0x307,0x160,0xe643,0x53,0x30c,0x307,0x161,0xe643,0x73,0x30c,0x307,0x1e62,0xe643,0x53, -0x323,0x307,0x1e63,0xe643,0x73,0x323,0x307,0xe602,0x54,0x307,0xe602,0x74,0x307,0xdc02,0x54,0x323, -0xdc02,0x74,0x323,0xdc02,0x54,0x331,0xdc02,0x74,0x331,0xdc02,0x54,0x32d,0xdc02,0x74,0x32d,0xdc02, -0x55,0x324,0xdc02,0x75,0x324,0xdc02,0x55,0x330,0xdc02,0x75,0x330,0xdc02,0x55,0x32d,0xdc02,0x75, -0x32d,0x168,0xe643,0x55,0x303,0x301,0x169,0xe643,0x75,0x303,0x301,0x16a,0xe643,0x55,0x304,0x308, -0x16b,0xe643,0x75,0x304,0x308,0xe602,0x56,0x303,0xe602,0x76,0x303,0xdc02,0x56,0x323,0xdc02,0x76, 
-0x323,0xe602,0x57,0x300,0xe602,0x77,0x300,0xe602,0x57,0x301,0xe602,0x77,0x301,0xe602,0x57,0x308, -0xe602,0x77,0x308,0xe602,0x57,0x307,0xe602,0x77,0x307,0xdc02,0x57,0x323,0xdc02,0x77,0x323,0xe602, -0x58,0x307,0xe602,0x78,0x307,0xe602,0x58,0x308,0xe602,0x78,0x308,0xe602,0x59,0x307,0xe602,0x79, -0x307,0xe602,0x5a,0x302,0xe602,0x7a,0x302,0xdc02,0x5a,0x323,0xdc02,0x7a,0x323,0xdc02,0x5a,0x331, -0xdc02,0x7a,0x331,0xdc02,0x68,0x331,0xe602,0x74,0x308,0xe602,0x77,0x30a,0xe602,0x79,0x30a,0xe602, -0x17f,0x307,0xe602,0x41,0x309,0xe602,0x61,0x309,0xc2,0xe643,0x41,0x302,0x301,0xe2,0xe643,0x61, -0x302,0x301,0xc2,0xe643,0x41,0x302,0x300,0xe2,0xe643,0x61,0x302,0x300,0xc2,0xe643,0x41,0x302, -0x309,0xe2,0xe643,0x61,0x302,0x309,0xc2,0xe643,0x41,0x302,0x303,0xe2,0xe643,0x61,0x302,0x303, -0x1ea0,0xe643,0x41,0x323,0x302,0x1ea1,0xe643,0x61,0x323,0x302,0x102,0xe643,0x41,0x306,0x301,0x103, -0xe643,0x61,0x306,0x301,0x102,0xe643,0x41,0x306,0x300,0x103,0xe643,0x61,0x306,0x300,0x102,0xe643, -0x41,0x306,0x309,0x103,0xe643,0x61,0x306,0x309,0x102,0xe643,0x41,0x306,0x303,0x103,0xe643,0x61, -0x306,0x303,0x1ea0,0xe643,0x41,0x323,0x306,0x1ea1,0xe643,0x61,0x323,0x306,0xe602,0x45,0x309,0xe602, -0x65,0x309,0xe602,0x45,0x303,0xe602,0x65,0x303,0xca,0xe643,0x45,0x302,0x301,0xea,0xe643,0x65, -0x302,0x301,0xca,0xe643,0x45,0x302,0x300,0xea,0xe643,0x65,0x302,0x300,0xca,0xe643,0x45,0x302, -0x309,0xea,0xe643,0x65,0x302,0x309,0xca,0xe643,0x45,0x302,0x303,0xea,0xe643,0x65,0x302,0x303, -0x1eb8,0xe643,0x45,0x323,0x302,0x1eb9,0xe643,0x65,0x323,0x302,0xe602,0x49,0x309,0xe602,0x69,0x309, -0xdc02,0x49,0x323,0xdc02,0x69,0x323,0xe602,0x4f,0x309,0xe602,0x6f,0x309,0xd4,0xe643,0x4f,0x302, -0x301,0xf4,0xe643,0x6f,0x302,0x301,0xd4,0xe643,0x4f,0x302,0x300,0xf4,0xe643,0x6f,0x302,0x300, -0xd4,0xe643,0x4f,0x302,0x309,0xf4,0xe643,0x6f,0x302,0x309,0xd4,0xe643,0x4f,0x302,0x303,0xf4, -0xe643,0x6f,0x302,0x303,0x1ecc,0xe643,0x4f,0x323,0x302,0x1ecd,0xe643,0x6f,0x323,0x302,0x1a0,0xe643, 
-0x4f,0x31b,0x301,0x1a1,0xe643,0x6f,0x31b,0x301,0x1a0,0xe643,0x4f,0x31b,0x300,0x1a1,0xe643,0x6f, -0x31b,0x300,0x1a0,0xe643,0x4f,0x31b,0x309,0x1a1,0xe643,0x6f,0x31b,0x309,0x1a0,0xe643,0x4f,0x31b, -0x303,0x1a1,0xe643,0x6f,0x31b,0x303,0x1a0,0xdc43,0x4f,0x31b,0x323,0x1a1,0xdc43,0x6f,0x31b,0x323, -0xdc02,0x55,0x323,0xdc02,0x75,0x323,0xe602,0x55,0x309,0xe602,0x75,0x309,0x1af,0xe643,0x55,0x31b, -0x301,0x1b0,0xe643,0x75,0x31b,0x301,0x1af,0xe643,0x55,0x31b,0x300,0x1b0,0xe643,0x75,0x31b,0x300, -0x1af,0xe643,0x55,0x31b,0x309,0x1b0,0xe643,0x75,0x31b,0x309,0x1af,0xe643,0x55,0x31b,0x303,0x1b0, -0xe643,0x75,0x31b,0x303,0x1af,0xdc43,0x55,0x31b,0x323,0x1b0,0xdc43,0x75,0x31b,0x323,0xe602,0x59, -0x300,0xe602,0x79,0x300,0xdc02,0x59,0x323,0xdc02,0x79,0x323,0xe602,0x59,0x309,0xe602,0x79,0x309, -0xe602,0x59,0x303,0xe602,0x79,0x303,0x1f10,0xe643,0x3b5,0x313,0x300,0x1f11,0xe643,0x3b5,0x314,0x300, -0x1f10,0xe643,0x3b5,0x313,0x301,0x1f11,0xe643,0x3b5,0x314,0x301,0x1f18,0xe643,0x395,0x313,0x300,0x1f19, -0xe643,0x395,0x314,0x300,0x1f18,0xe643,0x395,0x313,0x301,0x1f19,0xe643,0x395,0x314,0x301,0x1f30,0xe643, -0x3b9,0x313,0x300,0x1f31,0xe643,0x3b9,0x314,0x300,0x1f30,0xe643,0x3b9,0x313,0x301,0x1f31,0xe643,0x3b9, -0x314,0x301,0x1f30,0xe643,0x3b9,0x313,0x342,0x1f31,0xe643,0x3b9,0x314,0x342,0x1f38,0xe643,0x399,0x313, -0x300,0x1f39,0xe643,0x399,0x314,0x300,0x1f38,0xe643,0x399,0x313,0x301,0x1f39,0xe643,0x399,0x314,0x301, -0x1f38,0xe643,0x399,0x313,0x342,0x1f39,0xe643,0x399,0x314,0x342,0x1f40,0xe643,0x3bf,0x313,0x300,0x1f41, -0xe643,0x3bf,0x314,0x300,0x1f40,0xe643,0x3bf,0x313,0x301,0x1f41,0xe643,0x3bf,0x314,0x301,0x1f48,0xe643, -0x39f,0x313,0x300,0x1f49,0xe643,0x39f,0x314,0x300,0x1f48,0xe643,0x39f,0x313,0x301,0x1f49,0xe643,0x39f, -0x314,0x301,0x1f50,0xe643,0x3c5,0x313,0x300,0x1f51,0xe643,0x3c5,0x314,0x300,0x1f50,0xe643,0x3c5,0x313, -0x301,0x1f51,0xe643,0x3c5,0x314,0x301,0x1f50,0xe643,0x3c5,0x313,0x342,0x1f51,0xe643,0x3c5,0x314,0x342, 
-0x1f59,0xe643,0x3a5,0x314,0x300,0x1f59,0xe643,0x3a5,0x314,0x301,0x1f59,0xe643,0x3a5,0x314,0x342,0xe602, -0x3b5,0x300,0xe602,0x3b9,0x300,0xe602,0x3bf,0x300,0xe602,0x3c5,0x300,0x1f00,0xf043,0x3b1,0x313,0x345, -0x1f01,0xf043,0x3b1,0x314,0x345,0x1f02,0x345,2,0xf044,0x3b1,0x313,0x300,0x345,0x1f03,0x345,2, -0xf044,0x3b1,0x314,0x300,0x345,0x1f04,0x345,2,0xf044,0x3b1,0x313,0x301,0x345,0x1f05,0x345,2, -0xf044,0x3b1,0x314,0x301,0x345,0x1f06,0x345,2,0xf044,0x3b1,0x313,0x342,0x345,0x1f07,0x345,2, -0xf044,0x3b1,0x314,0x342,0x345,0x1f08,0xf043,0x391,0x313,0x345,0x1f09,0xf043,0x391,0x314,0x345,0x1f0a, -0x345,2,0xf044,0x391,0x313,0x300,0x345,0x1f0b,0x345,2,0xf044,0x391,0x314,0x300,0x345,0x1f0c, -0x345,2,0xf044,0x391,0x313,0x301,0x345,0x1f0d,0x345,2,0xf044,0x391,0x314,0x301,0x345,0x1f0e, -0x345,2,0xf044,0x391,0x313,0x342,0x345,0x1f0f,0x345,2,0xf044,0x391,0x314,0x342,0x345,0x1f20, -0xf043,0x3b7,0x313,0x345,0x1f21,0xf043,0x3b7,0x314,0x345,0x1f22,0x345,2,0xf044,0x3b7,0x313,0x300, -0x345,0x1f23,0x345,2,0xf044,0x3b7,0x314,0x300,0x345,0x1f24,0x345,2,0xf044,0x3b7,0x313,0x301, -0x345,0x1f25,0x345,2,0xf044,0x3b7,0x314,0x301,0x345,0x1f26,0x345,2,0xf044,0x3b7,0x313,0x342, -0x345,0x1f27,0x345,2,0xf044,0x3b7,0x314,0x342,0x345,0x1f28,0xf043,0x397,0x313,0x345,0x1f29,0xf043, -0x397,0x314,0x345,0x1f2a,0x345,2,0xf044,0x397,0x313,0x300,0x345,0x1f2b,0x345,2,0xf044,0x397, -0x314,0x300,0x345,0x1f2c,0x345,2,0xf044,0x397,0x313,0x301,0x345,0x1f2d,0x345,2,0xf044,0x397, -0x314,0x301,0x345,0x1f2e,0x345,2,0xf044,0x397,0x313,0x342,0x345,0x1f2f,0x345,2,0xf044,0x397, -0x314,0x342,0x345,0x1f60,0xf043,0x3c9,0x313,0x345,0x1f61,0xf043,0x3c9,0x314,0x345,0x1f62,0x345,2, -0xf044,0x3c9,0x313,0x300,0x345,0x1f63,0x345,2,0xf044,0x3c9,0x314,0x300,0x345,0x1f64,0x345,2, -0xf044,0x3c9,0x313,0x301,0x345,0x1f65,0x345,2,0xf044,0x3c9,0x314,0x301,0x345,0x1f66,0x345,2, -0xf044,0x3c9,0x313,0x342,0x345,0x1f67,0x345,2,0xf044,0x3c9,0x314,0x342,0x345,0x1f68,0xf043,0x3a9, 
-0x313,0x345,0x1f69,0xf043,0x3a9,0x314,0x345,0x1f6a,0x345,2,0xf044,0x3a9,0x313,0x300,0x345,0x1f6b, -0x345,2,0xf044,0x3a9,0x314,0x300,0x345,0x1f6c,0x345,2,0xf044,0x3a9,0x313,0x301,0x345,0x1f6d, -0x345,2,0xf044,0x3a9,0x314,0x301,0x345,0x1f6e,0x345,2,0xf044,0x3a9,0x313,0x342,0x345,0x1f6f, -0x345,2,0xf044,0x3a9,0x314,0x342,0x345,0xe602,0x3b1,0x306,0xe602,0x3b1,0x304,0x1f70,0xf043,0x3b1, -0x300,0x345,0xf002,0x3b1,0x345,0x3ac,0xf043,0x3b1,0x301,0x345,0x1fb6,0xf043,0x3b1,0x342,0x345,0xe602, -0x391,0x306,0xe602,0x391,0x304,0xe602,0x391,0x300,0xf002,0x391,0x345,0xe602,0xa8,0x342,0x1f74,0xf043, -0x3b7,0x300,0x345,0xf002,0x3b7,0x345,0x3ae,0xf043,0x3b7,0x301,0x345,0x1fc6,0xf043,0x3b7,0x342,0x345, -0xe602,0x395,0x300,0xe602,0x397,0x300,0xf002,0x397,0x345,0xe602,0x1fbf,0x300,0xe602,0x1fbf,0x301,0xe602, -0x1fbf,0x342,0xe602,0x3b9,0x306,0xe602,0x3b9,0x304,0x3ca,0xe643,0x3b9,0x308,0x300,0xe602,0x3b9,0x342, -0x3ca,0xe643,0x3b9,0x308,0x342,0xe602,0x399,0x306,0xe602,0x399,0x304,0xe602,0x399,0x300,0xe602,0x1ffe, -0x300,0xe602,0x1ffe,0x301,0xe602,0x1ffe,0x342,0xe602,0x3c5,0x306,0xe602,0x3c5,0x304,0x3cb,0xe643,0x3c5, -0x308,0x300,0xe602,0x3c1,0x313,0xe602,0x3c1,0x314,0xe602,0x3c5,0x342,0x3cb,0xe643,0x3c5,0x308,0x342, -0xe602,0x3a5,0x306,0xe602,0x3a5,0x304,0xe602,0x3a5,0x300,0xe602,0x3a1,0x314,0xe602,0xa8,0x300,0x1f7c, -0xf043,0x3c9,0x300,0x345,0xf002,0x3c9,0x345,0x3ce,0xf043,0x3c9,0x301,0x345,0x1ff6,0xf043,0x3c9,0x342, -0x345,0xe602,0x39f,0x300,0xe602,0x3a9,0x300,0xf002,0x3a9,0x345,0x102,0x2190,0x338,0x102,0x2192,0x338, -0x102,0x2194,0x338,0x102,0x21d0,0x338,0x102,0x21d4,0x338,0x102,0x21d2,0x338,0x102,0x2203,0x338,0x102, -0x2208,0x338,0x102,0x220b,0x338,0x102,0x2223,0x338,0x102,0x2225,0x338,0x102,0x223c,0x338,0x102,0x2243, -0x338,0x102,0x2245,0x338,0x102,0x2248,0x338,0x102,0x3d,0x338,0x102,0x2261,0x338,0x102,0x224d,0x338, -0x102,0x3c,0x338,0x102,0x3e,0x338,0x102,0x2264,0x338,0x102,0x2265,0x338,0x102,0x2272,0x338,0x102, 
-0x2273,0x338,0x102,0x2276,0x338,0x102,0x2277,0x338,0x102,0x227a,0x338,0x102,0x227b,0x338,0x102,0x2282, -0x338,0x102,0x2283,0x338,0x102,0x2286,0x338,0x102,0x2287,0x338,0x102,0x22a2,0x338,0x102,0x22a8,0x338, -0x102,0x22a9,0x338,0x102,0x22ab,0x338,0x102,0x227c,0x338,0x102,0x227d,0x338,0x102,0x2291,0x338,0x102, -0x2292,0x338,0x102,0x22b2,0x338,0x102,0x22b3,0x338,0x102,0x22b4,0x338,0x102,0x22b5,0x338,0x802,0x304b, -0x3099,0x802,0x304d,0x3099,0x802,0x304f,0x3099,0x802,0x3051,0x3099,0x802,0x3053,0x3099,0x802,0x3055,0x3099, -0x802,0x3057,0x3099,0x802,0x3059,0x3099,0x802,0x305b,0x3099,0x802,0x305d,0x3099,0x802,0x305f,0x3099,0x802, -0x3061,0x3099,0x802,0x3064,0x3099,0x802,0x3066,0x3099,0x802,0x3068,0x3099,0x802,0x306f,0x3099,0x802,0x306f, -0x309a,0x802,0x3072,0x3099,0x802,0x3072,0x309a,0x802,0x3075,0x3099,0x802,0x3075,0x309a,0x802,0x3078,0x3099, -0x802,0x3078,0x309a,0x802,0x307b,0x3099,0x802,0x307b,0x309a,0x802,0x3046,0x3099,0x802,0x309d,0x3099,0x802, -0x30ab,0x3099,0x802,0x30ad,0x3099,0x802,0x30af,0x3099,0x802,0x30b1,0x3099,0x802,0x30b3,0x3099,0x802,0x30b5, -0x3099,0x802,0x30b7,0x3099,0x802,0x30b9,0x3099,0x802,0x30bb,0x3099,0x802,0x30bd,0x3099,0x802,0x30bf,0x3099, -0x802,0x30c1,0x3099,0x802,0x30c4,0x3099,0x802,0x30c6,0x3099,0x802,0x30c8,0x3099,0x802,0x30cf,0x3099,0x802, -0x30cf,0x309a,0x802,0x30d2,0x3099,0x802,0x30d2,0x309a,0x802,0x30d5,0x3099,0x802,0x30d5,0x309a,0x802,0x30d8, -0x3099,0x802,0x30d8,0x309a,0x802,0x30db,0x3099,0x802,0x30db,0x309a,0x802,0x30a6,0x3099,0x802,0x30ef,0x3099, -0x802,0x30f0,0x3099,0x802,0x30f1,0x3099,0x802,0x30f2,0x3099,0x802,0x30fd,0x3099,0x704,0xd804,0xdc99,0xd804, -0xdcba,0x704,0xd804,0xdc9b,0xd804,0xdcba,0x704,0xd804,0xdca5,0xd804,0xdcba,4,0xd804,0xdd31,0xd804,0xdd27, -4,0xd804,0xdd32,0xd804,0xdd27,4,0xd804,0xdf47,0xd804,0xdf3e,4,0xd804,0xdf47,0xd804,0xdf57,4, -0xd805,0xdcb9,0xd805,0xdcba,4,0xd805,0xdcb9,0xd805,0xdcb0,4,0xd805,0xdcb9,0xd805,0xdcbd,4,0xd805, -0xddb8,0xd805,0xddaf,4,0xd805,0xddb9,0xd805,0xddaf,1,0x2b9,1,0x3b,1,0xb7,0x702,0x915, 
-0x93c,0x702,0x916,0x93c,0x702,0x917,0x93c,0x702,0x91c,0x93c,0x702,0x921,0x93c,0x702,0x922,0x93c, -0x702,0x92b,0x93c,0x702,0x92f,0x93c,0x702,0x9a1,0x9bc,0x702,0x9a2,0x9bc,0x702,0x9af,0x9bc,0x702, -0xa32,0xa3c,0x702,0xa38,0xa3c,0x702,0xa16,0xa3c,0x702,0xa17,0xa3c,0x702,0xa1c,0xa3c,0x702,0xa2b, -0xa3c,0x702,0xb21,0xb3c,0x702,0xb22,0xb3c,2,0xf42,0xfb7,2,0xf4c,0xfb7,2,0xf51,0xfb7, -2,0xf56,0xfb7,2,0xf5b,0xfb7,2,0xf40,0xfb5,0x8202,0xfb2,0xf80,0x8202,0xfb3,0xf80,2, -0xf92,0xfb7,2,0xf9c,0xfb7,2,0xfa1,0xfb7,2,0xfa6,0xfb7,2,0xfab,0xfb7,2,0xf90, -0xfb5,1,0x3b9,1,0x60,1,0xb4,1,0x3a9,1,0x4b,1,0x3008,1,0x3009,0x102, -0x2add,0x338,1,0x8c48,1,0x66f4,1,0x8eca,1,0x8cc8,1,0x6ed1,1,0x4e32,1,0x53e5, -1,0x9f9c,1,0x5951,1,0x91d1,1,0x5587,1,0x5948,1,0x61f6,1,0x7669,1,0x7f85, -1,0x863f,1,0x87ba,1,0x88f8,1,0x908f,1,0x6a02,1,0x6d1b,1,0x70d9,1,0x73de, -1,0x843d,1,0x916a,1,0x99f1,1,0x4e82,1,0x5375,1,0x6b04,1,0x721b,1,0x862d, -1,0x9e1e,1,0x5d50,1,0x6feb,1,0x85cd,1,0x8964,1,0x62c9,1,0x81d8,1,0x881f, -1,0x5eca,1,0x6717,1,0x6d6a,1,0x72fc,1,0x90ce,1,0x4f86,1,0x51b7,1,0x52de, -1,0x64c4,1,0x6ad3,1,0x7210,1,0x76e7,1,0x8001,1,0x8606,1,0x865c,1,0x8def, -1,0x9732,1,0x9b6f,1,0x9dfa,1,0x788c,1,0x797f,1,0x7da0,1,0x83c9,1,0x9304, -1,0x9e7f,1,0x8ad6,1,0x58df,1,0x5f04,1,0x7c60,1,0x807e,1,0x7262,1,0x78ca, -1,0x8cc2,1,0x96f7,1,0x58d8,1,0x5c62,1,0x6a13,1,0x6dda,1,0x6f0f,1,0x7d2f, -1,0x7e37,1,0x964b,1,0x52d2,1,0x808b,1,0x51dc,1,0x51cc,1,0x7a1c,1,0x7dbe, -1,0x83f1,1,0x9675,1,0x8b80,1,0x62cf,1,0x8afe,1,0x4e39,1,0x5be7,1,0x6012, -1,0x7387,1,0x7570,1,0x5317,1,0x78fb,1,0x4fbf,1,0x5fa9,1,0x4e0d,1,0x6ccc, -1,0x6578,1,0x7d22,1,0x53c3,1,0x585e,1,0x7701,1,0x8449,1,0x8aaa,1,0x6bba, -1,0x8fb0,1,0x6c88,1,0x62fe,1,0x82e5,1,0x63a0,1,0x7565,1,0x4eae,1,0x5169, -1,0x51c9,1,0x6881,1,0x7ce7,1,0x826f,1,0x8ad2,1,0x91cf,1,0x52f5,1,0x5442, -1,0x5973,1,0x5eec,1,0x65c5,1,0x6ffe,1,0x792a,1,0x95ad,1,0x9a6a,1,0x9e97, -1,0x9ece,1,0x529b,1,0x66c6,1,0x6b77,1,0x8f62,1,0x5e74,1,0x6190,1,0x6200, 
-1,0x649a,1,0x6f23,1,0x7149,1,0x7489,1,0x79ca,1,0x7df4,1,0x806f,1,0x8f26, -1,0x84ee,1,0x9023,1,0x934a,1,0x5217,1,0x52a3,1,0x54bd,1,0x70c8,1,0x88c2, -1,0x5ec9,1,0x5ff5,1,0x637b,1,0x6bae,1,0x7c3e,1,0x7375,1,0x4ee4,1,0x56f9, -1,0x5dba,1,0x601c,1,0x73b2,1,0x7469,1,0x7f9a,1,0x8046,1,0x9234,1,0x96f6, -1,0x9748,1,0x9818,1,0x4f8b,1,0x79ae,1,0x91b4,1,0x96b8,1,0x60e1,1,0x4e86, -1,0x50da,1,0x5bee,1,0x5c3f,1,0x6599,1,0x71ce,1,0x7642,1,0x84fc,1,0x907c, -1,0x9f8d,1,0x6688,1,0x962e,1,0x5289,1,0x677b,1,0x67f3,1,0x6d41,1,0x6e9c, -1,0x7409,1,0x7559,1,0x786b,1,0x7d10,1,0x985e,1,0x516d,1,0x622e,1,0x9678, -1,0x502b,1,0x5d19,1,0x6dea,1,0x8f2a,1,0x5f8b,1,0x6144,1,0x6817,1,0x9686, -1,0x5229,1,0x540f,1,0x5c65,1,0x6613,1,0x674e,1,0x68a8,1,0x6ce5,1,0x7406, -1,0x75e2,1,0x7f79,1,0x88cf,1,0x88e1,1,0x91cc,1,0x96e2,1,0x533f,1,0x6eba, -1,0x541d,1,0x71d0,1,0x7498,1,0x85fa,1,0x96a3,1,0x9c57,1,0x9e9f,1,0x6797, -1,0x6dcb,1,0x81e8,1,0x7acb,1,0x7b20,1,0x7c92,1,0x72c0,1,0x7099,1,0x8b58, -1,0x4ec0,1,0x8336,1,0x523a,1,0x5207,1,0x5ea6,1,0x62d3,1,0x7cd6,1,0x5b85, -1,0x6d1e,1,0x66b4,1,0x8f3b,1,0x884c,1,0x964d,1,0x898b,1,0x5ed3,1,0x5140, -1,0x55c0,1,0x585a,1,0x6674,1,0x51de,1,0x732a,1,0x76ca,1,0x793c,1,0x795e, -1,0x7965,1,0x798f,1,0x9756,1,0x7cbe,1,0x7fbd,1,0x8612,1,0x8af8,1,0x9038, -1,0x90fd,1,0x98ef,1,0x98fc,1,0x9928,1,0x9db4,1,0x90de,1,0x96b7,1,0x4fae, -1,0x50e7,1,0x514d,1,0x52c9,1,0x52e4,1,0x5351,1,0x559d,1,0x5606,1,0x5668, -1,0x5840,1,0x58a8,1,0x5c64,1,0x5c6e,1,0x6094,1,0x6168,1,0x618e,1,0x61f2, -1,0x654f,1,0x65e2,1,0x6691,1,0x6885,1,0x6d77,1,0x6e1a,1,0x6f22,1,0x716e, -1,0x722b,1,0x7422,1,0x7891,1,0x793e,1,0x7949,1,0x7948,1,0x7950,1,0x7956, -1,0x795d,1,0x798d,1,0x798e,1,0x7a40,1,0x7a81,1,0x7bc0,1,0x7e09,1,0x7e41, -1,0x7f72,1,0x8005,1,0x81ed,1,0x8279,1,0x8457,1,0x8910,1,0x8996,1,0x8b01, -1,0x8b39,1,0x8cd3,1,0x8d08,1,0x8fb6,1,0x96e3,1,0x97ff,1,0x983b,1,0x6075, -2,0xd850,0xdeee,1,0x8218,1,0x4e26,1,0x51b5,1,0x5168,1,0x4f80,1,0x5145,1, -0x5180,1,0x52c7,1,0x52fa,1,0x5555,1,0x5599,1,0x55e2,1,0x58b3,1,0x5944,1, 
-0x5954,1,0x5a62,1,0x5b28,1,0x5ed2,1,0x5ed9,1,0x5f69,1,0x5fad,1,0x60d8,1, -0x614e,1,0x6108,1,0x6160,1,0x6234,1,0x63c4,1,0x641c,1,0x6452,1,0x6556,1, -0x671b,1,0x6756,1,0x6b79,1,0x6edb,1,0x6ecb,1,0x701e,1,0x77a7,1,0x7235,1, -0x72af,1,0x7471,1,0x7506,1,0x753b,1,0x761d,1,0x761f,1,0x76db,1,0x76f4,1, -0x774a,1,0x7740,1,0x78cc,1,0x7ab1,1,0x7c7b,1,0x7d5b,1,0x7f3e,1,0x8352,1, -0x83ef,1,0x8779,1,0x8941,1,0x8986,1,0x8abf,1,0x8acb,1,0x8aed,1,0x8b8a,1, -0x8f38,1,0x9072,1,0x9199,1,0x9276,1,0x967c,1,0x97db,1,0x980b,1,0x9b12,2, -0xd84a,0xdc4a,2,0xd84a,0xdc44,2,0xd84c,0xdfd5,1,0x3b9d,1,0x4018,1,0x4039,2,0xd854, -0xde49,2,0xd857,0xdcd0,2,0xd85f,0xded3,1,0x9f43,1,0x9f8e,0xe02,0x5d9,0x5b4,0x1102,0x5f2, -0x5b7,0x1802,0x5e9,0x5c1,0x1902,0x5e9,0x5c2,0xfb49,0x1843,0x5e9,0x5bc,0x5c1,0xfb49,0x1943,0x5e9,0x5bc, -0x5c2,0x1102,0x5d0,0x5b7,0x1202,0x5d0,0x5b8,0x1502,0x5d0,0x5bc,0x1502,0x5d1,0x5bc,0x1502,0x5d2,0x5bc, -0x1502,0x5d3,0x5bc,0x1502,0x5d4,0x5bc,0x1502,0x5d5,0x5bc,0x1502,0x5d6,0x5bc,0x1502,0x5d8,0x5bc,0x1502, -0x5d9,0x5bc,0x1502,0x5da,0x5bc,0x1502,0x5db,0x5bc,0x1502,0x5dc,0x5bc,0x1502,0x5de,0x5bc,0x1502,0x5e0, -0x5bc,0x1502,0x5e1,0x5bc,0x1502,0x5e3,0x5bc,0x1502,0x5e4,0x5bc,0x1502,0x5e6,0x5bc,0x1502,0x5e7,0x5bc, -0x1502,0x5e8,0x5bc,0x1502,0x5e9,0x5bc,0x1502,0x5ea,0x5bc,0x1302,0x5d5,0x5b9,0x1702,0x5d1,0x5bf,0x1702, -0x5db,0x5bf,0x1702,0x5e4,0x5bf,0xd804,0xd834,0xdd57,0xd834,0xdd65,0xd804,0xd834,0xdd58,0xd834,0xdd65,0xd834, -0xdd5f,0xd834,0xdd6e,4,0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd6e,0xd834,0xdd5f,0xd834,0xdd6f,4, -0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd6f,0xd834,0xdd5f,0xd834,0xdd70,4,0xd846,0xd834,0xdd58,0xd834, -0xdd65,0xd834,0xdd70,0xd834,0xdd5f,0xd834,0xdd71,4,0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd71,0xd834, -0xdd5f,0xd834,0xdd72,4,0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd72,0xd804,0xd834,0xddb9,0xd834,0xdd65, -0xd804,0xd834,0xddba,0xd834,0xdd65,0xd834,0xddbb,0xd834,0xdd6e,4,0xd846,0xd834,0xddb9,0xd834,0xdd65,0xd834, 
-0xdd6e,0xd834,0xddbc,0xd834,0xdd6e,4,0xd846,0xd834,0xddba,0xd834,0xdd65,0xd834,0xdd6e,0xd834,0xddbb,0xd834, -0xdd6f,4,0xd846,0xd834,0xddb9,0xd834,0xdd65,0xd834,0xdd6f,0xd834,0xddbc,0xd834,0xdd6f,4,0xd846,0xd834, -0xddba,0xd834,0xdd65,0xd834,0xdd6f,1,0x4e3d,1,0x4e38,1,0x4e41,2,0xd840,0xdd22,1,0x4f60, -1,0x4fbb,1,0x5002,1,0x507a,1,0x5099,1,0x50cf,1,0x349e,2,0xd841,0xde3a,1, -0x5154,1,0x5164,1,0x5177,2,0xd841,0xdd1c,1,0x34b9,1,0x5167,1,0x518d,2,0xd841, -0xdd4b,1,0x5197,1,0x51a4,1,0x4ecc,1,0x51ac,2,0xd864,0xdddf,1,0x51f5,1,0x5203, -1,0x34df,1,0x523b,1,0x5246,1,0x5272,1,0x5277,1,0x3515,1,0x5305,1,0x5306, -1,0x5349,1,0x535a,1,0x5373,1,0x537d,1,0x537f,2,0xd842,0xde2c,1,0x7070,1, -0x53ca,1,0x53df,2,0xd842,0xdf63,1,0x53eb,1,0x53f1,1,0x5406,1,0x549e,1,0x5438, -1,0x5448,1,0x5468,1,0x54a2,1,0x54f6,1,0x5510,1,0x5553,1,0x5563,1,0x5584, -1,0x55ab,1,0x55b3,1,0x55c2,1,0x5716,1,0x5717,1,0x5651,1,0x5674,1,0x58ee, -1,0x57ce,1,0x57f4,1,0x580d,1,0x578b,1,0x5832,1,0x5831,1,0x58ac,2,0xd845, -0xdce4,1,0x58f2,1,0x58f7,1,0x5906,1,0x591a,1,0x5922,1,0x5962,2,0xd845,0xdea8, -2,0xd845,0xdeea,1,0x59ec,1,0x5a1b,1,0x5a27,1,0x59d8,1,0x5a66,1,0x36ee,1, -0x36fc,1,0x5b08,1,0x5b3e,2,0xd846,0xddc8,1,0x5bc3,1,0x5bd8,1,0x5bf3,2,0xd846, -0xdf18,1,0x5bff,1,0x5c06,1,0x5f53,1,0x5c22,1,0x3781,1,0x5c60,1,0x5cc0,1, -0x5c8d,2,0xd847,0xdde4,1,0x5d43,2,0xd847,0xdde6,1,0x5d6e,1,0x5d6b,1,0x5d7c,1, -0x5de1,1,0x5de2,1,0x382f,1,0x5dfd,1,0x5e28,1,0x5e3d,1,0x5e69,1,0x3862,2, -0xd848,0xdd83,1,0x387c,1,0x5eb0,1,0x5eb3,1,0x5eb6,2,0xd868,0xdf92,1,0x5efe,2, -0xd848,0xdf31,1,0x8201,1,0x5f22,1,0x38c7,2,0xd84c,0xdeb8,2,0xd858,0xddda,1,0x5f62, -1,0x5f6b,1,0x38e3,1,0x5f9a,1,0x5fcd,1,0x5fd7,1,0x5ff9,1,0x6081,1,0x393a, -1,0x391c,2,0xd849,0xded4,1,0x60c7,1,0x6148,1,0x614c,1,0x617a,1,0x61b2,1, -0x61a4,1,0x61af,1,0x61de,1,0x6210,1,0x621b,1,0x625d,1,0x62b1,1,0x62d4,1, -0x6350,2,0xd84a,0xdf0c,1,0x633d,1,0x62fc,1,0x6368,1,0x6383,1,0x63e4,2,0xd84a, -0xdff1,1,0x6422,1,0x63c5,1,0x63a9,1,0x3a2e,1,0x6469,1,0x647e,1,0x649d,1, 
-0x6477,1,0x3a6c,1,0x656c,2,0xd84c,0xdc0a,1,0x65e3,1,0x66f8,1,0x6649,1,0x3b19, -1,0x3b08,1,0x3ae4,1,0x5192,1,0x5195,1,0x6700,1,0x669c,1,0x80ad,1,0x43d9, -1,0x6721,1,0x675e,1,0x6753,2,0xd84c,0xdfc3,1,0x3b49,1,0x67fa,1,0x6785,1, -0x6852,2,0xd84d,0xdc6d,1,0x688e,1,0x681f,1,0x6914,1,0x6942,1,0x69a3,1,0x69ea, -1,0x6aa8,2,0xd84d,0xdea3,1,0x6adb,1,0x3c18,1,0x6b21,2,0xd84e,0xdca7,1,0x6b54, -1,0x3c4e,1,0x6b72,1,0x6b9f,1,0x6bbb,2,0xd84e,0xde8d,2,0xd847,0xdd0b,2,0xd84e, -0xdefa,1,0x6c4e,2,0xd84f,0xdcbc,1,0x6cbf,1,0x6ccd,1,0x6c67,1,0x6d16,1,0x6d3e, -1,0x6d69,1,0x6d78,1,0x6d85,2,0xd84f,0xdd1e,1,0x6d34,1,0x6e2f,1,0x6e6e,1, -0x3d33,1,0x6ec7,2,0xd84f,0xded1,1,0x6df9,1,0x6f6e,2,0xd84f,0xdf5e,2,0xd84f,0xdf8e, -1,0x6fc6,1,0x7039,1,0x701b,1,0x3d96,1,0x704a,1,0x707d,1,0x7077,1,0x70ad, -2,0xd841,0xdd25,1,0x7145,2,0xd850,0xde63,1,0x719c,2,0xd850,0xdfab,1,0x7228,1, -0x7250,2,0xd851,0xde08,1,0x7280,1,0x7295,2,0xd851,0xdf35,2,0xd852,0xdc14,1,0x737a, -1,0x738b,1,0x3eac,1,0x73a5,1,0x3eb8,1,0x7447,1,0x745c,1,0x7485,1,0x74ca, -1,0x3f1b,1,0x7524,2,0xd853,0xdc36,1,0x753e,2,0xd853,0xdc92,2,0xd848,0xdd9f,1, -0x7610,2,0xd853,0xdfa1,2,0xd853,0xdfb8,2,0xd854,0xdc44,1,0x3ffc,1,0x4008,2,0xd854, -0xdcf3,2,0xd854,0xdcf2,2,0xd854,0xdd19,2,0xd854,0xdd33,1,0x771e,1,0x771f,1,0x778b, -1,0x4046,1,0x4096,2,0xd855,0xdc1d,1,0x784e,1,0x40e3,2,0xd855,0xde26,2,0xd855, -0xde9a,2,0xd855,0xdec5,1,0x79eb,1,0x412f,1,0x7a4a,1,0x7a4f,2,0xd856,0xdd7c,2, -0xd856,0xdea7,1,0x7aee,1,0x4202,2,0xd856,0xdfab,1,0x7bc6,1,0x7bc9,1,0x4227,2, -0xd857,0xdc80,1,0x7cd2,1,0x42a0,1,0x7ce8,1,0x7ce3,1,0x7d00,2,0xd857,0xdf86,1, -0x7d63,1,0x4301,1,0x7dc7,1,0x7e02,1,0x7e45,1,0x4334,2,0xd858,0xde28,2,0xd858, -0xde47,1,0x4359,2,0xd858,0xded9,1,0x7f7a,2,0xd858,0xdf3e,1,0x7f95,1,0x7ffa,2, -0xd859,0xdcda,2,0xd859,0xdd23,1,0x8060,2,0xd859,0xdda8,1,0x8070,2,0xd84c,0xdf5f,1, -0x43d5,1,0x80b2,1,0x8103,1,0x440b,1,0x813e,1,0x5ab5,2,0xd859,0xdfa7,2,0xd859, -0xdfb5,2,0xd84c,0xdf93,2,0xd84c,0xdf9c,1,0x8204,1,0x8f9e,1,0x446b,1,0x8291,1, 
-0x828b,1,0x829d,1,0x52b3,1,0x82b1,1,0x82b3,1,0x82bd,1,0x82e6,2,0xd85a,0xdf3c, -1,0x831d,1,0x8363,1,0x83ad,1,0x8323,1,0x83bd,1,0x83e7,1,0x8353,1,0x83ca, -1,0x83cc,1,0x83dc,2,0xd85b,0xdc36,2,0xd85b,0xdd6b,2,0xd85b,0xdcd5,1,0x452b,1, -0x84f1,1,0x84f3,1,0x8516,2,0xd85c,0xdfca,1,0x8564,2,0xd85b,0xdf2c,1,0x455d,1, -0x4561,2,0xd85b,0xdfb1,2,0xd85c,0xdcd2,1,0x456b,1,0x8650,1,0x8667,1,0x8669,1, -0x86a9,1,0x8688,1,0x870e,1,0x86e2,1,0x8728,1,0x876b,1,0x8786,1,0x45d7,1, -0x87e1,1,0x8801,1,0x45f9,1,0x8860,1,0x8863,2,0xd85d,0xde67,1,0x88d7,1,0x88de, -1,0x4635,1,0x88fa,1,0x34bb,2,0xd85e,0xdcae,2,0xd85e,0xdd66,1,0x46be,1,0x46c7, -1,0x8aa0,1,0x8c55,2,0xd85f,0xdca8,1,0x8cab,1,0x8cc1,1,0x8d1b,1,0x8d77,2, -0xd85f,0xdf2f,2,0xd842,0xdc04,1,0x8dcb,1,0x8dbc,1,0x8df0,2,0xd842,0xdcde,1,0x8ed4, -2,0xd861,0xddd2,2,0xd861,0xdded,1,0x9094,1,0x90f1,1,0x9111,2,0xd861,0xdf2e,1, -0x911b,1,0x9238,1,0x92d7,1,0x92d8,1,0x927c,1,0x93f9,1,0x9415,2,0xd862,0xdffa, -1,0x958b,1,0x4995,1,0x95b7,2,0xd863,0xdd77,1,0x49e6,1,0x96c3,1,0x5db2,1, -0x9723,2,0xd864,0xdd45,2,0xd864,0xde1a,1,0x4a6e,1,0x4a76,1,0x97e0,2,0xd865,0xdc0a, -1,0x4ab2,2,0xd865,0xdc96,1,0x9829,2,0xd865,0xddb6,1,0x98e2,1,0x4b33,1,0x9929, -1,0x99a7,1,0x99c2,1,0x99fe,1,0x4bce,2,0xd866,0xdf30,1,0x9c40,1,0x9cfd,1, -0x4cce,1,0x4ced,1,0x9d67,2,0xd868,0xdcce,1,0x4cf8,2,0xd868,0xdd05,2,0xd868,0xde0e, -2,0xd868,0xde91,1,0x9ebb,1,0x4d56,1,0x9ef9,1,0x9efe,1,0x9f05,1,0x9f0f,1, -0x9f16,1,0x9f3b,2,0xd869,0xde00,0x3ac,0xe642,0x3b1,0x301,0x3ad,0xe642,0x3b5,0x301,0x3ae,0xe642, -0x3b7,0x301,0x3af,0xe642,0x3b9,0x301,0x3cc,0xe642,0x3bf,0x301,0x3cd,0xe642,0x3c5,0x301,0x3ce,0xe642, -0x3c9,0x301,0x386,0xe642,0x391,0x301,0x388,0xe642,0x395,0x301,0x389,0xe642,0x397,0x301,0x390,1, -0xe643,0x3b9,0x308,0x301,0x38a,0xe642,0x399,0x301,0x3b0,1,0xe643,0x3c5,0x308,0x301,0x38e,0xe642, -0x3a5,0x301,0x385,0xe642,0xa8,0x301,0x38c,0xe642,0x39f,0x301,0x38f,0xe642,0x3a9,0x301,0xc5,0xe642, 
-0x41,0x30a,0xe6e6,0xe681,0x300,0xe6e6,0xe681,0x301,0xe6e6,0xe681,0x313,0xe6e6,0xe682,0x308,0x301,0x8100, -0x8282,0xf71,0xf72,0x8100,0x8482,0xf71,0xf74,0x8100,0x8282,0xf71,0xf80,0 -}; - -static const uint8_t norm2_nfc_data_smallFCD[256]={ -0xc0,0xef,3,0x7f,0xdf,0x70,0xcf,0x87,0xc7,0xe6,0x66,0x46,0x64,0x46,0x66,0x5b, -0x12,0,0,4,0,0,0,0x43,0x20,2,0x29,0xae,0xc2,0xc0,0xff,0xff, -0xc0,0x72,0xbf,0,0,0,0,0,0,0,0x40,0,0x80,0x88,0,0, -0xfe,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0x98,0,0xc1,0x66,0xe0,0x80,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,7,0,0,2,0 -}; - -#endif // INCLUDED_FROM_NORMALIZER2_CPP diff --git a/deps/node/deps/icu-small/source/common/norm2allmodes.h b/deps/node/deps/icu-small/source/common/norm2allmodes.h deleted file mode 100644 index 682ece28..00000000 --- a/deps/node/deps/icu-small/source/common/norm2allmodes.h +++ /dev/null @@ -1,369 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2014, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* norm2allmodes.h -* -* created on: 2014sep07 -* created by: Markus W. 
Scherer -*/ - -#ifndef __NORM2ALLMODES_H__ -#define __NORM2ALLMODES_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/edits.h" -#include "unicode/normalizer2.h" -#include "unicode/stringoptions.h" -#include "unicode/unistr.h" -#include "cpputils.h" -#include "normalizer2impl.h" - -U_NAMESPACE_BEGIN - -// Intermediate class: -// Has Normalizer2Impl and does boilerplate argument checking and setup. -class Normalizer2WithImpl : public Normalizer2 { -public: - Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} - virtual ~Normalizer2WithImpl(); - - // normalize - virtual UnicodeString & - normalize(const UnicodeString &src, - UnicodeString &dest, - UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - dest.setToBogus(); - return dest; - } - const UChar *sArray=src.getBuffer(); - if(&dest==&src || sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - dest.setToBogus(); - return dest; - } - dest.remove(); - ReorderingBuffer buffer(impl, dest); - if(buffer.init(src.length(), errorCode)) { - normalize(sArray, sArray+src.length(), buffer, errorCode); - } - return dest; - } - virtual void - normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; - - // normalize and append - virtual UnicodeString & - normalizeSecondAndAppend(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const { - return normalizeSecondAndAppend(first, second, TRUE, errorCode); - } - virtual UnicodeString & - append(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const { - return normalizeSecondAndAppend(first, second, FALSE, errorCode); - } - UnicodeString & - normalizeSecondAndAppend(UnicodeString &first, - const UnicodeString &second, - UBool doNormalize, - UErrorCode &errorCode) const { - uprv_checkCanGetBuffer(first, errorCode); - if(U_FAILURE(errorCode)) { - return first; - } - const UChar *secondArray=second.getBuffer(); - 
if(&first==&second || secondArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return first; - } - int32_t firstLength=first.length(); - UnicodeString safeMiddle; - { - ReorderingBuffer buffer(impl, first); - if(buffer.init(firstLength+second.length(), errorCode)) { - normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, - safeMiddle, buffer, errorCode); - } - } // The ReorderingBuffer destructor finalizes the first string. - if(U_FAILURE(errorCode)) { - // Restore the modified suffix of the first string. - first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); - } - return first; - } - virtual void - normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; - virtual UBool - getDecomposition(UChar32 c, UnicodeString &decomposition) const { - UChar buffer[4]; - int32_t length; - const UChar *d=impl.getDecomposition(c, buffer, length); - if(d==NULL) { - return FALSE; - } - if(d==buffer) { - decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) - } else { - decomposition.setTo(FALSE, d, length); // read-only alias - } - return TRUE; - } - virtual UBool - getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { - UChar buffer[30]; - int32_t length; - const UChar *d=impl.getRawDecomposition(c, buffer, length); - if(d==NULL) { - return FALSE; - } - if(d==buffer) { - decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) - } else { - decomposition.setTo(FALSE, d, length); // read-only alias - } - return TRUE; - } - virtual UChar32 - composePair(UChar32 a, UChar32 b) const { - return impl.composePair(a, b); - } - - virtual uint8_t - getCombiningClass(UChar32 c) const { - return impl.getCC(impl.getNorm16(c)); - } - - // quick checks - virtual UBool - isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - 
return FALSE; - } - const UChar *sArray=s.getBuffer(); - if(sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - const UChar *sLimit=sArray+s.length(); - return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); - } - virtual UNormalizationCheckResult - quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { - return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; - } - virtual int32_t - spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return 0; - } - const UChar *sArray=s.getBuffer(); - if(sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); - } - virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; - - virtual UNormalizationCheckResult getQuickCheck(UChar32) const { - return UNORM_YES; - } - - const Normalizer2Impl &impl; -}; - -class DecomposeNormalizer2 : public Normalizer2WithImpl { -public: - DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} - virtual ~DecomposeNormalizer2(); - -private: - virtual void - normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.decompose(src, limit, &buffer, errorCode); - } - using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 
- virtual void - normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); - } - virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { - return impl.decompose(src, limit, NULL, errorCode); - } - using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. - virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { - return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; - } - virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); } - virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); } - virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } -}; - -class ComposeNormalizer2 : public Normalizer2WithImpl { -public: - ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : - Normalizer2WithImpl(ni), onlyContiguous(fcc) {} - virtual ~ComposeNormalizer2(); - -private: - virtual void - normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { - impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); - } - using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 
- - void - normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, - Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { - if (U_FAILURE(errorCode)) { - return; - } - if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { - edits->reset(); - } - const uint8_t *s = reinterpret_cast(src.data()); - impl.composeUTF8(options, onlyContiguous, s, s + src.length(), - &sink, edits, errorCode); - sink.Flush(); - } - - virtual void - normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { - impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); - } - - virtual UBool - isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { - if(U_FAILURE(errorCode)) { - return FALSE; - } - const UChar *sArray=s.getBuffer(); - if(sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - UnicodeString temp; - ReorderingBuffer buffer(impl, temp); - if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization - return FALSE; - } - return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); - } - virtual UBool - isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE { - if(U_FAILURE(errorCode)) { - return FALSE; - } - const uint8_t *s = reinterpret_cast(sp.data()); - return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode); - } - virtual UNormalizationCheckResult - quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { - if(U_FAILURE(errorCode)) { - return UNORM_MAYBE; - } - const UChar *sArray=s.getBuffer(); - if(sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return UNORM_MAYBE; - } - UNormalizationCheckResult qcResult=UNORM_YES; - impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); - return qcResult; - } - virtual const 
UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE { - return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); - } - using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. - virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE { - return impl.getCompQuickCheck(impl.getNorm16(c)); - } - virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { - return impl.hasCompBoundaryBefore(c); - } - virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { - return impl.hasCompBoundaryAfter(c, onlyContiguous); - } - virtual UBool isInert(UChar32 c) const U_OVERRIDE { - return impl.isCompInert(c, onlyContiguous); - } - - const UBool onlyContiguous; -}; - -class FCDNormalizer2 : public Normalizer2WithImpl { -public: - FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} - virtual ~FCDNormalizer2(); - -private: - virtual void - normalize(const UChar *src, const UChar *limit, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.makeFCD(src, limit, &buffer, errorCode); - } - using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. - virtual void - normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); - } - virtual const UChar * - spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { - return impl.makeFCD(src, limit, NULL, errorCode); - } - using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 
- virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } - virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } - virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } -}; - -struct Norm2AllModes : public UMemory { - Norm2AllModes(Normalizer2Impl *i) - : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {} - ~Norm2AllModes(); - - static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode); - static Norm2AllModes *createNFCInstance(UErrorCode &errorCode); - static Norm2AllModes *createInstance(const char *packageName, - const char *name, - UErrorCode &errorCode); - - static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); - static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); - static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); - - Normalizer2Impl *impl; - ComposeNormalizer2 comp; - DecomposeNormalizer2 decomp; - FCDNormalizer2 fcd; - ComposeNormalizer2 fcc; -}; - -U_NAMESPACE_END - -#endif // !UCONFIG_NO_NORMALIZATION -#endif // __NORM2ALLMODES_H__ diff --git a/deps/node/deps/icu-small/source/common/normalizer2.cpp b/deps/node/deps/icu-small/source/common/normalizer2.cpp deleted file mode 100644 index ca5d3aba..00000000 --- a/deps/node/deps/icu-small/source/common/normalizer2.cpp +++ /dev/null @@ -1,572 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2009-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: normalizer2.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009nov22 -* created by: Markus W. 
Scherer -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/edits.h" -#include "unicode/normalizer2.h" -#include "unicode/stringoptions.h" -#include "unicode/unistr.h" -#include "unicode/unorm.h" -#include "cstring.h" -#include "mutex.h" -#include "norm2allmodes.h" -#include "normalizer2impl.h" -#include "uassert.h" -#include "ucln_cmn.h" - -using icu::Normalizer2Impl; - -#if NORM2_HARDCODE_NFC_DATA -// NFC/NFD data machine-generated by gennorm2 --csource -#define INCLUDED_FROM_NORMALIZER2_CPP -#include "norm2_nfc_data.h" -#endif - -U_NAMESPACE_BEGIN - -// Public API dispatch via Normalizer2 subclasses -------------------------- *** - -Normalizer2::~Normalizer2() {} - -void -Normalizer2::normalizeUTF8(uint32_t /*options*/, StringPiece src, ByteSink &sink, - Edits *edits, UErrorCode &errorCode) const { - if (U_FAILURE(errorCode)) { - return; - } - if (edits != nullptr) { - errorCode = U_UNSUPPORTED_ERROR; - return; - } - UnicodeString src16 = UnicodeString::fromUTF8(src); - normalize(src16, errorCode).toUTF8(sink); -} - -UBool -Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { - return FALSE; -} - -UChar32 -Normalizer2::composePair(UChar32, UChar32) const { - return U_SENTINEL; -} - -uint8_t -Normalizer2::getCombiningClass(UChar32 /*c*/) const { - return 0; -} - -UBool -Normalizer2::isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const { - return U_SUCCESS(errorCode) && isNormalized(UnicodeString::fromUTF8(s), errorCode); -} - -// Normalizer2 implementation for the old UNORM_NONE. 
-class NoopNormalizer2 : public Normalizer2 { - virtual ~NoopNormalizer2(); - - virtual UnicodeString & - normalize(const UnicodeString &src, - UnicodeString &dest, - UErrorCode &errorCode) const U_OVERRIDE { - if(U_SUCCESS(errorCode)) { - if(&dest!=&src) { - dest=src; - } else { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - } - } - return dest; - } - virtual void - normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, - Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { - if(U_SUCCESS(errorCode)) { - if (edits != nullptr) { - if ((options & U_EDITS_NO_RESET) == 0) { - edits->reset(); - } - edits->addUnchanged(src.length()); - } - if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { - sink.Append(src.data(), src.length()); - } - sink.Flush(); - } - } - - virtual UnicodeString & - normalizeSecondAndAppend(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const U_OVERRIDE { - if(U_SUCCESS(errorCode)) { - if(&first!=&second) { - first.append(second); - } else { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - } - } - return first; - } - virtual UnicodeString & - append(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const U_OVERRIDE { - if(U_SUCCESS(errorCode)) { - if(&first!=&second) { - first.append(second); - } else { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - } - } - return first; - } - virtual UBool - getDecomposition(UChar32, UnicodeString &) const U_OVERRIDE { - return FALSE; - } - // No need to U_OVERRIDE the default getRawDecomposition(). 
- virtual UBool - isNormalized(const UnicodeString &, UErrorCode &errorCode) const U_OVERRIDE { - return U_SUCCESS(errorCode); - } - virtual UBool - isNormalizedUTF8(StringPiece, UErrorCode &errorCode) const U_OVERRIDE { - return U_SUCCESS(errorCode); - } - virtual UNormalizationCheckResult - quickCheck(const UnicodeString &, UErrorCode &) const U_OVERRIDE { - return UNORM_YES; - } - virtual int32_t - spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const U_OVERRIDE { - return s.length(); - } - virtual UBool hasBoundaryBefore(UChar32) const U_OVERRIDE { return TRUE; } - virtual UBool hasBoundaryAfter(UChar32) const U_OVERRIDE { return TRUE; } - virtual UBool isInert(UChar32) const U_OVERRIDE { return TRUE; } -}; - -NoopNormalizer2::~NoopNormalizer2() {} - -Normalizer2WithImpl::~Normalizer2WithImpl() {} - -DecomposeNormalizer2::~DecomposeNormalizer2() {} - -ComposeNormalizer2::~ComposeNormalizer2() {} - -FCDNormalizer2::~FCDNormalizer2() {} - -// instance cache ---------------------------------------------------------- *** - -U_CDECL_BEGIN -static UBool U_CALLCONV uprv_normalizer2_cleanup(); -U_CDECL_END - -static Normalizer2 *noopSingleton; -static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER; - -static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - noopSingleton=new NoopNormalizer2; - if(noopSingleton==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); -} - -const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return NULL; } - umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode); - return noopSingleton; -} - -const Normalizer2Impl * -Normalizer2Factory::getImpl(const Normalizer2 *norm2) { - return &((Normalizer2WithImpl *)norm2)->impl; -} - -Norm2AllModes::~Norm2AllModes() { - delete impl; -} - -Norm2AllModes * 
-Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - delete impl; - return NULL; - } - Norm2AllModes *allModes=new Norm2AllModes(impl); - if(allModes==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - delete impl; - return NULL; - } - return allModes; -} - -#if NORM2_HARDCODE_NFC_DATA -Norm2AllModes * -Norm2AllModes::createNFCInstance(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return NULL; - } - Normalizer2Impl *impl=new Normalizer2Impl; - if(impl==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - impl->init(norm2_nfc_data_indexes, &norm2_nfc_data_trie, - norm2_nfc_data_extraData, norm2_nfc_data_smallFCD); - return createInstance(impl, errorCode); -} - -static Norm2AllModes *nfcSingleton; - -static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; - -static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) { - nfcSingleton=Norm2AllModes::createNFCInstance(errorCode); - ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); -} - -const Norm2AllModes * -Norm2AllModes::getNFCInstance(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return NULL; } - umtx_initOnce(nfcInitOnce, &initNFCSingleton, errorCode); - return nfcSingleton; -} - -const Normalizer2 * -Normalizer2::getNFCInstance(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); - return allModes!=NULL ? &allModes->comp : NULL; -} - -const Normalizer2 * -Normalizer2::getNFDInstance(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); - return allModes!=NULL ? &allModes->decomp : NULL; -} - -const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); - return allModes!=NULL ? 
&allModes->fcd : NULL; -} - -const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); - return allModes!=NULL ? &allModes->fcc : NULL; -} - -const Normalizer2Impl * -Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { - const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); - return allModes!=NULL ? allModes->impl : NULL; -} -#endif // NORM2_HARDCODE_NFC_DATA - -U_CDECL_BEGIN - -static UBool U_CALLCONV uprv_normalizer2_cleanup() { - delete noopSingleton; - noopSingleton = NULL; - noopInitOnce.reset(); -#if NORM2_HARDCODE_NFC_DATA - delete nfcSingleton; - nfcSingleton = NULL; - nfcInitOnce.reset(); -#endif - return TRUE; -} - -U_CDECL_END - -U_NAMESPACE_END - -// C API ------------------------------------------------------------------- *** - -U_NAMESPACE_USE - -U_CAPI const UNormalizer2 * U_EXPORT2 -unorm2_getNFCInstance(UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode); -} - -U_CAPI const UNormalizer2 * U_EXPORT2 -unorm2_getNFDInstance(UErrorCode *pErrorCode) { - return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); -} - -U_CAPI void U_EXPORT2 -unorm2_close(UNormalizer2 *norm2) { - delete (Normalizer2 *)norm2; -} - -U_CAPI int32_t U_EXPORT2 -unorm2_normalize(const UNormalizer2 *norm2, - const UChar *src, int32_t length, - UChar *dest, int32_t capacity, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if( (src==NULL ? length!=0 : length<-1) || - (dest==NULL ? capacity!=0 : capacity<0) || - (src==dest && src!=NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UnicodeString destString(dest, 0, capacity); - // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. 
- if(length!=0) { - const Normalizer2 *n2=(const Normalizer2 *)norm2; - const Normalizer2WithImpl *n2wi=dynamic_cast(n2); - if(n2wi!=NULL) { - // Avoid duplicate argument checking and support NUL-terminated src. - ReorderingBuffer buffer(n2wi->impl, destString); - if(buffer.init(length, *pErrorCode)) { - n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode); - } - } else { - UnicodeString srcString(length<0, src, length); - n2->normalize(srcString, destString, *pErrorCode); - } - } - return destString.extract(dest, capacity, *pErrorCode); -} - -static int32_t -normalizeSecondAndAppend(const UNormalizer2 *norm2, - UChar *first, int32_t firstLength, int32_t firstCapacity, - const UChar *second, int32_t secondLength, - UBool doNormalize, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if( (second==NULL ? secondLength!=0 : secondLength<-1) || - (first==NULL ? (firstCapacity!=0 || firstLength!=0) : - (firstCapacity<0 || firstLength<-1)) || - (first==second && first!=NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UnicodeString firstString(first, firstLength, firstCapacity); - firstLength=firstString.length(); // In case it was -1. - // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. - if(secondLength!=0) { - const Normalizer2 *n2=(const Normalizer2 *)norm2; - const Normalizer2WithImpl *n2wi=dynamic_cast(n2); - if(n2wi!=NULL) { - // Avoid duplicate argument checking and support NUL-terminated src. - UnicodeString safeMiddle; - { - ReorderingBuffer buffer(n2wi->impl, firstString); - if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 - n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, - doNormalize, safeMiddle, buffer, *pErrorCode); - } - } // The ReorderingBuffer destructor finalizes firstString. 
- if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) { - // Restore the modified suffix of the first string. - // This does not restore first[] array contents between firstLength and firstCapacity. - // (That might be uninitialized memory, as far as we know.) - if(first!=NULL) { /* don't dereference NULL */ - safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length()); - if(firstLengthnormalizeSecondAndAppend(firstString, secondString, *pErrorCode); - } else { - n2->append(firstString, secondString, *pErrorCode); - } - } - } - return firstString.extract(first, firstCapacity, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, - UChar *first, int32_t firstLength, int32_t firstCapacity, - const UChar *second, int32_t secondLength, - UErrorCode *pErrorCode) { - return normalizeSecondAndAppend(norm2, - first, firstLength, firstCapacity, - second, secondLength, - TRUE, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -unorm2_append(const UNormalizer2 *norm2, - UChar *first, int32_t firstLength, int32_t firstCapacity, - const UChar *second, int32_t secondLength, - UErrorCode *pErrorCode) { - return normalizeSecondAndAppend(norm2, - first, firstLength, firstCapacity, - second, secondLength, - FALSE, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -unorm2_getDecomposition(const UNormalizer2 *norm2, - UChar32 c, UChar *decomposition, int32_t capacity, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(decomposition==NULL ? 
capacity!=0 : capacity<0) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UnicodeString destString(decomposition, 0, capacity); - if(reinterpret_cast(norm2)->getDecomposition(c, destString)) { - return destString.extract(decomposition, capacity, *pErrorCode); - } else { - return -1; - } -} - -U_CAPI int32_t U_EXPORT2 -unorm2_getRawDecomposition(const UNormalizer2 *norm2, - UChar32 c, UChar *decomposition, int32_t capacity, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(decomposition==NULL ? capacity!=0 : capacity<0) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UnicodeString destString(decomposition, 0, capacity); - if(reinterpret_cast(norm2)->getRawDecomposition(c, destString)) { - return destString.extract(decomposition, capacity, *pErrorCode); - } else { - return -1; - } -} - -U_CAPI UChar32 U_EXPORT2 -unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) { - return reinterpret_cast(norm2)->composePair(a, b); -} - -U_CAPI uint8_t U_EXPORT2 -unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) { - return reinterpret_cast(norm2)->getCombiningClass(c); -} - -U_CAPI UBool U_EXPORT2 -unorm2_isNormalized(const UNormalizer2 *norm2, - const UChar *s, int32_t length, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if((s==NULL && length!=0) || length<-1) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UnicodeString sString(length<0, s, length); - return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); -} - -U_CAPI UNormalizationCheckResult U_EXPORT2 -unorm2_quickCheck(const UNormalizer2 *norm2, - const UChar *s, int32_t length, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return UNORM_NO; - } - if((s==NULL && length!=0) || length<-1) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return UNORM_NO; - } - UnicodeString sString(length<0, s, length); - return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); -} 
- -U_CAPI int32_t U_EXPORT2 -unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, - const UChar *s, int32_t length, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if((s==NULL && length!=0) || length<-1) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UnicodeString sString(length<0, s, length); - return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); -} - -U_CAPI UBool U_EXPORT2 -unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { - return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); -} - -U_CAPI UBool U_EXPORT2 -unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { - return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); -} - -U_CAPI UBool U_EXPORT2 -unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { - return ((const Normalizer2 *)norm2)->isInert(c); -} - -// Some properties APIs ---------------------------------------------------- *** - -U_CAPI uint8_t U_EXPORT2 -u_getCombiningClass(UChar32 c) { - UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode); - if(U_SUCCESS(errorCode)) { - return nfd->getCombiningClass(c); - } else { - return 0; - } -} - -U_CFUNC uint16_t -unorm_getFCD16(UChar32 c) { - UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); - if(U_SUCCESS(errorCode)) { - return impl->getFCD16(c); - } else { - return 0; - } -} - -#endif // !UCONFIG_NO_NORMALIZATION diff --git a/deps/node/deps/icu-small/source/common/normalizer2impl.cpp b/deps/node/deps/icu-small/source/common/normalizer2impl.cpp deleted file mode 100644 index e7ae646c..00000000 --- a/deps/node/deps/icu-small/source/common/normalizer2impl.cpp +++ /dev/null @@ -1,2660 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2009-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: normalizer2impl.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009nov22 -* created by: Markus W. Scherer -*/ - -// #define UCPTRIE_DEBUG - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/bytestream.h" -#include "unicode/edits.h" -#include "unicode/normalizer2.h" -#include "unicode/stringoptions.h" -#include "unicode/ucptrie.h" -#include "unicode/udata.h" -#include "unicode/umutablecptrie.h" -#include "unicode/ustring.h" -#include "unicode/utf16.h" -#include "unicode/utf8.h" -#include "bytesinkutil.h" -#include "cmemory.h" -#include "mutex.h" -#include "normalizer2impl.h" -#include "putilimp.h" -#include "uassert.h" -#include "ucptrie_impl.h" -#include "uset_imp.h" -#include "uvector.h" - -U_NAMESPACE_BEGIN - -namespace { - -/** - * UTF-8 lead byte for minNoMaybeCP. - * Can be lower than the actual lead byte for c. - * Typically U+0300 for NFC/NFD, U+00A0 for NFKC/NFKD, U+0041 for NFKC_Casefold. - */ -inline uint8_t leadByteForCP(UChar32 c) { - if (c <= 0x7f) { - return (uint8_t)c; - } else if (c <= 0x7ff) { - return (uint8_t)(0xc0+(c>>6)); - } else { - // Should not occur because ccc(U+0300)!=0. - return 0xe0; - } -} - -/** - * Returns the code point from one single well-formed UTF-8 byte sequence - * between cpStart and cpLimit. - * - * Trie UTF-8 macros do not assemble whole code points (for efficiency). - * When we do need the code point, we call this function. - * We should not need it for normalization-inert data (norm16==0). - * Illegal sequences yield the error value norm16==0 just like real normalization-inert code points. 
- */ -UChar32 codePointFromValidUTF8(const uint8_t *cpStart, const uint8_t *cpLimit) { - // Similar to U8_NEXT_UNSAFE(s, i, c). - U_ASSERT(cpStart < cpLimit); - uint8_t c = *cpStart; - switch(cpLimit-cpStart) { - case 1: - return c; - case 2: - return ((c&0x1f)<<6) | (cpStart[1]&0x3f); - case 3: - // no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) - return (UChar)((c<<12) | ((cpStart[1]&0x3f)<<6) | (cpStart[2]&0x3f)); - case 4: - return ((c&7)<<18) | ((cpStart[1]&0x3f)<<12) | ((cpStart[2]&0x3f)<<6) | (cpStart[3]&0x3f); - default: - U_ASSERT(FALSE); // Should not occur. - return U_SENTINEL; - } -} - -/** - * Returns the last code point in [start, p[ if it is valid and in U+1000..U+D7FF. - * Otherwise returns a negative value. - */ -UChar32 previousHangulOrJamo(const uint8_t *start, const uint8_t *p) { - if ((p - start) >= 3) { - p -= 3; - uint8_t l = *p; - uint8_t t1, t2; - if (0xe1 <= l && l <= 0xed && - (t1 = (uint8_t)(p[1] - 0x80)) <= 0x3f && - (t2 = (uint8_t)(p[2] - 0x80)) <= 0x3f && - (l < 0xed || t1 <= 0x1f)) { - return ((l & 0xf) << 12) | (t1 << 6) | t2; - } - } - return U_SENTINEL; -} - -/** - * Returns the offset from the Jamo T base if [src, limit[ starts with a single Jamo T code point. - * Otherwise returns a negative value. - */ -int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) { - // Jamo T: E1 86 A8..E1 87 82 - if ((limit - src) >= 3 && *src == 0xe1) { - if (src[1] == 0x86) { - uint8_t t = src[2]; - // The first Jamo T is U+11A8 but JAMO_T_BASE is 11A7. - // Offset 0 does not correspond to any conjoining Jamo. 
- if (0xa8 <= t && t <= 0xbf) { - return t - 0xa7; - } - } else if (src[1] == 0x87) { - uint8_t t = src[2]; - if ((int8_t)t <= (int8_t)0x82u) { - return t - (0xa7 - 0x40); - } - } - } - return -1; -} - -void -appendCodePointDelta(const uint8_t *cpStart, const uint8_t *cpLimit, int32_t delta, - ByteSink &sink, Edits *edits) { - char buffer[U8_MAX_LENGTH]; - int32_t length; - int32_t cpLength = (int32_t)(cpLimit - cpStart); - if (cpLength == 1) { - // The builder makes ASCII map to ASCII. - buffer[0] = (uint8_t)(*cpStart + delta); - length = 1; - } else { - int32_t trail = *(cpLimit-1) + delta; - if (0x80 <= trail && trail <= 0xbf) { - // The delta only changes the last trail byte. - --cpLimit; - length = 0; - do { buffer[length++] = *cpStart++; } while (cpStart < cpLimit); - buffer[length++] = (uint8_t)trail; - } else { - // Decode the code point, add the delta, re-encode. - UChar32 c = codePointFromValidUTF8(cpStart, cpLimit) + delta; - length = 0; - U8_APPEND_UNSAFE(buffer, length, c); - } - } - if (edits != nullptr) { - edits->addReplace(cpLength, length); - } - sink.Append(buffer, length); -} - -} // namespace - -// ReorderingBuffer -------------------------------------------------------- *** - -ReorderingBuffer::ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest, - UErrorCode &errorCode) : - impl(ni), str(dest), - start(str.getBuffer(8)), reorderStart(start), limit(start), - remainingCapacity(str.getCapacity()), lastCC(0) { - if (start == nullptr && U_SUCCESS(errorCode)) { - // getBuffer() already did str.setToBogus() - errorCode = U_MEMORY_ALLOCATION_ERROR; - } -} - -UBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) { - int32_t length=str.length(); - start=str.getBuffer(destCapacity); - if(start==NULL) { - // getBuffer() already did str.setToBogus() - errorCode=U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - limit=start+length; - remainingCapacity=str.getCapacity()-length; - reorderStart=start; - if(start==limit) { - 
lastCC=0; - } else { - setIterator(); - lastCC=previousCC(); - // Set reorderStart after the last code point with cc<=1 if there is one. - if(lastCC>1) { - while(previousCC()>1) {} - } - reorderStart=codePointLimit; - } - return TRUE; -} - -UBool ReorderingBuffer::equals(const UChar *otherStart, const UChar *otherLimit) const { - int32_t length=(int32_t)(limit-start); - return - length==(int32_t)(otherLimit-otherStart) && - 0==u_memcmp(start, otherStart, length); -} - -UBool ReorderingBuffer::equals(const uint8_t *otherStart, const uint8_t *otherLimit) const { - U_ASSERT((otherLimit - otherStart) <= INT32_MAX); // ensured by caller - int32_t length = (int32_t)(limit - start); - int32_t otherLength = (int32_t)(otherLimit - otherStart); - // For equal strings, UTF-8 is at least as long as UTF-16, and at most three times as long. - if (otherLength < length || (otherLength / 3) > length) { - return FALSE; - } - // Compare valid strings from between normalization boundaries. - // (Invalid sequences are normalization-inert.) - for (int32_t i = 0, j = 0;;) { - if (i >= length) { - return j >= otherLength; - } else if (j >= otherLength) { - return FALSE; - } - // Not at the end of either string yet. 
- UChar32 c, other; - U16_NEXT_UNSAFE(start, i, c); - U8_NEXT_UNSAFE(otherStart, j, other); - if (c != other) { - return FALSE; - } - } -} - -UBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) { - if(remainingCapacity<2 && !resize(2, errorCode)) { - return FALSE; - } - if(lastCC<=cc || cc==0) { - limit[0]=U16_LEAD(c); - limit[1]=U16_TRAIL(c); - limit+=2; - lastCC=cc; - if(cc<=1) { - reorderStart=limit; - } - } else { - insert(c, cc); - } - remainingCapacity-=2; - return TRUE; -} - -UBool ReorderingBuffer::append(const UChar *s, int32_t length, UBool isNFD, - uint8_t leadCC, uint8_t trailCC, - UErrorCode &errorCode) { - if(length==0) { - return TRUE; - } - if(remainingCapacity=codePointStart) { - return 0; - } - UChar32 c=*--codePointStart; - UChar c2; - if(U16_IS_TRAIL(c) && startcc;) {} - // insert c at codePointLimit, after the character with prevCC<=cc - UChar *q=limit; - UChar *r=limit+=U16_LENGTH(c); - do { - *--r=*--q; - } while(codePointLimit!=q); - writeCodePoint(q, c); - if(cc<=1) { - reorderStart=r; - } -} - -// Normalizer2Impl --------------------------------------------------------- *** - -struct CanonIterData : public UMemory { - CanonIterData(UErrorCode &errorCode); - ~CanonIterData(); - void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode); - UMutableCPTrie *mutableTrie; - UCPTrie *trie; - UVector canonStartSets; // contains UnicodeSet * -}; - -Normalizer2Impl::~Normalizer2Impl() { - delete fCanonIterData; -} - -void -Normalizer2Impl::init(const int32_t *inIndexes, const UCPTrie *inTrie, - const uint16_t *inExtraData, const uint8_t *inSmallFCD) { - minDecompNoCP = static_cast(inIndexes[IX_MIN_DECOMP_NO_CP]); - minCompNoMaybeCP = static_cast(inIndexes[IX_MIN_COMP_NO_MAYBE_CP]); - minLcccCP = static_cast(inIndexes[IX_MIN_LCCC_CP]); - - minYesNo = static_cast(inIndexes[IX_MIN_YES_NO]); - minYesNoMappingsOnly = static_cast(inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]); - minNoNo = 
static_cast(inIndexes[IX_MIN_NO_NO]); - minNoNoCompBoundaryBefore = static_cast(inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]); - minNoNoCompNoMaybeCC = static_cast(inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]); - minNoNoEmpty = static_cast(inIndexes[IX_MIN_NO_NO_EMPTY]); - limitNoNo = static_cast(inIndexes[IX_LIMIT_NO_NO]); - minMaybeYes = static_cast(inIndexes[IX_MIN_MAYBE_YES]); - U_ASSERT((minMaybeYes & 7) == 0); // 8-aligned for noNoDelta bit fields - centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1; - - normTrie=inTrie; - - maybeYesCompositions=inExtraData; - extraData=maybeYesCompositions+((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT); - - smallFCD=inSmallFCD; -} - -U_CDECL_BEGIN - -static uint32_t U_CALLCONV -segmentStarterMapper(const void * /*context*/, uint32_t value) { - return value&CANON_NOT_SEGMENT_STARTER; -} - -U_CDECL_END - -void -Normalizer2Impl::addLcccChars(UnicodeSet &set) const { - UChar32 start = 0, end; - uint32_t norm16; - while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT, - nullptr, nullptr, &norm16)) >= 0) { - if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES && - norm16 != Normalizer2Impl::JAMO_VT) { - set.add(start, end); - } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) { - uint16_t fcd16 = getFCD16(start); - if (fcd16 > 0xff) { set.add(start, end); } - } - start = end + 1; - } -} - -void -Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const { - // Add the start code point of each same-value range of the trie. - UChar32 start = 0, end; - uint32_t value; - while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT, - nullptr, nullptr, &value)) >= 0) { - sa->add(sa->set, start); - if (start != end && isAlgorithmicNoNo((uint16_t)value) && - (value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) { - // Range of code points with same-norm16-value algorithmic decompositions. 
- // They might have different non-zero FCD16 values. - uint16_t prevFCD16 = getFCD16(start); - while (++start <= end) { - uint16_t fcd16 = getFCD16(start); - if (fcd16 != prevFCD16) { - sa->add(sa->set, start); - prevFCD16 = fcd16; - } - } - } - start = end + 1; - } - - /* add Hangul LV syllables and LV+1 because of skippables */ - for(UChar c=Hangul::HANGUL_BASE; cadd(sa->set, c); - sa->add(sa->set, c+1); - } - sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */ -} - -void -Normalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const { - // Add the start code point of each same-value range of the canonical iterator data trie. - if (!ensureCanonIterData(errorCode)) { return; } - // Currently only used for the SEGMENT_STARTER property. - UChar32 start = 0, end; - uint32_t value; - while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0, - segmentStarterMapper, nullptr, &value)) >= 0) { - sa->add(sa->set, start); - start = end + 1; - } -} - -const UChar * -Normalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src, - UChar32 minNeedDataCP, - ReorderingBuffer *buffer, - UErrorCode &errorCode) const { - // Make some effort to support NUL-terminated strings reasonably. - // Take the part of the fast quick check loop that does not look up - // data and check the first part of the string. - // After this prefix, determine the string length to simplify the rest - // of the code. 
- const UChar *prevSrc=src; - UChar c; - while((c=*src++)appendZeroCC(prevSrc, src, errorCode); - } - } - return src; -} - -UnicodeString & -Normalizer2Impl::decompose(const UnicodeString &src, UnicodeString &dest, - UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - dest.setToBogus(); - return dest; - } - const UChar *sArray=src.getBuffer(); - if(&dest==&src || sArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - dest.setToBogus(); - return dest; - } - decompose(sArray, sArray+src.length(), dest, src.length(), errorCode); - return dest; -} - -void -Normalizer2Impl::decompose(const UChar *src, const UChar *limit, - UnicodeString &dest, - int32_t destLengthEstimate, - UErrorCode &errorCode) const { - if(destLengthEstimate<0 && limit!=NULL) { - destLengthEstimate=(int32_t)(limit-src); - } - dest.remove(); - ReorderingBuffer buffer(*this, dest); - if(buffer.init(destLengthEstimate, errorCode)) { - decompose(src, limit, &buffer, errorCode); - } -} - -// Dual functionality: -// buffer!=NULL: normalize -// buffer==NULL: isNormalized/spanQuickCheckYes -const UChar * -Normalizer2Impl::decompose(const UChar *src, const UChar *limit, - ReorderingBuffer *buffer, - UErrorCode &errorCode) const { - UChar32 minNoCP=minDecompNoCP; - if(limit==NULL) { - src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode); - if(U_FAILURE(errorCode)) { - return src; - } - limit=u_strchr(src, 0); - } - - const UChar *prevSrc; - UChar32 c=0; - uint16_t norm16=0; - - // only for quick check - const UChar *prevBoundary=src; - uint8_t prevCC=0; - - for(;;) { - // count code units below the minimum or with irrelevant data for the quick check - for(prevSrc=src; src!=limit;) { - if( (c=*src)appendZeroCC(prevSrc, src, errorCode)) { - break; - } - } else { - prevCC=0; - prevBoundary=src; - } - } - if(src==limit) { - break; - } - - // Check one above-minimum, relevant code point. 
- src+=U16_LENGTH(c); - if(buffer!=NULL) { - if(!decompose(c, norm16, *buffer, errorCode)) { - break; - } - } else { - if(isDecompYes(norm16)) { - uint8_t cc=getCCFromYesOrMaybe(norm16); - if(prevCC<=cc || cc==0) { - prevCC=cc; - if(cc<=1) { - prevBoundary=src; - } - continue; - } - } - return prevBoundary; // "no" or cc out of order - } - } - return src; -} - -// Decompose a short piece of text which is likely to contain characters that -// fail the quick check loop and/or where the quick check loop's overhead -// is unlikely to be amortized. -// Called by the compose() and makeFCD() implementations. -const UChar * -Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit, - UBool stopAtCompBoundary, UBool onlyContiguous, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - if (U_FAILURE(errorCode)) { - return nullptr; - } - while(src= limitNoNo) { - if (isMaybeOrNonZeroCC(norm16)) { - return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode); - } - // Maps to an isCompYesAndZeroCC. 
- c=mapAlgorithmic(c, norm16); - norm16=getRawNorm16(c); - } - if (norm16 < minYesNo) { - // c does not decompose - return buffer.append(c, 0, errorCode); - } else if(isHangulLV(norm16) || isHangulLVT(norm16)) { - // Hangul syllable: decompose algorithmically - UChar jamos[3]; - return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode); - } - // c decomposes, get everything from the variable-length extra data - const uint16_t *mapping=getMapping(norm16); - uint16_t firstUnit=*mapping; - int32_t length=firstUnit&MAPPING_LENGTH_MASK; - uint8_t leadCC, trailCC; - trailCC=(uint8_t)(firstUnit>>8); - if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) { - leadCC=(uint8_t)(*(mapping-1)>>8); - } else { - leadCC=0; - } - return buffer.append((const UChar *)mapping+1, length, TRUE, leadCC, trailCC, errorCode); -} - -const uint8_t * -Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit, - UBool stopAtCompBoundary, UBool onlyContiguous, - ReorderingBuffer &buffer, UErrorCode &errorCode) const { - if (U_FAILURE(errorCode)) { - return nullptr; - } - while (src < limit) { - const uint8_t *prevSrc = src; - uint16_t norm16; - UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); - // Get the decomposition and the lead and trail cc's. - UChar32 c = U_SENTINEL; - if (norm16 >= limitNoNo) { - if (isMaybeOrNonZeroCC(norm16)) { - // No boundaries around this character. - c = codePointFromValidUTF8(prevSrc, src); - if (!buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode)) { - return nullptr; - } - continue; - } - // Maps to an isCompYesAndZeroCC. - if (stopAtCompBoundary) { - return prevSrc; - } - c = codePointFromValidUTF8(prevSrc, src); - c = mapAlgorithmic(c, norm16); - norm16 = getRawNorm16(c); - } else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) { - return prevSrc; - } - // norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8. 
- // We do not see invalid UTF-8 here because - // its norm16==INERT is normalization-inert, - // so it gets copied unchanged in the fast path, - // and we stop the slow path where invalid UTF-8 begins. - U_ASSERT(norm16 != INERT); - if (norm16 < minYesNo) { - if (c < 0) { - c = codePointFromValidUTF8(prevSrc, src); - } - // does not decompose - if (!buffer.append(c, 0, errorCode)) { - return nullptr; - } - } else if (isHangulLV(norm16) || isHangulLVT(norm16)) { - // Hangul syllable: decompose algorithmically - if (c < 0) { - c = codePointFromValidUTF8(prevSrc, src); - } - char16_t jamos[3]; - if (!buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode)) { - return nullptr; - } - } else { - // The character decomposes, get everything from the variable-length extra data. - const uint16_t *mapping = getMapping(norm16); - uint16_t firstUnit = *mapping; - int32_t length = firstUnit & MAPPING_LENGTH_MASK; - uint8_t trailCC = (uint8_t)(firstUnit >> 8); - uint8_t leadCC; - if (firstUnit & MAPPING_HAS_CCC_LCCC_WORD) { - leadCC = (uint8_t)(*(mapping-1) >> 8); - } else { - leadCC = 0; - } - if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) { - return nullptr; - } - } - if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { - return src; - } - } - return src; -} - -const UChar * -Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const { - uint16_t norm16; - if(c>7)&1)-1; - uint16_t rm0=*rawMapping; - if(rm0<=MAPPING_LENGTH_MASK) { - length=rm0; - return (const UChar *)rawMapping-rm0; - } else { - // Copy the normal mapping and replace its first two code units with rm0. 
- buffer[0]=(UChar)rm0; - u_memcpy(buffer+1, (const UChar *)mapping+1+2, mLength-2); - length=mLength-1; - return buffer; - } - } else { - length=mLength; - return (const UChar *)mapping+1; - } -} - -void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit, - UBool doDecompose, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, - UErrorCode &errorCode) const { - buffer.copyReorderableSuffixTo(safeMiddle); - if(doDecompose) { - decompose(src, limit, &buffer, errorCode); - return; - } - // Just merge the strings at the boundary. - bool isFirst = true; - uint8_t firstCC = 0, prevCC = 0, cc; - const UChar *p = src; - while (p != limit) { - const UChar *codePointStart = p; - UChar32 c; - uint16_t norm16; - UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); - if ((cc = getCC(norm16)) == 0) { - p = codePointStart; - break; - } - if (isFirst) { - firstCC = cc; - isFirst = false; - } - prevCC = cc; - } - if(limit==NULL) { // appendZeroCC() needs limit!=NULL - limit=u_strchr(p, 0); - } - - if (buffer.append(src, (int32_t)(p - src), FALSE, firstCC, prevCC, errorCode)) { - buffer.appendZeroCC(p, limit, errorCode); - } -} - -UBool Normalizer2Impl::hasDecompBoundaryBefore(UChar32 c) const { - return c < minLcccCP || (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) || - norm16HasDecompBoundaryBefore(getNorm16(c)); -} - -UBool Normalizer2Impl::norm16HasDecompBoundaryBefore(uint16_t norm16) const { - if (norm16 < minNoNoCompNoMaybeCC) { - return TRUE; - } - if (norm16 >= limitNoNo) { - return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT; - } - // c decomposes, get everything from the variable-length extra data - const uint16_t *mapping=getMapping(norm16); - uint16_t firstUnit=*mapping; - // TRUE if leadCC==0 (hasFCDBoundaryBefore()) - return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0; -} - -UBool Normalizer2Impl::hasDecompBoundaryAfter(UChar32 c) const { - if (c < minDecompNoCP) { - return TRUE; - } - if 
(c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) { - return TRUE; - } - return norm16HasDecompBoundaryAfter(getNorm16(c)); -} - -UBool Normalizer2Impl::norm16HasDecompBoundaryAfter(uint16_t norm16) const { - if(norm16 <= minYesNo || isHangulLVT(norm16)) { - return TRUE; - } - if (norm16 >= limitNoNo) { - if (isMaybeOrNonZeroCC(norm16)) { - return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT; - } - // Maps to an isCompYesAndZeroCC. - return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1; - } - // c decomposes, get everything from the variable-length extra data - const uint16_t *mapping=getMapping(norm16); - uint16_t firstUnit=*mapping; - // decomp after-boundary: same as hasFCDBoundaryAfter(), - // fcd16<=1 || trailCC==0 - if(firstUnit>0x1ff) { - return FALSE; // trailCC>1 - } - if(firstUnit<=0xff) { - return TRUE; // trailCC==0 - } - // if(trailCC==1) test leadCC==0, same as checking for before-boundary - // TRUE if leadCC==0 (hasFCDBoundaryBefore()) - return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0; -} - -/* - * Finds the recomposition result for - * a forward-combining "lead" character, - * specified with a pointer to its compositions list, - * and a backward-combining "trail" character. - * - * If the lead and trail characters combine, then this function returns - * the following "compositeAndFwd" value: - * Bits 21..1 composite character - * Bit 0 set if the composite is a forward-combining starter - * otherwise it returns -1. - * - * The compositions list has (trail, compositeAndFwd) pair entries, - * encoded as either pairs or triples of 16-bit units. - * The last entry has the high bit of its first unit set. - * - * The list is sorted by ascending trail characters (there are no duplicates). - * A linear search is used. - * - * See normalizer2impl.h for a more detailed description - * of the compositions list format. 
- */ -int32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) { - uint16_t key1, firstUnit; - if(trail(firstUnit=*list)) { - list+=2+(firstUnit&COMP_1_TRIPLE); - } - if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { - if(firstUnit&COMP_1_TRIPLE) { - return ((int32_t)list[1]<<16)|list[2]; - } else { - return list[1]; - } - } - } else { - // trail character is 3400..10FFFF - // result entry has 3 units - key1=(uint16_t)(COMP_1_TRAIL_LIMIT+ - (((trail>>COMP_1_TRAIL_SHIFT))& - ~COMP_1_TRIPLE)); - uint16_t key2=(uint16_t)(trail<(firstUnit=*list)) { - list+=2+(firstUnit&COMP_1_TRIPLE); - } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { - if(key2>(secondUnit=list[1])) { - if(firstUnit&COMP_1_LAST_TUPLE) { - break; - } else { - list+=3; - } - } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) { - return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2]; - } else { - break; - } - } else { - break; - } - } - } - return -1; -} - -/** - * @param list some character's compositions list - * @param set recursively receives the composites from these compositions - */ -void Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const { - uint16_t firstUnit; - int32_t compositeAndFwd; - do { - firstUnit=*list; - if((firstUnit&COMP_1_TRIPLE)==0) { - compositeAndFwd=list[1]; - list+=2; - } else { - compositeAndFwd=(((int32_t)list[1]&~COMP_2_TRAIL_MASK)<<16)|list[2]; - list+=3; - } - UChar32 composite=compositeAndFwd>>1; - if((compositeAndFwd&1)!=0) { - addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set); - } - set.add(composite); - } while((firstUnit&COMP_1_LAST_TUPLE)==0); -} - -/* - * Recomposes the buffer text starting at recomposeStartIndex - * (which is in NFD - decomposed and canonically ordered), - * and truncates the buffer contents. 
- * - * Note that recomposition never lengthens the text: - * Any character consists of either one or two code units; - * a composition may contain at most one more code unit than the original starter, - * while the combining mark that is removed has at least one code unit. - */ -void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex, - UBool onlyContiguous) const { - UChar *p=buffer.getStart()+recomposeStartIndex; - UChar *limit=buffer.getLimit(); - if(p==limit) { - return; - } - - UChar *starter, *pRemove, *q, *r; - const uint16_t *compositionsList; - UChar32 c, compositeAndFwd; - uint16_t norm16; - uint8_t cc, prevCC; - UBool starterIsSupplementary; - - // Some of the following variables are not used until we have a forward-combining starter - // and are only initialized now to avoid compiler warnings. - compositionsList=NULL; // used as indicator for whether we have a forward-combining starter - starter=NULL; - starterIsSupplementary=FALSE; - prevCC=0; - - for(;;) { - UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); - cc=getCCFromYesOrMaybe(norm16); - if( // this character combines backward and - isMaybe(norm16) && - // we have seen a starter that combines forward and - compositionsList!=NULL && - // the backward-combining character is not blocked - (prevCC=0) { - // The starter and the combining mark (c) do combine. - UChar32 composite=compositeAndFwd>>1; - - // Replace the starter with the composite, remove the combining mark. - pRemove=p-U16_LENGTH(c); // pRemove & p: start & limit of the combining mark - if(starterIsSupplementary) { - if(U_IS_SUPPLEMENTARY(composite)) { - // both are supplementary - starter[0]=U16_LEAD(composite); - starter[1]=U16_TRAIL(composite); - } else { - *starter=(UChar)composite; - // The composite is shorter than the starter, - // move the intermediate characters forward one. 
- starterIsSupplementary=FALSE; - q=starter+1; - r=q+1; - while(rminYesNo) { // composite 'a' has both mapping & compositions list - list+= // mapping pointer - 1+ // +1 to skip the first unit with the mapping length - (*list&MAPPING_LENGTH_MASK); // + mapping length - } - } - } else if(norm16>1; -#else - int32_t compositeAndFwd=combine(list, b); - return compositeAndFwd>=0 ? compositeAndFwd>>1 : U_SENTINEL; -#endif -} - -// Very similar to composeQuickCheck(): Make the same changes in both places if relevant. -// doCompose: normalize -// !doCompose: isNormalized (buffer must be empty and initialized) -UBool -Normalizer2Impl::compose(const UChar *src, const UChar *limit, - UBool onlyContiguous, - UBool doCompose, - ReorderingBuffer &buffer, - UErrorCode &errorCode) const { - const UChar *prevBoundary=src; - UChar32 minNoMaybeCP=minCompNoMaybeCP; - if(limit==NULL) { - src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, - doCompose ? &buffer : NULL, - errorCode); - if(U_FAILURE(errorCode)) { - return FALSE; - } - limit=u_strchr(src, 0); - if (prevBoundary != src) { - if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) { - prevBoundary = src; - } else { - buffer.removeSuffix(1); - prevBoundary = --src; - } - } - } - - for (;;) { - // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point, - // or with (compYes && ccc==0) properties. - const UChar *prevSrc; - UChar32 c = 0; - uint16_t norm16 = 0; - for (;;) { - if (src == limit) { - if (prevBoundary != limit && doCompose) { - buffer.appendZeroCC(prevBoundary, limit, errorCode); - } - return TRUE; - } - if( (c=*src)=minNoNo. - // The current character is either a "noNo" (has a mapping) - // or a "maybeYes" (combines backward) - // or a "yesYes" with ccc!=0. - // It is not a Hangul syllable or Jamo L because those have "yes" properties. - - // Medium-fast path: Handle cases that do not require full decomposition and recomposition. 
- if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes - if (!doCompose) { - return FALSE; - } - // Fast path for mapping a character that is immediately surrounded by boundaries. - // In this case, we need not decompose around the current character. - if (isDecompNoAlgorithmic(norm16)) { - // Maps to a single isCompYesAndZeroCC character - // which also implies hasCompBoundaryBefore. - if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) || - hasCompBoundaryBefore(src, limit)) { - if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) { - break; - } - if(!buffer.append(mapAlgorithmic(c, norm16), 0, errorCode)) { - break; - } - prevBoundary = src; - continue; - } - } else if (norm16 < minNoNoCompBoundaryBefore) { - // The mapping is comp-normalized which also implies hasCompBoundaryBefore. - if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) || - hasCompBoundaryBefore(src, limit)) { - if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) { - break; - } - const UChar *mapping = reinterpret_cast(getMapping(norm16)); - int32_t length = *mapping++ & MAPPING_LENGTH_MASK; - if(!buffer.appendZeroCC(mapping, mapping + length, errorCode)) { - break; - } - prevBoundary = src; - continue; - } - } else if (norm16 >= minNoNoEmpty) { - // The current character maps to nothing. - // Simply omit it from the output if there is a boundary before _or_ after it. - // The character itself implies no boundaries. - if (hasCompBoundaryBefore(src, limit) || - hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) { - if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) { - break; - } - prevBoundary = src; - continue; - } - } - // Other "noNo" type, or need to examine more text around this character: - // Fall through to the slow path. 
- } else if (isJamoVT(norm16) && prevBoundary != prevSrc) { - UChar prev=*(prevSrc-1); - if(c= 0) { - UChar32 syllable = Hangul::HANGUL_BASE + - (l*Hangul::JAMO_V_COUNT + (c-Hangul::JAMO_V_BASE)) * - Hangul::JAMO_T_COUNT + t; - --prevSrc; // Replace the Jamo L as well. - if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) { - break; - } - if(!buffer.appendBMP((UChar)syllable, 0, errorCode)) { - break; - } - prevBoundary = src; - continue; - } - // If we see L+V+x where x!=T then we drop to the slow path, - // decompose and recompose. - // This is to deal with NFKC finding normal L and V but a - // compatibility variant of a T. - // We need to either fully compose that combination here - // (which would complicate the code and may not work with strange custom data) - // or use the slow path. - } - } else if (Hangul::isHangulLV(prev)) { - // The current character is a Jamo Trailing consonant, - // compose with previous Hangul LV that does not contain a Jamo T. - if (!doCompose) { - return FALSE; - } - UChar32 syllable = prev + c - Hangul::JAMO_T_BASE; - --prevSrc; // Replace the Hangul LV as well. - if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) { - break; - } - if(!buffer.appendBMP((UChar)syllable, 0, errorCode)) { - break; - } - prevBoundary = src; - continue; - } - // No matching context, or may need to decompose surrounding text first: - // Fall through to the slow path. - } else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC - // One or more combining marks that do not combine-back: - // Check for canonical order, copy unchanged if ok and - // if followed by a character with a boundary-before. - uint8_t cc = getCCFromNormalYesOrMaybe(norm16); // cc!=0 - if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) { - // Fails FCD test, need to decompose and contiguously recompose. 
- if (!doCompose) { - return FALSE; - } - } else { - // If !onlyContiguous (not FCC), then we ignore the tccc of - // the previous character which passed the quick check "yes && ccc==0" test. - const UChar *nextSrc; - uint16_t n16; - for (;;) { - if (src == limit) { - if (doCompose) { - buffer.appendZeroCC(prevBoundary, limit, errorCode); - } - return TRUE; - } - uint8_t prevCC = cc; - nextSrc = src; - UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, n16); - if (n16 >= MIN_YES_YES_WITH_CC) { - cc = getCCFromNormalYesOrMaybe(n16); - if (prevCC > cc) { - if (!doCompose) { - return FALSE; - } - break; - } - } else { - break; - } - src = nextSrc; - } - // src is after the last in-order combining mark. - // If there is a boundary here, then we continue with no change. - if (norm16HasCompBoundaryBefore(n16)) { - if (isCompYesAndZeroCC(n16)) { - src = nextSrc; - } - continue; - } - // Use the slow path. There is no boundary in [prevSrc, src[. - } - } - - // Slow path: Find the nearest boundaries around the current character, - // decompose and recompose. - if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) { - const UChar *p = prevSrc; - UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16); - if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { - prevSrc = p; - } - } - if (doCompose && prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) { - break; - } - int32_t recomposeStartIndex=buffer.length(); - // We know there is not a boundary here. - decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous, - buffer, errorCode); - // Decompose until the next boundary. 
- src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous, - buffer, errorCode); - if (U_FAILURE(errorCode)) { - break; - } - if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals() - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return TRUE; - } - recompose(buffer, recomposeStartIndex, onlyContiguous); - if(!doCompose) { - if(!buffer.equals(prevSrc, src)) { - return FALSE; - } - buffer.remove(); - } - prevBoundary=src; - } - return TRUE; -} - -// Very similar to compose(): Make the same changes in both places if relevant. -// pQCResult==NULL: spanQuickCheckYes -// pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES) -const UChar * -Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit, - UBool onlyContiguous, - UNormalizationCheckResult *pQCResult) const { - const UChar *prevBoundary=src; - UChar32 minNoMaybeCP=minCompNoMaybeCP; - if(limit==NULL) { - UErrorCode errorCode=U_ZERO_ERROR; - src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, NULL, errorCode); - limit=u_strchr(src, 0); - if (prevBoundary != src) { - if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) { - prevBoundary = src; - } else { - prevBoundary = --src; - } - } - } - - for(;;) { - // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point, - // or with (compYes && ccc==0) properties. - const UChar *prevSrc; - UChar32 c = 0; - uint16_t norm16 = 0; - for (;;) { - if(src==limit) { - return src; - } - if( (c=*src)=minNoNo. - // The current character is either a "noNo" (has a mapping) - // or a "maybeYes" (combines backward) - // or a "yesYes" with ccc!=0. - // It is not a Hangul syllable or Jamo L because those have "yes" properties. 
- - uint16_t prevNorm16 = INERT; - if (prevBoundary != prevSrc) { - if (norm16HasCompBoundaryBefore(norm16)) { - prevBoundary = prevSrc; - } else { - const UChar *p = prevSrc; - uint16_t n16; - UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, n16); - if (norm16HasCompBoundaryAfter(n16, onlyContiguous)) { - prevBoundary = prevSrc; - } else { - prevBoundary = p; - prevNorm16 = n16; - } - } - } - - if(isMaybeOrNonZeroCC(norm16)) { - uint8_t cc=getCCFromYesOrMaybe(norm16); - if (onlyContiguous /* FCC */ && cc != 0 && - getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) { - // The [prevBoundary..prevSrc[ character - // passed the quick check "yes && ccc==0" test - // but is out of canonical order with the current combining mark. - } else { - // If !onlyContiguous (not FCC), then we ignore the tccc of - // the previous character which passed the quick check "yes && ccc==0" test. - const UChar *nextSrc; - for (;;) { - if (norm16 < MIN_YES_YES_WITH_CC) { - if (pQCResult != nullptr) { - *pQCResult = UNORM_MAYBE; - } else { - return prevBoundary; - } - } - if (src == limit) { - return src; - } - uint8_t prevCC = cc; - nextSrc = src; - UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, norm16); - if (isMaybeOrNonZeroCC(norm16)) { - cc = getCCFromYesOrMaybe(norm16); - if (!(prevCC <= cc || cc == 0)) { - break; - } - } else { - break; - } - src = nextSrc; - } - // src is after the last in-order combining mark. 
- if (isCompYesAndZeroCC(norm16)) { - prevBoundary = src; - src = nextSrc; - continue; - } - } - } - if(pQCResult!=NULL) { - *pQCResult=UNORM_NO; - } - return prevBoundary; - } -} - -void Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit, - UBool doCompose, - UBool onlyContiguous, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, - UErrorCode &errorCode) const { - if(!buffer.isEmpty()) { - const UChar *firstStarterInSrc=findNextCompBoundary(src, limit, onlyContiguous); - if(src!=firstStarterInSrc) { - const UChar *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(), - buffer.getLimit(), onlyContiguous); - int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest); - UnicodeString middle(lastStarterInDest, destSuffixLength); - buffer.removeSuffix(destSuffixLength); - safeMiddle=middle; - middle.append(src, (int32_t)(firstStarterInSrc-src)); - const UChar *middleStart=middle.getBuffer(); - compose(middleStart, middleStart+middle.length(), onlyContiguous, - TRUE, buffer, errorCode); - if(U_FAILURE(errorCode)) { - return; - } - src=firstStarterInSrc; - } - } - if(doCompose) { - compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); - } else { - if(limit==NULL) { // appendZeroCC() needs limit!=NULL - limit=u_strchr(src, 0); - } - buffer.appendZeroCC(src, limit, errorCode); - } -} - -UBool -Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous, - const uint8_t *src, const uint8_t *limit, - ByteSink *sink, Edits *edits, UErrorCode &errorCode) const { - U_ASSERT(limit != nullptr); - UnicodeString s16; - uint8_t minNoMaybeLead = leadByteForCP(minCompNoMaybeCP); - const uint8_t *prevBoundary = src; - - for (;;) { - // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point, - // or with (compYes && ccc==0) properties. 
- const uint8_t *prevSrc; - uint16_t norm16 = 0; - for (;;) { - if (src == limit) { - if (prevBoundary != limit && sink != nullptr) { - ByteSinkUtil::appendUnchanged(prevBoundary, limit, - *sink, options, edits, errorCode); - } - return TRUE; - } - if (*src < minNoMaybeLead) { - ++src; - } else { - prevSrc = src; - UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); - if (!isCompYesAndZeroCC(norm16)) { - break; - } - } - } - // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo. - // The current character is either a "noNo" (has a mapping) - // or a "maybeYes" (combines backward) - // or a "yesYes" with ccc!=0. - // It is not a Hangul syllable or Jamo L because those have "yes" properties. - - // Medium-fast path: Handle cases that do not require full decomposition and recomposition. - if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes - if (sink == nullptr) { - return FALSE; - } - // Fast path for mapping a character that is immediately surrounded by boundaries. - // In this case, we need not decompose around the current character. - if (isDecompNoAlgorithmic(norm16)) { - // Maps to a single isCompYesAndZeroCC character - // which also implies hasCompBoundaryBefore. - if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) || - hasCompBoundaryBefore(src, limit)) { - if (prevBoundary != prevSrc && - !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, - *sink, options, edits, errorCode)) { - break; - } - appendCodePointDelta(prevSrc, src, getAlgorithmicDelta(norm16), *sink, edits); - prevBoundary = src; - continue; - } - } else if (norm16 < minNoNoCompBoundaryBefore) { - // The mapping is comp-normalized which also implies hasCompBoundaryBefore. 
- if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) || - hasCompBoundaryBefore(src, limit)) { - if (prevBoundary != prevSrc && - !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, - *sink, options, edits, errorCode)) { - break; - } - const uint16_t *mapping = getMapping(norm16); - int32_t length = *mapping++ & MAPPING_LENGTH_MASK; - if (!ByteSinkUtil::appendChange(prevSrc, src, (const UChar *)mapping, length, - *sink, edits, errorCode)) { - break; - } - prevBoundary = src; - continue; - } - } else if (norm16 >= minNoNoEmpty) { - // The current character maps to nothing. - // Simply omit it from the output if there is a boundary before _or_ after it. - // The character itself implies no boundaries. - if (hasCompBoundaryBefore(src, limit) || - hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) { - if (prevBoundary != prevSrc && - !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, - *sink, options, edits, errorCode)) { - break; - } - if (edits != nullptr) { - edits->addReplace((int32_t)(src - prevSrc), 0); - } - prevBoundary = src; - continue; - } - } - // Other "noNo" type, or need to examine more text around this character: - // Fall through to the slow path. - } else if (isJamoVT(norm16)) { - // Jamo L: E1 84 80..92 - // Jamo V: E1 85 A1..B5 - // Jamo T: E1 86 A8..E1 87 82 - U_ASSERT((src - prevSrc) == 3 && *prevSrc == 0xe1); - UChar32 prev = previousHangulOrJamo(prevBoundary, prevSrc); - if (prevSrc[1] == 0x85) { - // The current character is a Jamo Vowel, - // compose with previous Jamo L and following Jamo T. - UChar32 l = prev - Hangul::JAMO_L_BASE; - if ((uint32_t)l < Hangul::JAMO_L_COUNT) { - if (sink == nullptr) { - return FALSE; - } - int32_t t = getJamoTMinusBase(src, limit); - if (t >= 0) { - // The next character is a Jamo T. - src += 3; - } else if (hasCompBoundaryBefore(src, limit)) { - // No Jamo T follows, not even via decomposition. 
- t = 0; - } - if (t >= 0) { - UChar32 syllable = Hangul::HANGUL_BASE + - (l*Hangul::JAMO_V_COUNT + (prevSrc[2]-0xa1)) * - Hangul::JAMO_T_COUNT + t; - prevSrc -= 3; // Replace the Jamo L as well. - if (prevBoundary != prevSrc && - !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, - *sink, options, edits, errorCode)) { - break; - } - ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits); - prevBoundary = src; - continue; - } - // If we see L+V+x where x!=T then we drop to the slow path, - // decompose and recompose. - // This is to deal with NFKC finding normal L and V but a - // compatibility variant of a T. - // We need to either fully compose that combination here - // (which would complicate the code and may not work with strange custom data) - // or use the slow path. - } - } else if (Hangul::isHangulLV(prev)) { - // The current character is a Jamo Trailing consonant, - // compose with previous Hangul LV that does not contain a Jamo T. - if (sink == nullptr) { - return FALSE; - } - UChar32 syllable = prev + getJamoTMinusBase(prevSrc, src); - prevSrc -= 3; // Replace the Hangul LV as well. - if (prevBoundary != prevSrc && - !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, - *sink, options, edits, errorCode)) { - break; - } - ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits); - prevBoundary = src; - continue; - } - // No matching context, or may need to decompose surrounding text first: - // Fall through to the slow path. - } else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC - // One or more combining marks that do not combine-back: - // Check for canonical order, copy unchanged if ok and - // if followed by a character with a boundary-before. - uint8_t cc = getCCFromNormalYesOrMaybe(norm16); // cc!=0 - if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) { - // Fails FCD test, need to decompose and contiguously recompose. 
- if (sink == nullptr) { - return FALSE; - } - } else { - // If !onlyContiguous (not FCC), then we ignore the tccc of - // the previous character which passed the quick check "yes && ccc==0" test. - const uint8_t *nextSrc; - uint16_t n16; - for (;;) { - if (src == limit) { - if (sink != nullptr) { - ByteSinkUtil::appendUnchanged(prevBoundary, limit, - *sink, options, edits, errorCode); - } - return TRUE; - } - uint8_t prevCC = cc; - nextSrc = src; - UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, n16); - if (n16 >= MIN_YES_YES_WITH_CC) { - cc = getCCFromNormalYesOrMaybe(n16); - if (prevCC > cc) { - if (sink == nullptr) { - return FALSE; - } - break; - } - } else { - break; - } - src = nextSrc; - } - // src is after the last in-order combining mark. - // If there is a boundary here, then we continue with no change. - if (norm16HasCompBoundaryBefore(n16)) { - if (isCompYesAndZeroCC(n16)) { - src = nextSrc; - } - continue; - } - // Use the slow path. There is no boundary in [prevSrc, src[. - } - } - - // Slow path: Find the nearest boundaries around the current character, - // decompose and recompose. - if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) { - const uint8_t *p = prevSrc; - UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16); - if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { - prevSrc = p; - } - } - ReorderingBuffer buffer(*this, s16, errorCode); - if (U_FAILURE(errorCode)) { - break; - } - // We know there is not a boundary here. - decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous, - buffer, errorCode); - // Decompose until the next boundary. 
- src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous, - buffer, errorCode); - if (U_FAILURE(errorCode)) { - break; - } - if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals() - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return TRUE; - } - recompose(buffer, 0, onlyContiguous); - if (!buffer.equals(prevSrc, src)) { - if (sink == nullptr) { - return FALSE; - } - if (prevBoundary != prevSrc && - !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, - *sink, options, edits, errorCode)) { - break; - } - if (!ByteSinkUtil::appendChange(prevSrc, src, buffer.getStart(), buffer.length(), - *sink, edits, errorCode)) { - break; - } - prevBoundary = src; - } - } - return TRUE; -} - -UBool Normalizer2Impl::hasCompBoundaryBefore(const UChar *src, const UChar *limit) const { - if (src == limit || *src < minCompNoMaybeCP) { - return TRUE; - } - UChar32 c; - uint16_t norm16; - UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16); - return norm16HasCompBoundaryBefore(norm16); -} - -UBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const { - if (src == limit) { - return TRUE; - } - uint16_t norm16; - UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); - return norm16HasCompBoundaryBefore(norm16); -} - -UBool Normalizer2Impl::hasCompBoundaryAfter(const UChar *start, const UChar *p, - UBool onlyContiguous) const { - if (start == p) { - return TRUE; - } - UChar32 c; - uint16_t norm16; - UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); - return norm16HasCompBoundaryAfter(norm16, onlyContiguous); -} - -UBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p, - UBool onlyContiguous) const { - if (start == p) { - return TRUE; - } - uint16_t norm16; - UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16); - return norm16HasCompBoundaryAfter(norm16, onlyContiguous); -} - -const UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar 
*start, const UChar *p, - UBool onlyContiguous) const { - while (p != start) { - const UChar *codePointLimit = p; - UChar32 c; - uint16_t norm16; - UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); - if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { - return codePointLimit; - } - if (hasCompBoundaryBefore(c, norm16)) { - return p; - } - } - return p; -} - -const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit, - UBool onlyContiguous) const { - while (p != limit) { - const UChar *codePointStart = p; - UChar32 c; - uint16_t norm16; - UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); - if (hasCompBoundaryBefore(c, norm16)) { - return codePointStart; - } - if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { - return p; - } - } - return p; -} - -uint8_t Normalizer2Impl::getPreviousTrailCC(const UChar *start, const UChar *p) const { - if (start == p) { - return 0; - } - int32_t i = (int32_t)(p - start); - UChar32 c; - U16_PREV(start, 0, i, c); - return (uint8_t)getFCD16(c); -} - -uint8_t Normalizer2Impl::getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const { - if (start == p) { - return 0; - } - int32_t i = (int32_t)(p - start); - UChar32 c; - U8_PREV(start, 0, i, c); - return (uint8_t)getFCD16(c); -} - -// Note: normalizer2impl.cpp r30982 (2011-nov-27) -// still had getFCDTrie() which built and cached an FCD trie. -// That provided faster access to FCD data than getFCD16FromNormData() -// but required synchronization and consumed some 10kB of heap memory -// in any process that uses FCD (e.g., via collation). -// minDecompNoCP etc. and smallFCD[] are intended to help with any loss of performance, -// at least for ASCII & CJK. - -// Gets the FCD value from the regular normalization data. 
-uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const { - uint16_t norm16=getNorm16(c); - if (norm16 >= limitNoNo) { - if(norm16>=MIN_NORMAL_MAYBE_YES) { - // combining mark - norm16=getCCFromNormalYesOrMaybe(norm16); - return norm16|(norm16<<8); - } else if(norm16>=minMaybeYes) { - return 0; - } else { // isDecompNoAlgorithmic(norm16) - uint16_t deltaTrailCC = norm16 & DELTA_TCCC_MASK; - if (deltaTrailCC <= DELTA_TCCC_1) { - return deltaTrailCC >> OFFSET_SHIFT; - } - // Maps to an isCompYesAndZeroCC. - c=mapAlgorithmic(c, norm16); - norm16=getRawNorm16(c); - } - } - if(norm16<=minYesNo || isHangulLVT(norm16)) { - // no decomposition or Hangul syllable, all zeros - return 0; - } - // c decomposes, get everything from the variable-length extra data - const uint16_t *mapping=getMapping(norm16); - uint16_t firstUnit=*mapping; - norm16=firstUnit>>8; // tccc - if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) { - norm16|=*(mapping-1)&0xff00; // lccc - } - return norm16; -} - -// Dual functionality: -// buffer!=NULL: normalize -// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes -const UChar * -Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit, - ReorderingBuffer *buffer, - UErrorCode &errorCode) const { - // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1. - // Similar to the prevBoundary in the compose() implementation. - const UChar *prevBoundary=src; - int32_t prevFCD16=0; - if(limit==NULL) { - src=copyLowPrefixFromNulTerminated(src, minLcccCP, buffer, errorCode); - if(U_FAILURE(errorCode)) { - return src; - } - if(prevBoundary1) { - --prevBoundary; - } - } - limit=u_strchr(src, 0); - } - - // Note: In this function we use buffer->appendZeroCC() because we track - // the lead and trail combining classes here, rather than leaving it to - // the ReorderingBuffer. - // The exception is the call to decomposeShort() which uses the buffer - // in the normal way. 
- - const UChar *prevSrc; - UChar32 c=0; - uint16_t fcd16=0; - - for(;;) { - // count code units with lccc==0 - for(prevSrc=src; src!=limit;) { - if((c=*src)appendZeroCC(prevSrc, src, errorCode)) { - break; - } - if(src==limit) { - break; - } - prevBoundary=src; - // We know that the previous character's lccc==0. - if(prevFCD16<0) { - // Fetching the fcd16 value was deferred for this below-minLcccCP code point. - UChar32 prev=~prevFCD16; - if(prev1) { - --prevBoundary; - } - } - } else { - const UChar *p=src-1; - if(U16_IS_TRAIL(*p) && prevSrc

1) { - prevBoundary=p; - } - } - // The start of the current character (c). - prevSrc=src; - } else if(src==limit) { - break; - } - - src+=U16_LENGTH(c); - // The current character (c) at [prevSrc..src[ has a non-zero lead combining class. - // Check for proper order, and decompose locally if necessary. - if((prevFCD16&0xff)<=(fcd16>>8)) { - // proper order: prev tccc <= current lccc - if((fcd16&0xff)<=1) { - prevBoundary=src; - } - if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) { - break; - } - prevFCD16=fcd16; - continue; - } else if(buffer==NULL) { - return prevBoundary; // quick check "no" - } else { - /* - * Back out the part of the source that we copied or appended - * already but is now going to be decomposed. - * prevSrc is set to after what was copied/appended. - */ - buffer->removeSuffix((int32_t)(prevSrc-prevBoundary)); - /* - * Find the part of the source that needs to be decomposed, - * up to the next safe boundary. - */ - src=findNextFCDBoundary(src, limit); - /* - * The source text does not fulfill the conditions for FCD. - * Decompose and reorder a limited piece of the text. 
- */ - decomposeShort(prevBoundary, src, FALSE, FALSE, *buffer, errorCode); - if (U_FAILURE(errorCode)) { - break; - } - prevBoundary=src; - prevFCD16=0; - } - } - return src; -} - -void Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit, - UBool doMakeFCD, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, - UErrorCode &errorCode) const { - if(!buffer.isEmpty()) { - const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit); - if(src!=firstBoundaryInSrc) { - const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(), - buffer.getLimit()); - int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest); - UnicodeString middle(lastBoundaryInDest, destSuffixLength); - buffer.removeSuffix(destSuffixLength); - safeMiddle=middle; - middle.append(src, (int32_t)(firstBoundaryInSrc-src)); - const UChar *middleStart=middle.getBuffer(); - makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode); - if(U_FAILURE(errorCode)) { - return; - } - src=firstBoundaryInSrc; - } - } - if(doMakeFCD) { - makeFCD(src, limit, &buffer, errorCode); - } else { - if(limit==NULL) { // appendZeroCC() needs limit!=NULL - limit=u_strchr(src, 0); - } - buffer.appendZeroCC(src, limit, errorCode); - } -} - -const UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const { - while(startadd(firstOrigin); - } - } else { - set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK)]; - } - set->add(origin); - } -} - -// C++ class for friend access to private Normalizer2Impl members. 
-class InitCanonIterData { -public: - static void doInit(Normalizer2Impl *impl, UErrorCode &errorCode); -}; - -U_CDECL_BEGIN - -// UInitOnce instantiation function for CanonIterData -static void U_CALLCONV -initCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) { - InitCanonIterData::doInit(impl, errorCode); -} - -U_CDECL_END - -void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) { - U_ASSERT(impl->fCanonIterData == NULL); - impl->fCanonIterData = new CanonIterData(errorCode); - if (impl->fCanonIterData == NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } - if (U_SUCCESS(errorCode)) { - UChar32 start = 0, end; - uint32_t value; - while ((end = ucptrie_getRange(impl->normTrie, start, - UCPMAP_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT, - nullptr, nullptr, &value)) >= 0) { - // Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters. - if (value != Normalizer2Impl::INERT) { - impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode); - } - start = end + 1; - } -#ifdef UCPTRIE_DEBUG - umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData"); -#endif - impl->fCanonIterData->trie = umutablecptrie_buildImmutable( - impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode); - umutablecptrie_close(impl->fCanonIterData->mutableTrie); - impl->fCanonIterData->mutableTrie = nullptr; - } - if (U_FAILURE(errorCode)) { - delete impl->fCanonIterData; - impl->fCanonIterData = NULL; - } -} - -void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16, - CanonIterData &newData, - UErrorCode &errorCode) const { - if(isInert(norm16) || (minYesNo<=norm16 && norm16 minYesNo) { - // c decomposes, get everything from the variable-length extra data - const uint16_t *mapping=getMapping(norm16_2); - uint16_t firstUnit=*mapping; - int32_t length=firstUnit&MAPPING_LENGTH_MASK; - 
if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { - if(c==c2 && (*(mapping-1)&0xff)!=0) { - newValue|=CANON_NOT_SEGMENT_STARTER; // original c has cc!=0 - } - } - // Skip empty mappings (no characters in the decomposition). - if(length!=0) { - ++mapping; // skip over the firstUnit - // add c to first code point's start set - int32_t i=0; - U16_NEXT_UNSAFE(mapping, i, c2); - newData.addToStartSet(c, c2, errorCode); - // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a - // one-way mapping. A 2-way mapping is possible here after - // intermediate algorithmic mapping. - if(norm16_2>=minNoNo) { - while(i(this); - umtx_initOnce(me->fCanonIterDataInitOnce, &initCanonIterData, me, errorCode); - return U_SUCCESS(errorCode); -} - -int32_t Normalizer2Impl::getCanonValue(UChar32 c) const { - return (int32_t)ucptrie_get(fCanonIterData->trie, c); -} - -const UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const { - return *(const UnicodeSet *)fCanonIterData->canonStartSets[n]; -} - -UBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const { - return getCanonValue(c)>=0; -} - -UBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const { - int32_t canonValue=getCanonValue(c)&~CANON_NOT_SEGMENT_STARTER; - if(canonValue==0) { - return FALSE; - } - set.clear(); - int32_t value=canonValue&CANON_VALUE_MASK; - if((canonValue&CANON_HAS_SET)!=0) { - set.addAll(getCanonStartSet(value)); - } else if(value!=0) { - set.add(value); - } - if((canonValue&CANON_HAS_COMPOSITIONS)!=0) { - uint16_t norm16=getRawNorm16(c); - if(norm16==JAMO_L) { - UChar32 syllable= - (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT); - set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1); - } else { - addComposites(getCompositionsList(norm16), set); - } - } - return TRUE; -} - -U_NAMESPACE_END - -// Normalizer2 data swapping ----------------------------------------------- *** - -U_NAMESPACE_USE - -U_CAPI int32_t U_EXPORT2 -unorm2_swap(const 
UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t headerSize; - - const uint8_t *inBytes; - uint8_t *outBytes; - - const int32_t *inIndexes; - int32_t indexes[Normalizer2Impl::IX_TOTAL_SIZE+1]; - - int32_t i, offset, nextOffset, size; - - /* udata_swapDataHeader checks the arguments */ - headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* check data format and format version */ - pInfo=(const UDataInfo *)((const char *)inData+4); - uint8_t formatVersion0=pInfo->formatVersion[0]; - if(!( - pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ - pInfo->dataFormat[1]==0x72 && - pInfo->dataFormat[2]==0x6d && - pInfo->dataFormat[3]==0x32 && - (1<=formatVersion0 && formatVersion0<=4) - )) { - udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - inBytes=(const uint8_t *)inData+headerSize; - outBytes=(uint8_t *)outData+headerSize; - - inIndexes=(const int32_t *)inBytes; - int32_t minIndexesLength; - if(formatVersion0==1) { - minIndexesLength=Normalizer2Impl::IX_MIN_MAYBE_YES+1; - } else if(formatVersion0==2) { - minIndexesLength=Normalizer2Impl::IX_MIN_YES_NO_MAPPINGS_ONLY+1; - } else { - minIndexesLength=Normalizer2Impl::IX_MIN_LCCC_CP+1; - } - - if(length>=0) { - length-=headerSize; - if(length=0) { - if(lengthswapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode); - offset=nextOffset; - - /* swap the trie */ - nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET]; - utrie_swapAnyVersion(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); - offset=nextOffset; - - /* swap the uint16_t extraData[] */ - 
nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET]; - ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); - offset=nextOffset; - - /* no need to swap the uint8_t smallFCD[] (new in formatVersion 2) */ - nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1]; - offset=nextOffset; - - U_ASSERT(offset==size); - } - - return headerSize+size; -} - -#endif // !UCONFIG_NO_NORMALIZATION diff --git a/deps/node/deps/icu-small/source/common/normalizer2impl.h b/deps/node/deps/icu-small/source/common/normalizer2impl.h deleted file mode 100644 index 2e6aff30..00000000 --- a/deps/node/deps/icu-small/source/common/normalizer2impl.h +++ /dev/null @@ -1,978 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2009-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: normalizer2impl.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009nov22 -* created by: Markus W. 
Scherer -*/ - -#ifndef __NORMALIZER2IMPL_H__ -#define __NORMALIZER2IMPL_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/normalizer2.h" -#include "unicode/ucptrie.h" -#include "unicode/unistr.h" -#include "unicode/unorm.h" -#include "unicode/utf.h" -#include "unicode/utf16.h" -#include "mutex.h" -#include "udataswp.h" -#include "uset_imp.h" - -// When the nfc.nrm data is *not* hardcoded into the common library -// (with this constant set to 0), -// then it needs to be built into the data package: -// Add nfc.nrm to icu4c/source/data/Makefile.in DAT_FILES_SHORT -#define NORM2_HARDCODE_NFC_DATA 1 - -U_NAMESPACE_BEGIN - -struct CanonIterData; - -class ByteSink; -class Edits; -class InitCanonIterData; -class LcccContext; - -class U_COMMON_API Hangul { -public: - /* Korean Hangul and Jamo constants */ - enum { - JAMO_L_BASE=0x1100, /* "lead" jamo */ - JAMO_L_END=0x1112, - JAMO_V_BASE=0x1161, /* "vowel" jamo */ - JAMO_V_END=0x1175, - JAMO_T_BASE=0x11a7, /* "trail" jamo */ - JAMO_T_END=0x11c2, - - HANGUL_BASE=0xac00, - HANGUL_END=0xd7a3, - - JAMO_L_COUNT=19, - JAMO_V_COUNT=21, - JAMO_T_COUNT=28, - - JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT, - - HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT, - HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT - }; - - static inline UBool isHangul(UChar32 c) { - return HANGUL_BASE<=c && c=MIN_NORMAL_MAYBE_YES) { - return getCCFromNormalYesOrMaybe(norm16); - } - if(norm16> OFFSET_SHIFT); - } - static uint8_t getCCFromYesOrMaybe(uint16_t norm16) { - return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0; - } - uint8_t getCCFromYesOrMaybeCP(UChar32 c) const { - if (c < minCompNoMaybeCP) { return 0; } - return getCCFromYesOrMaybe(getNorm16(c)); - } - - /** - * Returns the FCD data for code point c. - * @param c A Unicode code point. - * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0. 
- */ - uint16_t getFCD16(UChar32 c) const { - if(c>8]; - if(bits==0) { return false; } - return (UBool)((bits>>((lead>>5)&7))&1); - } - /** Returns the FCD value from the regular normalization data. */ - uint16_t getFCD16FromNormData(UChar32 c) const; - - /** - * Gets the decomposition for one code point. - * @param c code point - * @param buffer out-only buffer for algorithmic decompositions - * @param length out-only, takes the length of the decomposition, if any - * @return pointer to the decomposition, or NULL if none - */ - const UChar *getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const; - - /** - * Gets the raw decomposition for one code point. - * @param c code point - * @param buffer out-only buffer for algorithmic decompositions - * @param length out-only, takes the length of the decomposition, if any - * @return pointer to the decomposition, or NULL if none - */ - const UChar *getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const; - - UChar32 composePair(UChar32 a, UChar32 b) const; - - UBool isCanonSegmentStarter(UChar32 c) const; - UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const; - - enum { - // Fixed norm16 values. - MIN_YES_YES_WITH_CC=0xfe02, - JAMO_VT=0xfe00, - MIN_NORMAL_MAYBE_YES=0xfc00, - JAMO_L=2, // offset=1 hasCompBoundaryAfter=FALSE - INERT=1, // offset=0 hasCompBoundaryAfter=TRUE - - // norm16 bit 0 is comp-boundary-after. - HAS_COMP_BOUNDARY_AFTER=1, - OFFSET_SHIFT=1, - - // For algorithmic one-way mappings, norm16 bits 2..1 indicate the - // tccc (0, 1, >1) for quick FCC boundary-after tests. - DELTA_TCCC_0=0, - DELTA_TCCC_1=2, - DELTA_TCCC_GT_1=4, - DELTA_TCCC_MASK=6, - DELTA_SHIFT=3, - - MAX_DELTA=0x40 - }; - - enum { - // Byte offsets from the start of the data, after the generic header. 
- IX_NORM_TRIE_OFFSET, - IX_EXTRA_DATA_OFFSET, - IX_SMALL_FCD_OFFSET, - IX_RESERVED3_OFFSET, - IX_RESERVED4_OFFSET, - IX_RESERVED5_OFFSET, - IX_RESERVED6_OFFSET, - IX_TOTAL_SIZE, - - // Code point thresholds for quick check codes. - IX_MIN_DECOMP_NO_CP, - IX_MIN_COMP_NO_MAYBE_CP, - - // Norm16 value thresholds for quick check combinations and types of extra data. - - /** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */ - IX_MIN_YES_NO, - /** Mappings are comp-normalized. */ - IX_MIN_NO_NO, - IX_LIMIT_NO_NO, - IX_MIN_MAYBE_YES, - - /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */ - IX_MIN_YES_NO_MAPPINGS_ONLY, - /** Mappings are not comp-normalized but have a comp boundary before. */ - IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE, - /** Mappings do not have a comp boundary before. */ - IX_MIN_NO_NO_COMP_NO_MAYBE_CC, - /** Mappings to the empty string. */ - IX_MIN_NO_NO_EMPTY, - - IX_MIN_LCCC_CP, - IX_RESERVED19, - IX_COUNT - }; - - enum { - MAPPING_HAS_CCC_LCCC_WORD=0x80, - MAPPING_HAS_RAW_MAPPING=0x40, - // unused bit 0x20, - MAPPING_LENGTH_MASK=0x1f - }; - - enum { - COMP_1_LAST_TUPLE=0x8000, - COMP_1_TRIPLE=1, - COMP_1_TRAIL_LIMIT=0x3400, - COMP_1_TRAIL_MASK=0x7ffe, - COMP_1_TRAIL_SHIFT=9, // 10-1 for the "triple" bit - COMP_2_TRAIL_SHIFT=6, - COMP_2_TRAIL_MASK=0xffc0 - }; - - // higher-level functionality ------------------------------------------ *** - - // NFD without an NFD Normalizer2 instance. - UnicodeString &decompose(const UnicodeString &src, UnicodeString &dest, - UErrorCode &errorCode) const; - /** - * Decomposes [src, limit[ and writes the result to dest. - * limit can be NULL if src is NUL-terminated. - * destLengthEstimate is the initial dest buffer capacity and can be -1. 
- */ - void decompose(const UChar *src, const UChar *limit, - UnicodeString &dest, int32_t destLengthEstimate, - UErrorCode &errorCode) const; - - const UChar *decompose(const UChar *src, const UChar *limit, - ReorderingBuffer *buffer, UErrorCode &errorCode) const; - void decomposeAndAppend(const UChar *src, const UChar *limit, - UBool doDecompose, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, - UErrorCode &errorCode) const; - UBool compose(const UChar *src, const UChar *limit, - UBool onlyContiguous, - UBool doCompose, - ReorderingBuffer &buffer, - UErrorCode &errorCode) const; - const UChar *composeQuickCheck(const UChar *src, const UChar *limit, - UBool onlyContiguous, - UNormalizationCheckResult *pQCResult) const; - void composeAndAppend(const UChar *src, const UChar *limit, - UBool doCompose, - UBool onlyContiguous, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, - UErrorCode &errorCode) const; - - /** sink==nullptr: isNormalized() */ - UBool composeUTF8(uint32_t options, UBool onlyContiguous, - const uint8_t *src, const uint8_t *limit, - ByteSink *sink, icu::Edits *edits, UErrorCode &errorCode) const; - - const UChar *makeFCD(const UChar *src, const UChar *limit, - ReorderingBuffer *buffer, UErrorCode &errorCode) const; - void makeFCDAndAppend(const UChar *src, const UChar *limit, - UBool doMakeFCD, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, - UErrorCode &errorCode) const; - - UBool hasDecompBoundaryBefore(UChar32 c) const; - UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const; - UBool hasDecompBoundaryAfter(UChar32 c) const; - UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const; - UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); } - - UBool hasCompBoundaryBefore(UChar32 c) const { - return c=minMaybeYes; } - static UBool isInert(uint16_t norm16) { return norm16==INERT; } - static UBool isJamoL(uint16_t norm16) { return norm16==JAMO_L; } - static UBool isJamoVT(uint16_t norm16) { 
return norm16==JAMO_VT; } - uint16_t hangulLVT() const { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; } - UBool isHangulLV(uint16_t norm16) const { return norm16==minYesNo; } - UBool isHangulLVT(uint16_t norm16) const { - return norm16==hangulLVT(); - } - UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16=MIN_YES_YES_WITH_CC || norm16=limitNoNo; } - - // For use with isCompYes(). - // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC. - // static uint8_t getCCFromYes(uint16_t norm16) { - // return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0; - // } - uint8_t getCCFromNoNo(uint16_t norm16) const { - const uint16_t *mapping=getMapping(norm16); - if(*mapping&MAPPING_HAS_CCC_LCCC_WORD) { - return (uint8_t)*(mapping-1); - } else { - return 0; - } - } - // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC() - uint8_t getTrailCCFromCompYesAndZeroCC(uint16_t norm16) const { - if(norm16<=minYesNo) { - return 0; // yesYes and Hangul LV have ccc=tccc=0 - } else { - // For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here. - return (uint8_t)(*getMapping(norm16)>>8); // tccc from yesNo - } - } - uint8_t getPreviousTrailCC(const UChar *start, const UChar *p) const; - uint8_t getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const; - - // Requires algorithmic-NoNo. - UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const { - return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta; - } - UChar32 getAlgorithmicDelta(uint16_t norm16) const { - return (norm16>>DELTA_SHIFT)-centerNoNoDelta; - } - - // Requires minYesNo>OFFSET_SHIFT); } - const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const { - if(norm16>OFFSET_SHIFT); - } - /** - * @param c code point must have compositions - * @return compositions list pointer - */ - const uint16_t *getCompositionsList(uint16_t norm16) const { - return isDecompYes(norm16) ? 
- getCompositionsListForDecompYes(norm16) : - getCompositionsListForComposite(norm16); - } - - const UChar *copyLowPrefixFromNulTerminated(const UChar *src, - UChar32 minNeedDataCP, - ReorderingBuffer *buffer, - UErrorCode &errorCode) const; - const UChar *decomposeShort(const UChar *src, const UChar *limit, - UBool stopAtCompBoundary, UBool onlyContiguous, - ReorderingBuffer &buffer, UErrorCode &errorCode) const; - UBool decompose(UChar32 c, uint16_t norm16, - ReorderingBuffer &buffer, UErrorCode &errorCode) const; - - const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit, - UBool stopAtCompBoundary, UBool onlyContiguous, - ReorderingBuffer &buffer, UErrorCode &errorCode) const; - - static int32_t combine(const uint16_t *list, UChar32 trail); - void addComposites(const uint16_t *list, UnicodeSet &set) const; - void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex, - UBool onlyContiguous) const; - - UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const { - return cclone()), - currentIndex(copy.currentIndex), nextIndex(copy.nextIndex), - buffer(copy.buffer), bufferPos(copy.bufferPos) -{ - init(); -} - -void -Normalizer::init() { - UErrorCode errorCode=U_ZERO_ERROR; - fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode); - if(fOptions&UNORM_UNICODE_3_2) { - delete fFilteredNorm2; - fNorm2=fFilteredNorm2= - new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode)); - } - if(U_FAILURE(errorCode)) { - errorCode=U_ZERO_ERROR; - fNorm2=Normalizer2Factory::getNoopInstance(errorCode); - } -} - -Normalizer::~Normalizer() -{ - delete fFilteredNorm2; - delete text; -} - -Normalizer* -Normalizer::clone() const -{ - return new Normalizer(*this); -} - -/** - * Generates a hash code for this iterator. 
- */ -int32_t Normalizer::hashCode() const -{ - return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex; -} - -UBool Normalizer::operator==(const Normalizer& that) const -{ - return - this==&that || - (fUMode==that.fUMode && - fOptions==that.fOptions && - *text==*that.text && - buffer==that.buffer && - bufferPos==that.bufferPos && - nextIndex==that.nextIndex); -} - -//------------------------------------------------------------------------- -// Static utility methods -//------------------------------------------------------------------------- - -void U_EXPORT2 -Normalizer::normalize(const UnicodeString& source, - UNormalizationMode mode, int32_t options, - UnicodeString& result, - UErrorCode &status) { - if(source.isBogus() || U_FAILURE(status)) { - result.setToBogus(); - if(U_SUCCESS(status)) { - status=U_ILLEGAL_ARGUMENT_ERROR; - } - } else { - UnicodeString localDest; - UnicodeString *dest; - - if(&source!=&result) { - dest=&result; - } else { - // the source and result strings are the same object, use a temporary one - dest=&localDest; - } - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); - if(U_SUCCESS(status)) { - if(options&UNORM_UNICODE_3_2) { - FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). - normalize(source, *dest, status); - } else { - n2->normalize(source, *dest, status); - } - } - if(dest==&localDest && U_SUCCESS(status)) { - result=*dest; - } - } -} - -void U_EXPORT2 -Normalizer::compose(const UnicodeString& source, - UBool compat, int32_t options, - UnicodeString& result, - UErrorCode &status) { - normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status); -} - -void U_EXPORT2 -Normalizer::decompose(const UnicodeString& source, - UBool compat, int32_t options, - UnicodeString& result, - UErrorCode &status) { - normalize(source, compat ? 
UNORM_NFKD : UNORM_NFD, options, result, status); -} - -UNormalizationCheckResult -Normalizer::quickCheck(const UnicodeString& source, - UNormalizationMode mode, int32_t options, - UErrorCode &status) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); - if(U_SUCCESS(status)) { - if(options&UNORM_UNICODE_3_2) { - return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). - quickCheck(source, status); - } else { - return n2->quickCheck(source, status); - } - } else { - return UNORM_MAYBE; - } -} - -UBool -Normalizer::isNormalized(const UnicodeString& source, - UNormalizationMode mode, int32_t options, - UErrorCode &status) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); - if(U_SUCCESS(status)) { - if(options&UNORM_UNICODE_3_2) { - return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). - isNormalized(source, status); - } else { - return n2->isNormalized(source, status); - } - } else { - return FALSE; - } -} - -UnicodeString & U_EXPORT2 -Normalizer::concatenate(const UnicodeString &left, const UnicodeString &right, - UnicodeString &result, - UNormalizationMode mode, int32_t options, - UErrorCode &errorCode) { - if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) { - result.setToBogus(); - if(U_SUCCESS(errorCode)) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - } - } else { - UnicodeString localDest; - UnicodeString *dest; - - if(&right!=&result) { - dest=&result; - } else { - // the right and result strings are the same object, use a temporary one - dest=&localDest; - } - *dest=left; - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode); - if(U_SUCCESS(errorCode)) { - if(options&UNORM_UNICODE_3_2) { - FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)). 
- append(*dest, right, errorCode); - } else { - n2->append(*dest, right, errorCode); - } - } - if(dest==&localDest && U_SUCCESS(errorCode)) { - result=*dest; - } - } - return result; -} - -//------------------------------------------------------------------------- -// Iteration API -//------------------------------------------------------------------------- - -/** - * Return the current character in the normalized text. - */ -UChar32 Normalizer::current() { - if(bufferPos0 || previousNormalize()) { - UChar32 c=buffer.char32At(bufferPos-1); - bufferPos-=U16_LENGTH(c); - return c; - } else { - return DONE; - } -} - -void Normalizer::reset() { - currentIndex=nextIndex=text->setToStart(); - clearBuffer(); -} - -void -Normalizer::setIndexOnly(int32_t index) { - text->setIndex(index); // pins index - currentIndex=nextIndex=text->getIndex(); - clearBuffer(); -} - -/** - * Return the first character in the normalized text. This resets - * the Normalizer's position to the beginning of the text. - */ -UChar32 Normalizer::first() { - reset(); - return next(); -} - -/** - * Return the last character in the normalized text. This resets - * the Normalizer's position to be just before the - * the input text corresponding to that normalized character. - */ -UChar32 Normalizer::last() { - currentIndex=nextIndex=text->setToEnd(); - clearBuffer(); - return previous(); -} - -/** - * Retrieve the current iteration position in the input text that is - * being normalized. This method is useful in applications such as - * searching, where you need to be able to determine the position in - * the input text that corresponds to a given normalized output character. - *

- * Note: This method sets the position in the input, while - * {@link #next} and {@link #previous} iterate through characters in the - * output. This means that there is not necessarily a one-to-one - * correspondence between characters returned by next and - * previous and the indices passed to and returned from - * setIndex and {@link #getIndex}. - * - */ -int32_t Normalizer::getIndex() const { - if(bufferPosCharacterIterator or the start (i.e. 0) of the String - * over which this Normalizer is iterating - */ -int32_t Normalizer::startIndex() const { - return text->startIndex(); -} - -/** - * Retrieve the index of the end of the input text. This is the end index - * of the CharacterIterator or the length of the String - * over which this Normalizer is iterating - */ -int32_t Normalizer::endIndex() const { - return text->endIndex(); -} - -//------------------------------------------------------------------------- -// Property access methods -//------------------------------------------------------------------------- - -void -Normalizer::setMode(UNormalizationMode newMode) -{ - fUMode = newMode; - init(); -} - -UNormalizationMode -Normalizer::getUMode() const -{ - return fUMode; -} - -void -Normalizer::setOption(int32_t option, - UBool value) -{ - if (value) { - fOptions |= option; - } else { - fOptions &= (~option); - } - init(); -} - -UBool -Normalizer::getOption(int32_t option) const -{ - return (fOptions & option) != 0; -} - -/** - * Set the input text over which this Normalizer will iterate. - * The iteration position is set to the beginning of the input text. - */ -void -Normalizer::setText(const UnicodeString& newText, - UErrorCode &status) -{ - if (U_FAILURE(status)) { - return; - } - CharacterIterator *newIter = new StringCharacterIterator(newText); - if (newIter == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - delete text; - text = newIter; - reset(); -} - -/** - * Set the input text over which this Normalizer will iterate. 
- * The iteration position is set to the beginning of the string. - */ -void -Normalizer::setText(const CharacterIterator& newText, - UErrorCode &status) -{ - if (U_FAILURE(status)) { - return; - } - CharacterIterator *newIter = newText.clone(); - if (newIter == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - delete text; - text = newIter; - reset(); -} - -void -Normalizer::setText(ConstChar16Ptr newText, - int32_t length, - UErrorCode &status) -{ - if (U_FAILURE(status)) { - return; - } - CharacterIterator *newIter = new UCharCharacterIterator(newText, length); - if (newIter == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - delete text; - text = newIter; - reset(); -} - -/** - * Copies the text under iteration into the UnicodeString referred to by "result". - * @param result Receives a copy of the text under iteration. - */ -void -Normalizer::getText(UnicodeString& result) -{ - text->getText(result); -} - -//------------------------------------------------------------------------- -// Private utility methods -//------------------------------------------------------------------------- - -void Normalizer::clearBuffer() { - buffer.remove(); - bufferPos=0; -} - -UBool -Normalizer::nextNormalize() { - clearBuffer(); - currentIndex=nextIndex; - text->setIndex(nextIndex); - if(!text->hasNext()) { - return FALSE; - } - // Skip at least one character so we make progress. 
- UnicodeString segment(text->next32PostInc()); - while(text->hasNext()) { - UChar32 c; - if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) { - text->move32(-1, CharacterIterator::kCurrent); - break; - } - segment.append(c); - } - nextIndex=text->getIndex(); - UErrorCode errorCode=U_ZERO_ERROR; - fNorm2->normalize(segment, buffer, errorCode); - return U_SUCCESS(errorCode) && !buffer.isEmpty(); -} - -UBool -Normalizer::previousNormalize() { - clearBuffer(); - nextIndex=currentIndex; - text->setIndex(currentIndex); - if(!text->hasPrevious()) { - return FALSE; - } - UnicodeString segment; - while(text->hasPrevious()) { - UChar32 c=text->previous32(); - segment.insert(0, c); - if(fNorm2->hasBoundaryBefore(c)) { - break; - } - } - currentIndex=text->getIndex(); - UErrorCode errorCode=U_ZERO_ERROR; - fNorm2->normalize(segment, buffer, errorCode); - bufferPos=buffer.length(); - return U_SUCCESS(errorCode) && !buffer.isEmpty(); -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/deps/node/deps/icu-small/source/common/parsepos.cpp b/deps/node/deps/icu-small/source/common/parsepos.cpp deleted file mode 100644 index 56c6c788..00000000 --- a/deps/node/deps/icu-small/source/common/parsepos.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2003-2003, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -*/ - -#include "unicode/parsepos.h" - -U_NAMESPACE_BEGIN - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ParsePosition) - -ParsePosition::~ParsePosition() {} - -ParsePosition * -ParsePosition::clone() const { - return new ParsePosition(*this); -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/patternprops.cpp b/deps/node/deps/icu-small/source/common/patternprops.cpp deleted file mode 100644 index 01e33ce1..00000000 --- a/deps/node/deps/icu-small/source/common/patternprops.cpp +++ /dev/null @@ -1,220 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: patternprops.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011mar13 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" -#include "patternprops.h" - -U_NAMESPACE_BEGIN - -/* - * One byte per Latin-1 character. - * Bit 0 is set if either Pattern property is true, - * bit 1 if Pattern_Syntax is true, - * bit 2 if Pattern_White_Space is true. - * That is, Pattern_Syntax is encoded as 3 and Pattern_White_Space as 5. 
- */ -static const uint8_t latin1[256]={ - // WS: 9..D - 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // WS: 20 Syntax: 21..2F - 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - // Syntax: 3A..40 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // Syntax: 5B..5E - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, - // Syntax: 60 - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // Syntax: 7B..7E - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, - // WS: 85 - 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // Syntax: A1..A7, A9, AB, AC, AE - 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0, - // Syntax: B0, B1, B6, BB, BF - 3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // Syntax: D7 - 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // Syntax: F7 - 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* - * One byte per 32 characters from U+2000..U+303F indexing into - * a small table of 32-bit data words. - * The first two data words are all-zeros and all-ones. - */ -static const uint8_t index2000[130]={ - 2, 3, 4, 0, 0, 0, 0, 0, // 20xx - 0, 0, 0, 0, 5, 1, 1, 1, // 21xx - 1, 1, 1, 1, 1, 1, 1, 1, // 22xx - 1, 1, 1, 1, 1, 1, 1, 1, // 23xx - 1, 1, 1, 0, 0, 0, 0, 0, // 24xx - 1, 1, 1, 1, 1, 1, 1, 1, // 25xx - 1, 1, 1, 1, 1, 1, 1, 1, // 26xx - 1, 1, 1, 6, 7, 1, 1, 1, // 27xx - 1, 1, 1, 1, 1, 1, 1, 1, // 28xx - 1, 1, 1, 1, 1, 1, 1, 1, // 29xx - 1, 1, 1, 1, 1, 1, 1, 1, // 2Axx - 1, 1, 1, 1, 1, 1, 1, 1, // 2Bxx - 0, 0, 0, 0, 0, 0, 0, 0, // 2Cxx - 0, 0, 0, 0, 0, 0, 0, 0, // 2Dxx - 1, 1, 1, 1, 0, 0, 0, 0, // 2Exx - 0, 0, 0, 0, 0, 0, 0, 0, // 2Fxx - 8, 9 // 3000..303F -}; - -/* - * One 32-bit integer per 32 characters. 
Ranges of all-false and all-true - * are mapped to the first two values, other ranges map to appropriate bit patterns. - */ -static const uint32_t syntax2000[]={ - 0, - 0xffffffff, - 0xffff0000, // 2: 2010..201F - 0x7fff00ff, // 3: 2020..2027, 2030..203E - 0x7feffffe, // 4: 2041..2053, 2055..205E - 0xffff0000, // 5: 2190..219F - 0x003fffff, // 6: 2760..2775 - 0xfff00000, // 7: 2794..279F - 0xffffff0e, // 8: 3001..3003, 3008..301F - 0x00010001 // 9: 3020, 3030 -}; - -/* - * Same as syntax2000, but with additional bits set for the - * Pattern_White_Space characters 200E 200F 2028 2029. - */ -static const uint32_t syntaxOrWhiteSpace2000[]={ - 0, - 0xffffffff, - 0xffffc000, // 2: 200E..201F - 0x7fff03ff, // 3: 2020..2029, 2030..203E - 0x7feffffe, // 4: 2041..2053, 2055..205E - 0xffff0000, // 5: 2190..219F - 0x003fffff, // 6: 2760..2775 - 0xfff00000, // 7: 2794..279F - 0xffffff0e, // 8: 3001..3003, 3008..301F - 0x00010001 // 9: 3020, 3030 -}; - -UBool -PatternProps::isSyntax(UChar32 c) { - if(c<0) { - return FALSE; - } else if(c<=0xff) { - return (UBool)(latin1[c]>>1)&1; - } else if(c<0x2010) { - return FALSE; - } else if(c<=0x3030) { - uint32_t bits=syntax2000[index2000[(c-0x2000)>>5]]; - return (UBool)((bits>>(c&0x1f))&1); - } else if(0xfd3e<=c && c<=0xfe46) { - return c<=0xfd3f || 0xfe45<=c; - } else { - return FALSE; - } -} - -UBool -PatternProps::isSyntaxOrWhiteSpace(UChar32 c) { - if(c<0) { - return FALSE; - } else if(c<=0xff) { - return (UBool)(latin1[c]&1); - } else if(c<0x200e) { - return FALSE; - } else if(c<=0x3030) { - uint32_t bits=syntaxOrWhiteSpace2000[index2000[(c-0x2000)>>5]]; - return (UBool)((bits>>(c&0x1f))&1); - } else if(0xfd3e<=c && c<=0xfe46) { - return c<=0xfd3f || 0xfe45<=c; - } else { - return FALSE; - } -} - -UBool -PatternProps::isWhiteSpace(UChar32 c) { - if(c<0) { - return FALSE; - } else if(c<=0xff) { - return (UBool)(latin1[c]>>2)&1; - } else if(0x200e<=c && c<=0x2029) { - return c<=0x200f || 0x2028<=c; - } else { - return FALSE; - } -} 
- -const UChar * -PatternProps::skipWhiteSpace(const UChar *s, int32_t length) { - while(length>0 && isWhiteSpace(*s)) { - ++s; - --length; - } - return s; -} - -const UChar * -PatternProps::trimWhiteSpace(const UChar *s, int32_t &length) { - if(length<=0 || (!isWhiteSpace(s[0]) && !isWhiteSpace(s[length-1]))) { - return s; - } - int32_t start=0; - int32_t limit=length; - while(start0 && !isSyntaxOrWhiteSpace(*s)) { - ++s; - --length; - } - return s; -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/patternprops.h b/deps/node/deps/icu-small/source/common/patternprops.h deleted file mode 100644 index a42eb3c2..00000000 --- a/deps/node/deps/icu-small/source/common/patternprops.h +++ /dev/null @@ -1,91 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: patternprops.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011mar13 -* created by: Markus W. Scherer -*/ - -#ifndef __PATTERNPROPS_H__ -#define __PATTERNPROPS_H__ - -#include "unicode/utypes.h" - -U_NAMESPACE_BEGIN - -/** - * Implements the immutable Unicode properties Pattern_Syntax and Pattern_White_Space. - * Hardcodes these properties, does not load data, does not depend on other ICU classes. - *

- * Note: Both properties include ASCII as well as non-ASCII, non-Latin-1 code points, - * and both properties only include BMP code points (no supplementary ones). - * Pattern_Syntax includes some unassigned code points. - *

- * [:Pattern_White_Space:] = - * [\u0009-\u000D\ \u0085\u200E\u200F\u2028\u2029] - *

- * [:Pattern_Syntax:] = - * [!-/\:-@\[-\^`\{-~\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE - * \u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7 - * \u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E - * \u2190-\u245F\u2500-\u2775\u2794-\u2BFF\u2E00-\u2E7F - * \u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46] - * @author mscherer - */ -class U_COMMON_API PatternProps { -public: - /** - * @return TRUE if c is a Pattern_Syntax code point. - */ - static UBool isSyntax(UChar32 c); - - /** - * @return TRUE if c is a Pattern_Syntax or Pattern_White_Space code point. - */ - static UBool isSyntaxOrWhiteSpace(UChar32 c); - - /** - * @return TRUE if c is a Pattern_White_Space character. - */ - static UBool isWhiteSpace(UChar32 c); - - /** - * Skips over Pattern_White_Space starting at s. - * @return The smallest pointer at or after s with a non-white space character. - */ - static const UChar *skipWhiteSpace(const UChar *s, int32_t length); - - /** - * @return s except with leading and trailing Pattern_White_Space removed and length adjusted. - */ - static const UChar *trimWhiteSpace(const UChar *s, int32_t &length); - - /** - * Tests whether the string contains a "pattern identifier", that is, - * whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters. - * @return TRUE if there are no Pattern_White_Space or Pattern_Syntax characters in s. - */ - static UBool isIdentifier(const UChar *s, int32_t length); - - /** - * Skips over a "pattern identifier" starting at index s. - * @return The smallest pointer at or after s with - * a Pattern_White_Space or Pattern_Syntax character. 
- */ - static const UChar *skipIdentifier(const UChar *s, int32_t length); - -private: - PatternProps(); // no constructor: all static methods -}; - -U_NAMESPACE_END - -#endif // __PATTERNPROPS_H__ diff --git a/deps/node/deps/icu-small/source/common/pluralmap.cpp b/deps/node/deps/icu-small/source/common/pluralmap.cpp deleted file mode 100644 index a85dd1c9..00000000 --- a/deps/node/deps/icu-small/source/common/pluralmap.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - * Copyright (C) 2015, International Business Machines Corporation and - * others. All Rights Reserved. - */ - -#include "unicode/unistr.h" -#include "charstr.h" -#include "cstring.h" -#include "pluralmap.h" - -U_NAMESPACE_BEGIN - -static const char * const gPluralForms[] = { - "other", "zero", "one", "two", "few", "many"}; - -PluralMapBase::Category -PluralMapBase::toCategory(const char *pluralForm) { - for (int32_t i = 0; i < UPRV_LENGTHOF(gPluralForms); ++i) { - if (uprv_strcmp(pluralForm, gPluralForms[i]) == 0) { - return static_cast(i); - } - } - return NONE; -} - -PluralMapBase::Category -PluralMapBase::toCategory(const UnicodeString &pluralForm) { - CharString cCategory; - UErrorCode status = U_ZERO_ERROR; - cCategory.appendInvariantChars(pluralForm, status); - return U_FAILURE(status) ? NONE : toCategory(cCategory.data()); -} - -const char *PluralMapBase::getCategoryName(Category c) { - int32_t index = c; - return (index < 0 || index >= UPRV_LENGTHOF(gPluralForms)) ? - NULL : gPluralForms[index]; -} - - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/pluralmap.h b/deps/node/deps/icu-small/source/common/pluralmap.h deleted file mode 100644 index db644093..00000000 --- a/deps/node/deps/icu-small/source/common/pluralmap.h +++ /dev/null @@ -1,292 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2015, International Business Machines Corporation and -* others. All Rights Reserved. -****************************************************************************** -* -* File pluralmap.h - PluralMap class that maps plural categories to values. -****************************************************************************** -*/ - -#ifndef __PLURAL_MAP_H__ -#define __PLURAL_MAP_H__ - -#include "unicode/uobject.h" -#include "cmemory.h" - -U_NAMESPACE_BEGIN - -class UnicodeString; - -class U_COMMON_API PluralMapBase : public UMemory { -public: - /** - * The names of all the plural categories. NONE is not an actual plural - * category, but rather represents the absense of a plural category. - */ - enum Category { - NONE = -1, - OTHER, - ZERO, - ONE, - TWO, - FEW, - MANY, - CATEGORY_COUNT - }; - - /** - * Converts a category name such as "zero", "one", "two", "few", "many" - * or "other" to a category enum. Returns NONE for an unrecognized - * category name. - */ - static Category toCategory(const char *categoryName); - - /** - * Converts a category name such as "zero", "one", "two", "few", "many" - * or "other" to a category enum. Returns NONE for urecongized - * category name. - */ - static Category toCategory(const UnicodeString &categoryName); - - /** - * Converts a category to a name. - * Passing NONE or CATEGORY_COUNT for category returns NULL. - */ - static const char *getCategoryName(Category category); -}; - -/** - * A Map of plural categories to values. It maintains ownership of the - * values. - * - * Type T is the value type. T must provide the followng: - * 1) Default constructor - * 2) Copy constructor - * 3) Assignment operator - * 4) Must extend UMemory - */ -template -class PluralMap : public PluralMapBase { -public: - /** - * Other category is maps to a copy of the default value. 
- */ - PluralMap() : fOtherVariant() { - initializeNew(); - } - - /** - * Other category is mapped to otherVariant. - */ - PluralMap(const T &otherVariant) : fOtherVariant(otherVariant) { - initializeNew(); - } - - PluralMap(const PluralMap &other) : fOtherVariant(other.fOtherVariant) { - fVariants[0] = &fOtherVariant; - for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) { - fVariants[i] = other.fVariants[i] ? - new T(*other.fVariants[i]) : NULL; - } - } - - PluralMap &operator=(const PluralMap &other) { - if (this == &other) { - return *this; - } - for (int32_t i = 0; i < UPRV_LENGTHOF(fVariants); ++i) { - if (fVariants[i] != NULL && other.fVariants[i] != NULL) { - *fVariants[i] = *other.fVariants[i]; - } else if (fVariants[i] != NULL) { - delete fVariants[i]; - fVariants[i] = NULL; - } else if (other.fVariants[i] != NULL) { - fVariants[i] = new T(*other.fVariants[i]); - } else { - // do nothing - } - } - return *this; - } - - ~PluralMap() { - for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) { - delete fVariants[i]; - } - } - - /** - * Removes all mappings and makes 'other' point to the default value. - */ - void clear() { - *fVariants[0] = T(); - for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) { - delete fVariants[i]; - fVariants[i] = NULL; - } - } - - /** - * Iterates through the mappings in this instance, set index to NONE - * prior to using. Call next repeatedly to get the values until it - * returns NULL. Each time next returns, caller may pass index - * to getCategoryName() to get the name of the plural category. - * When this function returns NULL, index is CATEGORY_COUNT - */ - const T *next(Category &index) const { - int32_t idx = index; - ++idx; - for (; idx < UPRV_LENGTHOF(fVariants); ++idx) { - if (fVariants[idx] != NULL) { - index = static_cast(idx); - return fVariants[idx]; - } - } - index = static_cast(idx); - return NULL; - } - - /** - * non const version of next. 
- */ - T *nextMutable(Category &index) { - const T *result = next(index); - return const_cast(result); - } - - /** - * Returns the 'other' variant. - * Same as calling get(OTHER). - */ - const T &getOther() const { - return get(OTHER); - } - - /** - * Returns the value associated with a category. - * If no value found, or v is NONE or CATEGORY_COUNT, falls - * back to returning the value for the 'other' category. - */ - const T &get(Category v) const { - int32_t index = v; - if (index < 0 || index >= UPRV_LENGTHOF(fVariants) || fVariants[index] == NULL) { - return *fVariants[0]; - } - return *fVariants[index]; - } - - /** - * Convenience routine to get the value by category name. Otherwise - * works just like get(Category). - */ - const T &get(const char *category) const { - return get(toCategory(category)); - } - - /** - * Convenience routine to get the value by category name as a - * UnicodeString. Otherwise works just like get(category). - */ - const T &get(const UnicodeString &category) const { - return get(toCategory(category)); - } - - /** - * Returns a pointer to the value associated with a category - * that caller can safely modify. If the value was defaulting to the 'other' - * variant because no explicit value was stored, this method creates a - * new value using the default constructor at the returned pointer. - * - * @param category the category with the value to change. - * @param status error returned here if index is NONE or CATEGORY_COUNT - * or memory could not be allocated, or any other error happens. - */ - T *getMutable( - Category category, - UErrorCode &status) { - return getMutable(category, NULL, status); - } - - /** - * Convenience routine to get a mutable pointer to a value by category name. - * Otherwise works just like getMutable(Category, UErrorCode &). - * reports an error if the category name is invalid. 
- */ - T *getMutable( - const char *category, - UErrorCode &status) { - return getMutable(toCategory(category), NULL, status); - } - - /** - * Just like getMutable(Category, UErrorCode &) but copies defaultValue to - * returned pointer if it was defaulting to the 'other' variant - * because no explicit value was stored. - */ - T *getMutableWithDefault( - Category category, - const T &defaultValue, - UErrorCode &status) { - return getMutable(category, &defaultValue, status); - } - - /** - * Returns TRUE if this object equals rhs. - */ - UBool equals( - const PluralMap &rhs, - UBool (*eqFunc)(const T &, const T &)) const { - for (int32_t i = 0; i < UPRV_LENGTHOF(fVariants); ++i) { - if (fVariants[i] == rhs.fVariants[i]) { - continue; - } - if (fVariants[i] == NULL || rhs.fVariants[i] == NULL) { - return FALSE; - } - if (!eqFunc(*fVariants[i], *rhs.fVariants[i])) { - return FALSE; - } - } - return TRUE; - } - -private: - T fOtherVariant; - T* fVariants[6]; - - T *getMutable( - Category category, - const T *defaultValue, - UErrorCode &status) { - if (U_FAILURE(status)) { - return NULL; - } - int32_t index = category; - if (index < 0 || index >= UPRV_LENGTHOF(fVariants)) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - if (fVariants[index] == NULL) { - fVariants[index] = defaultValue == NULL ? - new T() : new T(*defaultValue); - } - if (!fVariants[index]) { - status = U_MEMORY_ALLOCATION_ERROR; - } - return fVariants[index]; - } - - void initializeNew() { - fVariants[0] = &fOtherVariant; - for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) { - fVariants[i] = NULL; - } - } -}; - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/propname.cpp b/deps/node/deps/icu-small/source/common/propname.cpp deleted file mode 100644 index a12eb7d9..00000000 --- a/deps/node/deps/icu-small/source/common/propname.cpp +++ /dev/null @@ -1,328 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2002-2014, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Author: Alan Liu -* Created: October 30 2002 -* Since: ICU 2.4 -* 2010nov19 Markus Scherer Rewrite for formatVersion 2. -********************************************************************** -*/ -#include "propname.h" -#include "unicode/uchar.h" -#include "unicode/udata.h" -#include "unicode/uscript.h" -#include "umutex.h" -#include "cmemory.h" -#include "cstring.h" -#include "uarrsort.h" -#include "uinvchar.h" - -#define INCLUDED_FROM_PROPNAME_CPP -#include "propname_data.h" - -U_CDECL_BEGIN - -/** - * Get the next non-ignorable ASCII character from a property name - * and lowercases it. - * @return ((advance count for the name)<<8)|character - */ -static inline int32_t -getASCIIPropertyNameChar(const char *name) { - int32_t i; - char c; - - /* Ignore delimiters '-', '_', and ASCII White_Space */ - for(i=0; - (c=name[i++])==0x2d || c==0x5f || - c==0x20 || (0x09<=c && c<=0x0d); - ) {} - - if(c!=0) { - return (i<<8)|(uint8_t)uprv_asciitolower((char)c); - } else { - return i<<8; - } -} - -/** - * Get the next non-ignorable EBCDIC character from a property name - * and lowercases it. - * @return ((advance count for the name)<<8)|character - */ -static inline int32_t -getEBCDICPropertyNameChar(const char *name) { - int32_t i; - char c; - - /* Ignore delimiters '-', '_', and EBCDIC White_Space */ - for(i=0; - (c=name[i++])==0x60 || c==0x6d || - c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d; - ) {} - - if(c!=0) { - return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c); - } else { - return i<<8; - } -} - -/** - * Unicode property names and property value names are compared "loosely". 
- * - * UCD.html 4.0.1 says: - * For all property names, property value names, and for property values for - * Enumerated, Binary, or Catalog properties, use the following - * loose matching rule: - * - * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. - * - * This function does just that, for (char *) name strings. - * It is almost identical to ucnv_compareNames() but also ignores - * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). - * - * @internal - */ - -U_CAPI int32_t U_EXPORT2 -uprv_compareASCIIPropertyNames(const char *name1, const char *name2) { - int32_t rc, r1, r2; - - for(;;) { - r1=getASCIIPropertyNameChar(name1); - r2=getASCIIPropertyNameChar(name2); - - /* If we reach the ends of both strings then they match */ - if(((r1|r2)&0xff)==0) { - return 0; - } - - /* Compare the lowercased characters */ - if(r1!=r2) { - rc=(r1&0xff)-(r2&0xff); - if(rc!=0) { - return rc; - } - } - - name1+=r1>>8; - name2+=r2>>8; - } -} - -U_CAPI int32_t U_EXPORT2 -uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) { - int32_t rc, r1, r2; - - for(;;) { - r1=getEBCDICPropertyNameChar(name1); - r2=getEBCDICPropertyNameChar(name2); - - /* If we reach the ends of both strings then they match */ - if(((r1|r2)&0xff)==0) { - return 0; - } - - /* Compare the lowercased characters */ - if(r1!=r2) { - rc=(r1&0xff)-(r2&0xff); - if(rc!=0) { - return rc; - } - } - - name1+=r1>>8; - name2+=r2>>8; - } -} - -U_CDECL_END - -U_NAMESPACE_BEGIN - -int32_t PropNameData::findProperty(int32_t property) { - int32_t i=1; // valueMaps index, initially after numRanges - for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) { - // Read and skip the start and limit of this range. - int32_t start=valueMaps[i]; - int32_t limit=valueMaps[i+1]; - i+=2; - if(property0; --numRanges) { - // Read and skip the start and limit of this range. 
- int32_t start=valueMaps[valueMapIndex]; - int32_t limit=valueMaps[valueMapIndex+1]; - valueMapIndex+=2; - if(value0; --nameIndex) { - nameGroup=uprv_strchr(nameGroup, 0)+1; - } - if(*nameGroup==0) { - return NULL; // no name (Property[Value]Aliases.txt has "n/a") - } - return nameGroup; -} - -UBool PropNameData::containsName(BytesTrie &trie, const char *name) { - if(name==NULL) { - return FALSE; - } - UStringTrieResult result=USTRINGTRIE_NO_VALUE; - char c; - while((c=*name++)!=0) { - c=uprv_invCharToLowercaseAscii(c); - // Ignore delimiters '-', '_', and ASCII White_Space. - if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) { - continue; - } - if(!USTRINGTRIE_HAS_NEXT(result)) { - return FALSE; - } - result=trie.next((uint8_t)c); - } - return USTRINGTRIE_HAS_VALUE(result); -} - -const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) { - int32_t valueMapIndex=findProperty(property); - if(valueMapIndex==0) { - return NULL; // Not a known property. - } - return getName(nameGroups+valueMaps[valueMapIndex], nameChoice); -} - -const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) { - int32_t valueMapIndex=findProperty(property); - if(valueMapIndex==0) { - return NULL; // Not a known property. 
- } - int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value); - if(nameGroupOffset==0) { - return NULL; - } - return getName(nameGroups+nameGroupOffset, nameChoice); -} - -int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) { - BytesTrie trie(bytesTries+bytesTrieOffset); - if(containsName(trie, alias)) { - return trie.getValue(); - } else { - return UCHAR_INVALID_CODE; - } -} - -int32_t PropNameData::getPropertyEnum(const char *alias) { - return getPropertyOrValueEnum(0, alias); -} - -int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) { - int32_t valueMapIndex=findProperty(property); - if(valueMapIndex==0) { - return UCHAR_INVALID_CODE; // Not a known property. - } - valueMapIndex=valueMaps[valueMapIndex+1]; - if(valueMapIndex==0) { - return UCHAR_INVALID_CODE; // The property does not have named values. - } - // valueMapIndex is the start of the property's valueMap, - // where the first word is the BytesTrie offset. 
- return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); -} -U_NAMESPACE_END - -//---------------------------------------------------------------------- -// Public API implementation - -U_CAPI const char* U_EXPORT2 -u_getPropertyName(UProperty property, - UPropertyNameChoice nameChoice) { - U_NAMESPACE_USE - return PropNameData::getPropertyName(property, nameChoice); -} - -U_CAPI UProperty U_EXPORT2 -u_getPropertyEnum(const char* alias) { - U_NAMESPACE_USE - return (UProperty)PropNameData::getPropertyEnum(alias); -} - -U_CAPI const char* U_EXPORT2 -u_getPropertyValueName(UProperty property, - int32_t value, - UPropertyNameChoice nameChoice) { - U_NAMESPACE_USE - return PropNameData::getPropertyValueName(property, value, nameChoice); -} - -U_CAPI int32_t U_EXPORT2 -u_getPropertyValueEnum(UProperty property, - const char* alias) { - U_NAMESPACE_USE - return PropNameData::getPropertyValueEnum(property, alias); -} - -U_CAPI const char* U_EXPORT2 -uscript_getName(UScriptCode scriptCode){ - return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, - U_LONG_PROPERTY_NAME); -} - -U_CAPI const char* U_EXPORT2 -uscript_getShortName(UScriptCode scriptCode){ - return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, - U_SHORT_PROPERTY_NAME); -} diff --git a/deps/node/deps/icu-small/source/common/propname.h b/deps/node/deps/icu-small/source/common/propname.h deleted file mode 100644 index 1a8ced5b..00000000 --- a/deps/node/deps/icu-small/source/common/propname.h +++ /dev/null @@ -1,212 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2002-2011, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* Author: Alan Liu -* Created: October 30 2002 -* Since: ICU 2.4 -* 2010nov19 Markus Scherer Rewrite for formatVersion 2. -********************************************************************** -*/ -#ifndef PROPNAME_H -#define PROPNAME_H - -#include "unicode/utypes.h" -#include "unicode/bytestrie.h" -#include "unicode/uchar.h" -#include "udataswp.h" -#include "uprops.h" - -/* - * This header defines the in-memory layout of the property names data - * structure representing the UCD data files PropertyAliases.txt and - * PropertyValueAliases.txt. It is used by: - * propname.cpp - reads data - * genpname - creates data - */ - -/* low-level char * property name comparison -------------------------------- */ - -U_CDECL_BEGIN - -/** - * \var uprv_comparePropertyNames - * Unicode property names and property value names are compared "loosely". - * - * UCD.html 4.0.1 says: - * For all property names, property value names, and for property values for - * Enumerated, Binary, or Catalog properties, use the following - * loose matching rule: - * - * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. - * - * This function does just that, for (char *) name strings. - * It is almost identical to ucnv_compareNames() but also ignores - * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). 
- * - * @internal - */ - -U_CAPI int32_t U_EXPORT2 -uprv_compareASCIIPropertyNames(const char *name1, const char *name2); - -U_CAPI int32_t U_EXPORT2 -uprv_compareEBCDICPropertyNames(const char *name1, const char *name2); - -#if U_CHARSET_FAMILY==U_ASCII_FAMILY -# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY -# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames -#else -# error U_CHARSET_FAMILY is not valid -#endif - -U_CDECL_END - -/* UDataMemory structure and signatures ------------------------------------- */ - -#define PNAME_DATA_NAME "pnames" -#define PNAME_DATA_TYPE "icu" - -/* Fields in UDataInfo: */ - -/* PNAME_SIG[] is encoded as numeric literals for compatibility with the HP compiler */ -#define PNAME_SIG_0 ((uint8_t)0x70) /* p */ -#define PNAME_SIG_1 ((uint8_t)0x6E) /* n */ -#define PNAME_SIG_2 ((uint8_t)0x61) /* a */ -#define PNAME_SIG_3 ((uint8_t)0x6D) /* m */ - -U_NAMESPACE_BEGIN - -class PropNameData { -public: - enum { - // Byte offsets from the start of the data, after the generic header. - IX_VALUE_MAPS_OFFSET, - IX_BYTE_TRIES_OFFSET, - IX_NAME_GROUPS_OFFSET, - IX_RESERVED3_OFFSET, - IX_RESERVED4_OFFSET, - IX_TOTAL_SIZE, - - // Other values. 
- IX_MAX_NAME_LENGTH, - IX_RESERVED7, - IX_COUNT - }; - - static const char *getPropertyName(int32_t property, int32_t nameChoice); - static const char *getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice); - - static int32_t getPropertyEnum(const char *alias); - static int32_t getPropertyValueEnum(int32_t property, const char *alias); - -private: - static int32_t findProperty(int32_t property); - static int32_t findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value); - static const char *getName(const char *nameGroup, int32_t nameIndex); - static UBool containsName(BytesTrie &trie, const char *name); - - static int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias); - - static const int32_t indexes[]; - static const int32_t valueMaps[]; - static const uint8_t bytesTries[]; - static const char nameGroups[]; -}; - -/* - * pnames.icu formatVersion 2 - * - * formatVersion 2 is new in ICU 4.8. - * In ICU 4.8, the pnames.icu data file is used only in ICU4J. - * ICU4C 4.8 has the same data structures hardcoded in source/common/propname_data.h. - * - * For documentation of pnames.icu formatVersion 1 see ICU4C 4.6 (2010-dec-01) - * or earlier versions of this header file (source/common/propname.h). - * - * The pnames.icu begins with the standard ICU DataHeader/UDataInfo. - * After that: - * - * int32_t indexes[8]; - * - * (See the PropNameData::IX_... constants.) - * - * The first 6 indexes are byte offsets from the beginning of the data - * (beginning of indexes[]) to following structures. - * The length of each structure is the difference between its offset - * and the next one. - * All offsets are filled in: Where there is no data between two offsets, - * those two offsets are the same. - * The last offset (indexes[PropNameData::IX_TOTAL_SIZE]) indicates the - * total number of bytes in the file. (Not counting the standard headers.) 
- * - * The sixth index (indexes[PropNameData::IX_MAX_NAME_LENGTH]) has the - * maximum length of any Unicode property (or property value) alias. - * (Without normalization, that is, including underscores etc.) - * - * int32_t valueMaps[]; - * - * The valueMaps[] begins with a map from UProperty enums to properties, - * followed by the per-property value maps from property values to names, - * for those properties that have named values. - * (Binary & enumerated, plus General_Category_Mask.) - * - * valueMaps[0] contains the number of UProperty enum ranges. - * For each range: - * int32_t start, limit -- first and last+1 UProperty enum of a dense range - * Followed by (limit-start) pairs of - * int32_t nameGroupOffset; - * Offset into nameGroups[] for the property's names/aliases. - * int32_t valueMapIndex; - * Offset of the property's value map in the valueMaps[] array. - * If the valueMapIndex is 0, then the property does not have named values. - * - * For each property's value map: - * int32_t bytesTrieOffset; -- Offset into bytesTries[] for name->value mapping. - * int32_t numRanges; - * If numRanges is in the range 1..15, then that many ranges of values follow. - * Per range: - * int32_t start, limit -- first and last+1 UProperty enum of a range - * Followed by (limit-start) entries of - * int32_t nameGroupOffset; - * Offset into nameGroups[] for the property value's names/aliases. - * If the nameGroupOffset is 0, then this is not a named value for this property. - * (That is, the ranges need not be dense.) - * If numRanges is >=0x10, then (numRanges-0x10) sorted values - * and then (numRanges-0x10) corresponding nameGroupOffsets follow. - * Values are sorted as signed integers. - * In this case, the set of values is dense; no nameGroupOffset will be 0. - * - * For both properties and property values, ranges are sorted by their start/limit values. 
- * - * uint8_t bytesTries[]; - * - * This is a sequence of BytesTrie structures, byte-serialized tries for - * mapping from names/aliases to values. - * The first one maps from property names/aliases to UProperty enum constants. - * The following ones are indexed by property value map bytesTrieOffsets - * for mapping each property's names/aliases to their property values. - * - * char nameGroups[]; - * - * This is a sequence of property name groups. - * Each group is a list of names/aliases (invariant-character strings) for - * one property or property value, in the order of UCharNameChoice. - * The first byte of each group is the number of names in the group. - * It is followed by that many NUL-terminated strings. - * The first string is for the short name; if there is no short name, - * then the first string is empty. - * The second string is the long name. Further strings are additional aliases. - * - * The first name group is for a property rather than a property value, - * so that a nameGroupOffset of 0 can be used to indicate "no value" - * in a property's sparse value ranges. - */ - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/propname_data.h b/deps/node/deps/icu-small/source/common/propname_data.h deleted file mode 100644 index 15a3301e..00000000 --- a/deps/node/deps/icu-small/source/common/propname_data.h +++ /dev/null @@ -1,1861 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// -// Copyright (C) 1999-2016, International Business Machines -// Corporation and others. All Rights Reserved. 
-// -// file name: propname_data.h -// -// machine-generated by: icu/tools/unicode/c/genprops/pnamesbuilder.cpp - - -#ifdef INCLUDED_FROM_PROPNAME_CPP - -U_NAMESPACE_BEGIN - -const int32_t PropNameData::indexes[8]={0x20,0x1548,0x4e03,0xa114,0xa114,0xa114,0x2f,0}; - -const int32_t PropNameData::valueMaps[1354]={ -6,0,0x41,0,0xe3,0x356,0xe3,0x36c,0xe3,0x381,0xe3,0x397,0xe3,0x3a2,0xe3,0x3c3, -0xe3,0x3d3,0xe3,0x3e2,0xe3,0x3f0,0xe3,0x414,0xe3,0x42b,0xe3,0x443,0xe3,0x45a,0xe3,0x469, -0xe3,0x478,0xe3,0x489,0xe3,0x497,0xe3,0x4a9,0xe3,0x4c3,0xe3,0x4de,0xe3,0x4f3,0xe3,0x510, -0xe3,0x521,0xe3,0x52c,0xe3,0x54b,0xe3,0x561,0xe3,0x572,0xe3,0x582,0xe3,0x59d,0xe3,0x5b6, -0xe3,0x5c7,0xe3,0x5e1,0xe3,0x5f4,0xe3,0x604,0xe3,0x61e,0xe3,0x637,0xe3,0x64e,0xe3,0x662, -0xe3,0x678,0xe3,0x68c,0xe3,0x6a2,0xe3,0x6bc,0xe3,0x6d4,0xe3,0x6f0,0xe3,0x6f8,0xe3,0x700, -0xe3,0x708,0xe3,0x710,0xe3,0x719,0xe3,0x726,0xe3,0x739,0xe3,0x756,0xe3,0x773,0xe3,0x790, -0xe3,0x7ae,0xe3,0x7cc,0xe3,0x7f0,0xe3,0x7fd,0xe3,0x817,0xe3,0x82c,0xe3,0x847,0xe3,0x85e, -0xe3,0x875,0xe3,0x897,0xe3,0x1000,0x1019,0x8b6,0x15d,0xad6,0x178,0x2c95,0xe9,0x2cb4,0x2a0,0x2df2, -0x2b6,0x2e4c,0x2c0,0x30a9,0x2e2,0x39a4,0x34c,0x3a14,0x356,0x3cae,0x385,0x3cec,0x38d,0x474b,0x44a,0x47c9, -0x454,0x47ee,0x45a,0x4808,0x460,0x4829,0x467,0x4843,0xe9,0x4868,0xe9,0x488e,0x46e,0x4938,0x484,0x49b1, -0x497,0x4a63,0x4b2,0x4a9a,0x4b9,0x4c49,0x4cc,0x50c9,0x4f4,0x2000,0x2001,0x5128,0x4fc,0x3000,0x3001,0x51b4, -0,0x4000,0x400e,0x51c6,0,0x51cf,0,0x51e9,0,0x51fa,0,0x520b,0,0x5221,0,0x522a, -0,0x5247,0,0x5265,0,0x5283,0,0x52a1,0,0x52b7,0,0x52cb,0,0x52e1,0,0x7000, -0x7001,0x52fa,0,0x7d6,0x12,0,1,0x12,0x20,0x7f4,0x49,0,1,7,8,9, -0xa,0xb,0xc,0xd,0xe,0xf,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19, -0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x54,0x5b,0x67,0x6b,0x76, -0x7a,0x81,0x82,0x84,0x85,0xc8,0xca,0xd6,0xd8,0xda,0xdc,0xde,0xe0,0xe2,0xe4,0xe6, -0xe8,0xe9,0xea,0xf0,0x2e,0x40,0x4c,0x56,0x67,0x72,0x7f,0x8c,0x99,0xa6,0xb3,0xc0, 
-0xcd,0xda,0xe7,0xf4,0x101,0x10e,0x11b,0x128,0x135,0x142,0x14f,0x15c,0x169,0x176,0x183,0x190, -0x19d,0x1aa,0x1b7,0x1c4,0x1d1,0x1de,0x1eb,0x1fa,0x209,0x218,0x227,0x236,0x245,0x254,0x263,0x27d, -0x291,0x2a5,0x2c0,0x2cf,0x2d8,0x2e8,0x2f0,0x2f9,0x308,0x311,0x321,0x332,0x343,0x986,1,0, -0x17,0x8c5,0x8d6,0x8e7,0x8fb,0x912,0x92a,0x93c,0x951,0x968,0x97d,0x98d,0x99f,0x9bc,0x9d8,0x9ea, -0xa07,0xa23,0xa3f,0xa54,0xa69,0xa83,0xa9e,0xab9,0xb28,1,0,0x124,0xae1,0xaee,0xb01,0xb29, -0xb47,0xb65,0xb7d,0xba8,0xbd2,0xbea,0xbfd,0xc10,0xc1f,0xc2e,0xc3d,0xc4c,0xc63,0xc74,0xc87,0xc9a, -0xca7,0xcb4,0xcc3,0xcd4,0xce9,0xcfa,0xd05,0xd0e,0xd1f,0xd30,0xd43,0xd55,0xd68,0xd7b,0xdba,0xdc7, -0xdd4,0xde1,0xdf6,0xe26,0xe40,0xe61,0xe8c,0xeaf,0xf0d,0xf34,0xf4f,0xf5e,0xf85,0xfad,0xfd0,0xff3, -0x101d,0x1036,0x1055,0x1078,0x109c,0x10af,0x10c9,0x10f3,0x110b,0x1133,0x115c,0x116f,0x1182,0x1195,0x11bc,0x11cb, -0x11eb,0x1219,0x1237,0x1265,0x1281,0x129c,0x12b5,0x12ce,0x12ef,0x131f,0x133e,0x1360,0x1394,0x13c1,0x1406,0x1427, -0x1451,0x1472,0x149b,0x14ae,0x14e1,0x14f8,0x1507,0x1518,0x1543,0x155a,0x158b,0x15b9,0x15fc,0x1607,0x1640,0x1651, -0x1662,0x166f,0x1682,0x16bc,0x16e0,0x1704,0x173e,0x1776,0x17a1,0x17b9,0x17e5,0x1811,0x181e,0x182d,0x184a,0x186c, -0x189a,0x18ba,0x18e1,0x1908,0x1927,0x193a,0x194b,0x195c,0x1981,0x19a6,0x19cd,0x1a01,0x1a2e,0x1a4c,0x1a5f,0x1a78, -0x1ab1,0x1ac0,0x1ae0,0x1b02,0x1b24,0x1b3b,0x1b52,0x1b7f,0x1b98,0x1bb1,0x1be2,0x1c0c,0x1c27,0x1c3a,0x1c59,0x1c62, -0x1c75,0x1c93,0x1cb1,0x1cc4,0x1cdb,0x1cf0,0x1d25,0x1d49,0x1d5e,0x1d6d,0x1d80,0x1da4,0x1dad,0x1dd1,0x1de8,0x1dfb, -0x1e0a,0x1e15,0x1e36,0x1e4e,0x1e5d,0x1e6c,0x1e7b,0x1e92,0x1ea7,0x1ebc,0x1ef5,0x1f08,0x1f24,0x1f2f,0x1f3c,0x1f6a, -0x1f8e,0x1fb1,0x1fc4,0x1fe6,0x1ff9,0x2014,0x2037,0x205a,0x207f,0x2090,0x20bf,0x20ec,0x2103,0x211e,0x212d,0x2158, -0x2190,0x21ca,0x21f8,0x2209,0x2216,0x223a,0x2249,0x2265,0x227f,0x229c,0x22d4,0x22e9,0x2316,0x2335,0x2363,0x2383, 
-0x23b7,0x23c6,0x23f0,0x2413,0x243e,0x2449,0x245a,0x2475,0x2499,0x24a6,0x24bb,0x24e2,0x250d,0x2544,0x2557,0x2568, -0x2598,0x25a9,0x25b8,0x25cd,0x25eb,0x25fe,0x2611,0x2628,0x2645,0x2650,0x2659,0x267b,0x2690,0x26b5,0x26cc,0x26f5, -0x2710,0x2725,0x273e,0x275f,0x2794,0x27a5,0x27d6,0x27fa,0x280b,0x2824,0x282f,0x285c,0x287e,0x28ac,0x28df,0x28ee, -0x28ff,0x291c,0x295e,0x2985,0x2992,0x29a7,0x29cb,0x29f1,0x2a2a,0x2a3b,0x2a5f,0x2a6a,0x2a77,0x2a86,0x2aab,0x2ad9, -0x2af5,0x2b12,0x2b1f,0x2b30,0x2b4e,0x2b71,0x2b8e,0x2b9b,0x2bbb,0x2bd8,0x2bf9,0x2c22,0x2c33,0x2c52,0x2c6b,0x2c84, -0x1d4e,1,0,0x12,0x2ccb,0x2cdb,0x2cee,0x2cfe,0x2d0e,0x2d1d,0x2d2d,0x2d3f,0x2d52,0x2d64,0x2d74,0x2d84, -0x2d93,0x2da2,0x2db2,0x2dbf,0x2dce,0x2de2,0x1e0c,1,0,6,0x2e07,0x2e12,0x2e1f,0x2e2c,0x2e39,0x2e44, -0x1e50,1,0,0x1e,0x2e61,0x2e70,0x2e85,0x2e9a,0x2eaf,0x2ec3,0x2ed4,0x2ee8,0x2efb,0x2f0c,0x2f25,0x2f37, -0x2f48,0x2f5c,0x2f6f,0x2f87,0x2f99,0x2fa4,0x2fb4,0x2fc2,0x2fd7,0x2fec,0x3002,0x301c,0x3032,0x3042,0x3056,0x306a, -0x307b,0x3093,0x207b,1,0,0x66,0x30bb,0x30de,0x30e7,0x30f4,0x30ff,0x3108,0x3113,0x311c,0x3135,0x313a, -0x3143,0x3160,0x3169,0x3176,0x317f,0x31a3,0x31aa,0x31b3,0x31c6,0x31d1,0x31da,0x31e5,0x31fe,0x3207,0x3216,0x3221, -0x322a,0x3235,0x323e,0x3245,0x324e,0x3259,0x3262,0x327b,0x3284,0x3291,0x329c,0x32ad,0x32b8,0x32cd,0x32e4,0x32ed, -0x32f6,0x330f,0x331a,0x3323,0x332c,0x3343,0x3360,0x336b,0x337c,0x3387,0x338e,0x339b,0x33a8,0x33d5,0x33ea,0x33f3, -0x340e,0x3431,0x3452,0x3473,0x3498,0x34bf,0x34e0,0x3503,0x3524,0x354b,0x356c,0x3591,0x35b0,0x35cf,0x35ee,0x360b, -0x362c,0x364d,0x3670,0x3695,0x36b4,0x36d3,0x36f4,0x371b,0x3740,0x375f,0x3780,0x37a3,0x37be,0x37d7,0x37f2,0x380b, -0x3828,0x3843,0x3860,0x387f,0x389c,0x38b9,0x38d8,0x38f5,0x3910,0x392d,0x394a,0x397d,0x23c0,1,0,6, -0x39b5,0x39c4,0x39d4,0x39e4,0x39f4,0x3a05,0x241e,1,0,0x2b,0x3a23,0x3a2f,0x3a3d,0x3a4c,0x3a5b,0x3a6b, -0x3a7c,0x3a90,0x3aa5,0x3abb,0x3ace,0x3ae2,0x3af2,0x3afb,0x3b06,0x3b16,0x3b32,0x3b44,0x3b52,0x3b61,0x3b6d,0x3b82, 
-0x3b96,0x3ba9,0x3bb7,0x3bcb,0x3bd9,0x3be3,0x3bf5,0x3c01,0x3c0f,0x3c1f,0x3c26,0x3c2d,0x3c34,0x3c3b,0x3c42,0x3c58, -0x3c79,0x85e,0x3c8b,0x3c96,0x3ca5,0x2677,1,0,4,0x3cbf,0x3cca,0x3cd6,0x3ce0,0x269d,1,0, -0xb9,0x3cf7,0x3d04,0x3d19,0x3d26,0x3d35,0x3d43,0x3d52,0x3d61,0x3d73,0x3d82,0x3d90,0x3da1,0x3db0,0x3dbf,0x3dcc, -0x3dd8,0x3de7,0x3df6,0x3e00,0x3e0d,0x3e1a,0x3e29,0x3e37,0x3e46,0x3e52,0x3e5c,0x3e68,0x3e78,0x3e88,0x3e96,0x3ea2, -0x3eb3,0x3ebf,0x3ecb,0x3ed9,0x3ee6,0x3ef2,0x3eff,0xcfa,0x3f0c,0x3f1a,0x3f34,0x3f3d,0x3f4b,0x3f59,0x3f65,0x3f74, -0x3f82,0x3f90,0x3f9c,0x3fab,0x3fb9,0x3fc7,0x3fd4,0x3fe3,0x3ffe,0x400d,0x401e,0x402f,0x4042,0x4054,0x4063,0x4075, -0x4084,0x4090,0x409b,0x1e0a,0x40a8,0x40b3,0x40be,0x40c9,0x40d4,0x40ef,0x40fa,0x4105,0x4110,0x4123,0x4137,0x4142, -0x4151,0x4160,0x416b,0x4176,0x4183,0x4192,0x41a0,0x41ab,0x41c6,0x41d0,0x41e1,0x41f2,0x4201,0x4212,0x421d,0x4228, -0x4233,0x423e,0x4249,0x4254,0x425f,0x4269,0x4274,0x4284,0x428f,0x429d,0x42aa,0x42b5,0x42c4,0x42d1,0x42de,0x42ed, -0x42fa,0x430b,0x431d,0x432d,0x4338,0x434b,0x4362,0x4370,0x437d,0x4388,0x4395,0x43a6,0x43c2,0x43d8,0x43e3,0x4400, -0x4410,0x441f,0x442a,0x4435,0x1f24,0x4441,0x444c,0x4464,0x4474,0x4483,0x4491,0x449f,0x44aa,0x44b5,0x44c9,0x44e0, -0x44f8,0x4508,0x4518,0x4528,0x453a,0x4545,0x4550,0x455a,0x4566,0x4574,0x4587,0x4593,0x45a0,0x45ab,0x45c7,0x45d4, -0x45e2,0x45fb,0x2824,0x460a,0x2645,0x4617,0x4625,0x4637,0x4645,0x4651,0x4661,0x2a5f,0x466f,0x467b,0x4686,0x4691, -0x469c,0x46b0,0x46be,0x46d5,0x46e1,0x46f5,0x4703,0x4715,0x472b,0x4739,0x3000,1,0,6,0x4765,0x4778, -0x4788,0x4796,0x47a7,0x47b7,0x305c,0x12,0,1,0x47e1,0x47e7,0x3069,0x12,0,1,0x47e1,0x47e7, -0x3076,1,0,3,0x47e1,0x47e7,0x4820,0x308c,1,0,3,0x47e1,0x47e7,0x4820,0x30a2,1, -0,0x12,0x48aa,0x48b4,0x48c0,0x48c7,0x48d2,0x48d7,0x48de,0x48e5,0x48ee,0x48f3,0x48f8,0x4908,0x85e,0x3c8b, -0x4914,0x3c96,0x4924,0x3ca5,0x314b,1,0,0xf,0x48aa,0x494b,0x4955,0x495f,0x496a,0x3b61,0x4974,0x4980, 
-0x4988,0x498f,0x4999,0x48c0,0x48c7,0x48d7,0x49a3,0x31d2,1,0,0x17,0x48aa,0x49c0,0x495f,0x49cc,0x49d9, -0x49e7,0x3b61,0x49f2,0x48c0,0x4a03,0x48d7,0x4a12,0x4a20,0x85e,0x3c79,0x4a2c,0x4a3d,0x3c8b,0x4914,0x3c96,0x4924, -0x3ca5,0x4a4e,0x32ef,1,0,3,0x4a81,0x4a89,0x4a91,0x3308,1,0,0xf,0x4aba,0x4ac1,0x4ad0, -0x4af1,0x4b14,0x4b1f,0x4b3e,0x4b55,0x4b62,0x4b6b,0x4b8a,0x4bbd,0x4bd8,0x4c07,0x4c24,0x3398,1,0,0x24, -0x4c67,0x4c74,0x4c87,0x4c94,0x4cc1,0x4ce6,0x4cfb,0x4d1a,0x4d3b,0x4d68,0x4da1,0x4dc4,0x4de7,0x4e14,0x4e49,0x4e70, -0x4e99,0x4ed0,0x4eff,0x4f20,0x4f45,0x4f54,0x4f77,0x4f8e,0x4f9b,0x4faa,0x4fc7,0x4fe0,0x5003,0x5028,0x5041,0x5056, -0x5065,0x5076,0x5083,0x50a4,0x3568,1,0,4,0x50e2,0x50ed,0x5105,0x511d,0x35a4,0x36,1,2, -4,8,0xe,0x10,0x20,0x3e,0x40,0x80,0x100,0x1c0,0x200,0x400,0x800,0xe00,0x1000,0x2000, -0x4000,0x7000,0x8000,0x10000,0x20000,0x40000,0x78001,0x80000,0x100000,0x200000,0x400000,0x800000,0x1000000,0x2000000,0x4000000,0x8000000, -0xf000000,0x10000000,0x20000000,0x30f80000,0x2e61,0x2e70,0x2e85,0x2e9a,0x5156,0x2eaf,0x2ec3,0x514c,0x2ed4,0x2ee8,0x2efb,0x5167, -0x2f0c,0x2f25,0x2f37,0x517e,0x2f48,0x2f5c,0x2f6f,0x51a7,0x2f87,0x2f99,0x2fa4,0x2fb4,0x5143,0x2fc2,0x2fd7,0x2fec, -0x3002,0x301c,0x3032,0x3042,0x3056,0x306a,0x519d,0x307b,0x3093,0x5188 -}; - -const uint8_t PropNameData::bytesTries[14523]={ -0,0x15,0x6d,0xc3,0x78,0x73,0xc2,0x12,0x76,0x7a,0x76,0x6a,0x77,0xa2,0x52,0x78, -1,0x64,0x50,0x69,0x10,0x64,1,0x63,0x30,0x73,0x62,0x13,0x74,0x61,0x72,0x74, -0x63,0x60,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x61,0x13,0x69,0x67,0x69,0x74, -0x81,3,0x61,0x2e,0x65,0x4c,0x6f,0xc3,0x18,0x73,0x69,0x1e,0x72,0x69,0x61,0x74, -0x69,0x6f,0x6e,0x73,0x65,0x6c,0x65,0x63,0x74,0x6f,0x72,0x69,0x10,0x72,0x1f,0x74, -0x69,0x63,0x61,0x6c,0x6f,0x72,0x69,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0xc3, -0x18,3,0x62,0xc3,0x14,0x68,0x32,0x6f,0x42,0x73,0x13,0x70,0x61,0x63,0x65,0x5f, -0x17,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x5f,0x16,0x72,0x64,0x62,0x72,0x65, 
-0x61,0x6b,0xc3,0x14,0x73,0xa2,0x49,0x74,0xa4,0x3b,0x75,3,0x63,0xd9,0x40,0xc, -0x69,0x52,0x6e,0x58,0x70,0x12,0x70,0x65,0x72,0x5c,0x13,0x63,0x61,0x73,0x65,0x5c, -0x16,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,0xc,0x12,0x64,0x65,0x6f,0x5b, -0x10,0x69,1,0x63,0x3e,0x66,0x1b,0x69,0x65,0x64,0x69,0x64,0x65,0x6f,0x67,0x72, -0x61,0x70,0x68,0x5b,0x17,0x6f,0x64,0x65,0x31,0x6e,0x61,0x6d,0x65,0xd9,0x40,0xb, -0xa,0x69,0x84,0x70,0x19,0x70,0x30,0x74,0x36,0x75,0x10,0x63,0xd9,0x40,9,0x12, -0x61,0x63,0x65,0x5f,1,0x63,0xd9,0x40,8,0x65,0x11,0x72,0x6d,0x67,0x69,0x3c, -0x6c,0xa2,0x5f,0x6f,0x17,0x66,0x74,0x64,0x6f,0x74,0x74,0x65,0x64,0x57,0x13,0x6d, -0x70,0x6c,0x65,3,0x63,0x50,0x6c,0x68,0x74,0x8a,0x75,0x1e,0x70,0x70,0x65,0x72, -0x63,0x61,0x73,0x65,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,9,0x19,0x61, -0x73,0x65,0x66,0x6f,0x6c,0x64,0x69,0x6e,0x67,0xd9,0x40,6,0x1e,0x6f,0x77,0x65, -0x72,0x63,0x61,0x73,0x65,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,7,0x1e, -0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9, -0x40,8,0x10,0x63,0xd9,0x40,7,0x62,0xc3,0x13,0x63,0x34,0x64,0x57,0x65,0x6e, -0x66,0x10,0x63,0xd9,0x40,6,0xc2,0xa,2,0x66,0xd9,0x40,6,0x72,0x28,0x78, -0xd9,0x70,0,0x12,0x69,0x70,0x74,0xc2,0xa,0x19,0x65,0x78,0x74,0x65,0x6e,0x73, -0x69,0x6f,0x6e,0x73,0xd9,0x70,0,1,0x67,0x6a,0x6e,1,0x73,0x54,0x74,0x13, -0x65,0x6e,0x63,0x65,1,0x62,0x34,0x74,0x16,0x65,0x72,0x6d,0x69,0x6e,0x61,0x6c, -0x67,0x13,0x72,0x65,0x61,0x6b,0xc3,0x13,0x14,0x69,0x74,0x69,0x76,0x65,0x65,1, -0x6d,0x2e,0x73,0x13,0x74,0x61,0x72,0x74,0x73,0x19,0x65,0x6e,0x74,0x73,0x74,0x61, -0x72,0x74,0x65,0x72,0x73,3,0x63,0x66,0x65,0x72,0x69,0x98,0x72,0x19,0x61,0x69, -0x6c,0x63,0x61,0x6e,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d,0x62,0x69, -0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,0x11,0xd8,0x40,0xa,0x11,0x63, -0x63,0xc3,0x11,0x11,0x72,0x6d,0x58,0x1e,0x69,0x6e,0x61,0x6c,0x70,0x75,0x6e,0x63, -0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x59,0x1d,0x74,0x6c,0x65,0x63,0x61,0x73,0x65, 
-0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,0xa,0x6d,0x70,0x6e,0x76,0x70,0xa2, -0xf1,0x71,0xa4,0x43,0x72,2,0x61,0x28,0x65,0x32,0x69,0x9d,0x14,0x64,0x69,0x63, -0x61,0x6c,0x55,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,0x69,0x63,0x61, -0x74,0x6f,0x72,0x9d,0x12,0x61,0x74,0x68,0x4f,6,0x6f,0x39,0x6f,0x32,0x74,0xc3, -9,0x75,0x54,0x76,0xd9,0x30,0,0x12,0x6e,0x63,0x68,0x1f,0x61,0x72,0x61,0x63, -0x74,0x65,0x72,0x63,0x6f,0x64,0x65,0x70,0x6f,0x69,0x6e,0x74,0x51,0x14,0x6d,0x65, -0x72,0x69,0x63,1,0x74,0x32,0x76,0x13,0x61,0x6c,0x75,0x65,0xd9,0x30,0,0x12, -0x79,0x70,0x65,0xc3,9,0x61,0xa2,0x77,0x63,0xa2,0x82,0x66,2,0x63,0x98,0x64, -0xa2,0x53,0x6b,1,0x63,0x56,0x64,1,0x69,0x42,0x71,1,0x63,0xc3,0xd,0x75, -0x17,0x69,0x63,0x6b,0x63,0x68,0x65,0x63,0x6b,0xc3,0xd,0x13,0x6e,0x65,0x72,0x74, -0x6d,1,0x69,0x42,0x71,1,0x63,0xc3,0xf,0x75,0x17,0x69,0x63,0x6b,0x63,0x68, -0x65,0x63,0x6b,0xc3,0xf,0x13,0x6e,0x65,0x72,0x74,0x71,1,0x69,0x42,0x71,1, -0x63,0xc3,0xe,0x75,0x17,0x69,0x63,0x6b,0x63,0x68,0x65,0x63,0x6b,0xc3,0xe,0x13, -0x6e,0x65,0x72,0x74,0x6f,1,0x69,0x42,0x71,1,0x63,0xc3,0xc,0x75,0x17,0x69, -0x63,0x6b,0x63,0x68,0x65,0x63,0x6b,0xc3,0xc,0x13,0x6e,0x65,0x72,0x74,0x6b,0xd8, -0x40,5,1,0x31,0xd9,0x40,0xb,0x6d,0x10,0x65,0xd9,0x40,5,0x12,0x68,0x61, -0x72,0x51,2,0x61,0x6c,0x63,0xa2,0x4c,0x72,1,0x65,0x2a,0x69,0x11,0x6e,0x74, -0x7f,0x16,0x70,0x65,0x6e,0x64,0x65,0x64,0x63,0x1f,0x6f,0x6e,0x63,0x61,0x74,0x65, -0x6e,0x61,0x74,0x69,0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x9f,0x10,0x74,2,0x73,0x2c, -0x74,0x30,0x77,0x10,0x73,0x77,0x11,0x79,0x6e,0x75,0x12,0x65,0x72,0x6e,1,0x73, -0x38,0x77,0x18,0x68,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x77,0x14,0x79,0x6e, -0x74,0x61,0x78,0x75,0x10,0x6d,0x9f,1,0x6d,0x3c,0x75,0x1a,0x6f,0x74,0x61,0x74, -0x69,0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x53,0x12,0x61,0x72,0x6b,0x53,0x66,0xc1,0xf8, -0x69,0xc1,0x3c,0x69,0xa2,0x6f,0x6a,0xa4,9,0x6c,4,0x62,0xc3,8,0x63,0x8c, -0x65,0x98,0x69,0xa2,0x56,0x6f,2,0x65,0x4b,0x67,0x4c,0x77,0x11,0x65,0x72,0x4c, 
-0x13,0x63,0x61,0x73,0x65,0x4c,0x16,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40, -4,0x11,0x69,0x63,0x1f,0x61,0x6c,0x6f,0x72,0x64,0x65,0x72,0x65,0x78,0x63,0x65, -0x70,0x74,0x69,0x6f,0x6e,0x4b,0xd8,0x40,4,0x11,0x63,0x63,0xc3,0x10,0x18,0x61, -0x64,0x63,0x61,0x6e,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d,0x62,0x69, -0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,0x10,0x16,0x6e,0x65,0x62,0x72, -0x65,0x61,0x6b,0xc3,8,2,0x64,0x4a,0x6e,0xa2,0x5b,0x73,1,0x63,0xd9,0x40, -3,0x6f,0x16,0x63,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0xd9,0x40,3,2,0x63,0x80, -0x65,0x90,0x73,0x40,1,0x62,0x52,0x74,0x46,1,0x61,0x40,0x72,0x1c,0x69,0x6e, -0x61,0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x47,0x11,0x72,0x74,0x41, -0x44,0x1c,0x69,0x6e,0x61,0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x45, -0x3e,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x3f,0x10,0x6f,0x42,0x16,0x67,0x72, -0x61,0x70,0x68,0x69,0x63,0x43,2,0x64,0x2e,0x70,0x86,0x73,0x10,0x63,0xc3,0x17, -0x11,0x69,0x63,1,0x70,0x46,0x73,0x1e,0x79,0x6c,0x6c,0x61,0x62,0x69,0x63,0x63, -0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc3,0x17,0x10,0x6f,0x1f,0x73,0x69,0x74,0x69, -0x6f,0x6e,0x61,0x6c,0x63,0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc3,0x16,0x10,0x63, -0xc3,0x16,2,0x67,0xc3,6,0x6f,0x26,0x74,0xc3,7,0x11,0x69,0x6e,1,0x63, -0x4a,0x69,0x11,0x6e,0x67,1,0x67,0x2e,0x74,0x12,0x79,0x70,0x65,0xc3,7,0x13, -0x72,0x6f,0x75,0x70,0xc3,6,0x48,0x15,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x49,0x66, -0x86,0x67,0xa2,0x4a,0x68,3,0x61,0x36,0x65,0x58,0x73,0x68,0x79,0x13,0x70,0x68, -0x65,0x6e,0x3d,0x1f,0x6e,0x67,0x75,0x6c,0x73,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65, -0x74,0x79,0x70,0x65,0xc3,0xb,0x10,0x78,0x3a,0x14,0x64,0x69,0x67,0x69,0x74,0x3b, -0x10,0x74,0xc3,0xb,0x16,0x75,0x6c,0x6c,0x63,0x6f,0x6d,0x70,0x1f,0x6f,0x73,0x69, -0x74,0x69,0x6f,0x6e,0x65,0x78,0x63,0x6c,0x75,0x73,0x69,0x6f,0x6e,0x33,2,0x63, -0xa2,0x44,0x65,0xa2,0x4b,0x72,3,0x61,0x34,0x62,0x84,0x65,0x8a,0x6c,0x12,0x69, -0x6e,0x6b,0x39,0x11,0x70,0x68,0x7c,0x12,0x65,0x6d,0x65,3,0x62,0x5e,0x63,0x30, 
-0x65,0x48,0x6c,0x12,0x69,0x6e,0x6b,0x39,0x1a,0x6c,0x75,0x73,0x74,0x65,0x72,0x62, -0x72,0x65,0x61,0x6b,0xc3,0x12,0x14,0x78,0x74,0x65,0x6e,0x64,0x37,0x12,0x61,0x73, -0x65,0x35,0x11,0x78,0x74,0x37,0xc2,5,1,0x62,0xc3,0x12,0x6d,0xd9,0x20,0, -0x1c,0x6e,0x65,0x72,0x61,0x6c,0x63,0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc2,5, -0x13,0x6d,0x61,0x73,0x6b,0xd9,0x20,0,0x61,0xa2,0x90,0x62,0xa2,0xbe,0x63,0xa4, -0x30,0x64,0xa4,0xfd,0x65,5,0x6d,0x63,0x6d,0x6e,0x70,0xa2,0x59,0x78,0x10,0x74, -0x30,1,0x65,0x2c,0x70,0x12,0x69,0x63,0x74,0xa1,0x12,0x6e,0x64,0x65,1,0x64, -0x24,0x72,0x31,0x1b,0x70,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63, -0xa1,0x10,0x6f,1,0x64,0x97,0x6a,0x10,0x69,0x92,2,0x63,0x40,0x6d,0x50,0x70, -0x1a,0x72,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0x95,0x17,0x6f,0x6d, -0x70,0x6f,0x6e,0x65,0x6e,0x74,0x9b,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x96, -0x13,0x62,0x61,0x73,0x65,0x99,0x12,0x72,0x65,0x73,0x95,0x61,0x30,0x62,0x4e,0x63, -0x12,0x6f,0x6d,0x70,0x9b,0xc2,4,0x1b,0x73,0x74,0x61,0x73,0x69,0x61,0x6e,0x77, -0x69,0x64,0x74,0x68,0xc3,4,0x12,0x61,0x73,0x65,0x99,3,0x67,0x44,0x68,0x4a, -0x6c,0x4e,0x73,0x1a,0x63,0x69,0x69,0x68,0x65,0x78,0x64,0x69,0x67,0x69,0x74,0x23, -0x10,0x65,0xd9,0x40,0,0x11,0x65,0x78,0x23,1,0x6e,0x38,0x70,0x11,0x68,0x61, -0x20,0x14,0x62,0x65,0x74,0x69,0x63,0x21,0x11,0x75,0x6d,0x79,4,0x63,0xc3,0, -0x69,0x3e,0x6c,0xa2,0x57,0x6d,0xa2,0x64,0x70,1,0x62,0xd9,0x40,0xd,0x74,0xc3, -0x15,0x11,0x64,0x69,2,0x63,0x54,0x6d,0x74,0x70,0x1b,0x61,0x69,0x72,0x65,0x64, -0x62,0x72,0x61,0x63,0x6b,0x65,0x74,0xd8,0x40,0xd,0x13,0x74,0x79,0x70,0x65,0xc3, -0x15,0x24,1,0x6c,0x30,0x6f,0x14,0x6e,0x74,0x72,0x6f,0x6c,0x25,0x12,0x61,0x73, -0x73,0xc3,0,0x26,0x14,0x69,0x72,0x72,0x6f,0x72,1,0x65,0x38,0x69,0x16,0x6e, -0x67,0x67,0x6c,0x79,0x70,0x68,0xd9,0x40,1,0x10,0x64,0x27,2,0x61,0x32,0x6b, -0xc3,1,0x6f,0x11,0x63,0x6b,0xc3,1,0x11,0x6e,0x6b,0x7b,0x10,0x67,0xd9,0x40, -1,6,0x68,0x7c,0x68,0x54,0x69,0x85,0x6f,0xa2,0x6f,0x77,4,0x63,0x30,0x6b, 
-0x36,0x6c,0x87,0x74,0x8b,0x75,0x89,1,0x66,0x8d,0x6d,0x8f,0x11,0x63,0x66,0x91, -0x18,0x61,0x6e,0x67,0x65,0x73,0x77,0x68,0x65,0x6e,4,0x63,0x44,0x6c,0x6c,0x6e, -0x7e,0x74,0x98,0x75,0x18,0x70,0x70,0x65,0x72,0x63,0x61,0x73,0x65,0x64,0x89,0x12, -0x61,0x73,0x65,1,0x66,0x30,0x6d,0x14,0x61,0x70,0x70,0x65,0x64,0x8f,0x14,0x6f, -0x6c,0x64,0x65,0x64,0x8d,0x18,0x6f,0x77,0x65,0x72,0x63,0x61,0x73,0x65,0x64,0x87, -0x1c,0x66,0x6b,0x63,0x63,0x61,0x73,0x65,0x66,0x6f,0x6c,0x64,0x65,0x64,0x91,0x18, -0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x64,0x8b,0x13,0x6d,0x70,0x65,0x78,0x33, -0x61,0x2e,0x63,0xa2,0x48,0x66,0xd9,0x40,2,1,0x6e,0x72,0x73,0x10,0x65,3, -0x64,0x83,0x66,0x3a,0x69,0x4a,0x73,0x17,0x65,0x6e,0x73,0x69,0x74,0x69,0x76,0x65, -0x65,0x15,0x6f,0x6c,0x64,0x69,0x6e,0x67,0xd9,0x40,2,0x17,0x67,0x6e,0x6f,0x72, -0x61,0x62,0x6c,0x65,0x85,0x13,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d, -0x62,0x69,0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,2,0x10,0x63,0xc3, -2,3,0x61,0x30,0x65,0x34,0x69,0xa2,0x41,0x74,0xc3,3,0x11,0x73,0x68,0x29, -2,0x63,0x3a,0x66,0x58,0x70,0x2c,0x16,0x72,0x65,0x63,0x61,0x74,0x65,0x64,0x2d, -0x1d,0x6f,0x6d,0x70,0x6f,0x73,0x69,0x74,0x69,0x6f,0x6e,0x74,0x79,0x70,0x65,0xc3, -3,0x15,0x61,0x75,0x6c,0x74,0x69,0x67,0x1f,0x6e,0x6f,0x72,0x61,0x62,0x6c,0x65, -0x63,0x6f,0x64,0x65,0x70,0x6f,0x69,0x6e,0x74,0x2b,0x2a,0x10,0x61,0x2e,0x15,0x63, -0x72,0x69,0x74,0x69,0x63,0x2f,3,0x66,0x34,0x6e,0x3e,0x74,0x42,0x79,0x22,0x11, -0x65,0x73,0x23,0x20,0x13,0x61,0x6c,0x73,0x65,0x21,0x20,0x10,0x6f,0x21,0x22,0x12, -0x72,0x75,0x65,0x23,0xa,0x6b,0x5b,0x6f,0x23,0x6f,0x3c,0x72,0x4c,0x76,1,0x69, -0x24,0x72,0x33,0x13,0x72,0x61,0x6d,0x61,0x33,0x10,0x76,0x22,0x14,0x65,0x72,0x6c, -0x61,0x79,0x23,0xa2,0xe2,0x13,0x69,0x67,0x68,0x74,0xa3,0xe2,0x6b,0x58,0x6c,0x74, -0x6e,3,0x6b,0x2f,0x6f,0x30,0x72,0x21,0x75,0x12,0x6b,0x74,0x61,0x2f,0x19,0x74, -0x72,0x65,0x6f,0x72,0x64,0x65,0x72,0x65,0x64,0x21,1,0x61,0x24,0x76,0x31,0x18, -0x6e,0x61,0x76,0x6f,0x69,0x63,0x69,0x6e,0x67,0x31,0xa2,0xe0,0x12,0x65,0x66,0x74, 
-0xa3,0xe0,0x61,0x5c,0x62,0xa2,0x77,0x63,0xa2,0x96,0x64,0xa4,0xa,0x69,1,0x6f, -0x26,0x73,0xa3,0xf0,0x1a,0x74,0x61,0x73,0x75,0x62,0x73,0x63,0x72,0x69,0x70,0x74, -0xa3,0xf0,0xa2,0xe6,3,0x62,0xa0,0x6c,0xa3,0xe4,0x72,0xa3,0xe8,0x74,2,0x61, -0x74,0x62,0x7c,0x74,0x14,0x61,0x63,0x68,0x65,0x64,1,0x61,0x3e,0x62,0x13,0x65, -0x6c,0x6f,0x77,0xa2,0xca,0x13,0x6c,0x65,0x66,0x74,0xa3,0xc8,0x13,0x62,0x6f,0x76, -0x65,0xa2,0xd6,0x14,0x72,0x69,0x67,0x68,0x74,0xa3,0xd8,0xa2,0xd6,0x10,0x72,0xa3, -0xd8,0xa2,0xca,0x10,0x6c,0xa3,0xc8,0x12,0x6f,0x76,0x65,0xa2,0xe6,1,0x6c,0x30, -0x72,0x13,0x69,0x67,0x68,0x74,0xa3,0xe8,0x12,0x65,0x66,0x74,0xa3,0xe4,0xa2,0xdc, -2,0x65,0x2c,0x6c,0xa3,0xda,0x72,0xa3,0xde,0x12,0x6c,0x6f,0x77,0xa2,0xdc,1, -0x6c,0x30,0x72,0x13,0x69,0x67,0x68,0x74,0xa3,0xde,0x12,0x65,0x66,0x74,0xa3,0xda, -0x11,0x63,0x63,4,0x31,0x3c,0x32,0xa2,0x42,0x33,0xa2,0x56,0x38,0xa2,0x64,0x39, -0x10,0x31,0xa3,0x5b,9,0x35,0xa,0x35,0x3f,0x36,0x41,0x37,0x43,0x38,0x45,0x39, -0x47,0x30,0x30,0x31,0x3c,0x32,0x42,0x33,0x4e,0x34,0x3d,0x34,1,0x33,0xa3,0x67, -0x37,0xa3,0x6b,0x36,0x10,0x38,0xa3,0x76,0x38,1,0x32,0xa3,0x7a,0x39,0xa3,0x81, -0x3a,2,0x30,0xa3,0x82,0x32,0xa3,0x84,0x33,0xa3,0x85,9,0x35,0xa,0x35,0x53, -0x36,0x55,0x37,0x57,0x38,0x59,0x39,0x5b,0x30,0x49,0x31,0x4b,0x32,0x4d,0x33,0x4f, -0x34,0x51,6,0x33,8,0x33,0x63,0x34,0x65,0x35,0x67,0x36,0x69,0x30,0x5d,0x31, -0x5f,0x32,0x61,0x10,0x34,0xa3,0x54,2,0x61,0xa3,0xea,0x62,0xa3,0xe9,0x6f,0x13, -0x75,0x62,0x6c,0x65,1,0x61,0x30,0x62,0x13,0x65,0x6c,0x6f,0x77,0xa3,0xe9,0x13, -0x62,0x6f,0x76,0x65,0xa3,0xea,0xb,0x6e,0xc0,0xca,0x72,0x5f,0x72,0x46,0x73,0xa2, -0x48,0x77,1,0x68,0x24,0x73,0x33,0x17,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65, -0x33,0x22,1,0x69,0x30,0x6c,2,0x65,0x3d,0x69,0x4b,0x6f,0x3f,0x18,0x67,0x68, -0x74,0x74,0x6f,0x6c,0x65,0x66,0x74,0x22,2,0x65,0x38,0x69,0x48,0x6f,0x16,0x76, -0x65,0x72,0x72,0x69,0x64,0x65,0x3f,0x17,0x6d,0x62,0x65,0x64,0x64,0x69,0x6e,0x67, -0x3d,0x15,0x73,0x6f,0x6c,0x61,0x74,0x65,0x4b,0x30,0x1e,0x65,0x67,0x6d,0x65,0x6e, 
-0x74,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x31,0x6e,0xa2,0x41,0x6f,0xa2, -0x53,0x70,2,0x61,0x66,0x64,0x86,0x6f,0x1b,0x70,0x64,0x69,0x72,0x65,0x63,0x74, -0x69,0x6f,0x6e,0x61,0x6c,1,0x66,0x32,0x69,0x15,0x73,0x6f,0x6c,0x61,0x74,0x65, -0x4d,0x14,0x6f,0x72,0x6d,0x61,0x74,0x41,0x1f,0x72,0x61,0x67,0x72,0x61,0x70,0x68, -0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x2f,1,0x66,0x41,0x69,0x4d,1, -0x6f,0x28,0x73,0x10,0x6d,0x43,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e,0x67,0x6d, -0x61,0x72,0x6b,0x43,1,0x6e,0x35,0x74,0x19,0x68,0x65,0x72,0x6e,0x65,0x75,0x74, -0x72,0x61,0x6c,0x35,0x65,0x88,0x65,0x98,0x66,0xa2,0x6a,0x6c,0x20,1,0x65,0x30, -0x72,2,0x65,0x37,0x69,0x49,0x6f,0x39,0x18,0x66,0x74,0x74,0x6f,0x72,0x69,0x67, -0x68,0x74,0x20,2,0x65,0x38,0x69,0x48,0x6f,0x16,0x76,0x65,0x72,0x72,0x69,0x64, -0x65,0x39,0x17,0x6d,0x62,0x65,0x64,0x64,0x69,0x6e,0x67,0x37,0x15,0x73,0x6f,0x6c, -0x61,0x74,0x65,0x49,3,0x6e,0x25,0x73,0x27,0x74,0x29,0x75,0x15,0x72,0x6f,0x70, -0x65,0x61,0x6e,2,0x6e,0x3c,0x73,0x46,0x74,0x18,0x65,0x72,0x6d,0x69,0x6e,0x61, -0x74,0x6f,0x72,0x29,0x14,0x75,0x6d,0x62,0x65,0x72,0x25,0x17,0x65,0x70,0x61,0x72, -0x61,0x74,0x6f,0x72,0x27,1,0x69,0x28,0x73,0x10,0x69,0x47,0x1f,0x72,0x73,0x74, -0x73,0x74,0x72,0x6f,0x6e,0x67,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x47,0x61,0x4e, -0x62,0x84,0x63,1,0x6f,0x24,0x73,0x2d,0x1c,0x6d,0x6d,0x6f,0x6e,0x73,0x65,0x70, -0x61,0x72,0x61,0x74,0x6f,0x72,0x2d,2,0x6c,0x3b,0x6e,0x2b,0x72,0x13,0x61,0x62, -0x69,0x63,1,0x6c,0x30,0x6e,0x14,0x75,0x6d,0x62,0x65,0x72,0x2b,0x14,0x65,0x74, -0x74,0x65,0x72,0x3b,0x2e,1,0x6e,0x45,0x6f,0x1c,0x75,0x6e,0x64,0x61,0x72,0x79, -0x6e,0x65,0x75,0x74,0x72,0x61,0x6c,0x45,0,0x16,0x6d,0xc7,0xfe,0x74,0xc1,0xb8, -0x77,0x57,0x77,0x48,0x79,0x5c,0x7a,0x1d,0x61,0x6e,0x61,0x62,0x61,0x7a,0x61,0x72, -0x73,0x71,0x75,0x61,0x72,0x65,0xa5,0x18,0x18,0x61,0x72,0x61,0x6e,0x67,0x63,0x69, -0x74,0x69,0xa3,0xfc,0x10,0x69,2,0x6a,0x3c,0x72,0x68,0x73,0x17,0x79,0x6c,0x6c, -0x61,0x62,0x6c,0x65,0x73,0xa3,0x48,0x12,0x69,0x6e,0x67,0xa2,0x74,0x1e,0x68,0x65, 
-0x78,0x61,0x67,0x72,0x61,0x6d,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x74,0x16, -0x61,0x64,0x69,0x63,0x61,0x6c,0x73,0xa3,0x49,0x74,0xa2,0x59,0x75,0xa4,0x12,0x76, -2,0x61,0x36,0x65,0x7a,0x73,0xa2,0x6c,0x12,0x73,0x75,0x70,0xa3,0x7d,1,0x69, -0xa3,0x9f,0x72,0x1e,0x69,0x61,0x74,0x69,0x6f,0x6e,0x73,0x65,0x6c,0x65,0x63,0x74, -0x6f,0x72,0x73,0xa2,0x6c,0x19,0x73,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74, -0xa3,0x7d,1,0x64,0x3c,0x72,0x19,0x74,0x69,0x63,0x61,0x6c,0x66,0x6f,0x72,0x6d, -0x73,0xa3,0x91,0x14,0x69,0x63,0x65,0x78,0x74,0xa2,0xaf,0x16,0x65,0x6e,0x73,0x69, -0x6f,0x6e,0x73,0xa3,0xaf,4,0x61,0x68,0x65,0xa2,0x8a,0x68,0xa2,0x8d,0x69,0xa2, -0x95,0x72,0x1c,0x61,0x6e,0x73,0x70,0x6f,0x72,0x74,0x61,0x6e,0x64,0x6d,0x61,0x70, -0xa2,0xcf,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xcf,4,0x67,0x58,0x69, -0x7e,0x6b,0xa2,0x58,0x6d,0xa2,0x5a,0x6e,0x12,0x67,0x75,0x74,0xa4,0x10,0x19,0x63, -0x6f,0x6d,0x70,0x6f,0x6e,0x65,0x6e,0x74,0x73,0xa5,0x11,2,0x61,0x2a,0x62,0x32, -0x73,0xa3,0x60,0x12,0x6c,0x6f,0x67,0xa3,0x62,0x13,0x61,0x6e,0x77,0x61,0xa3,0x65, -3,0x6c,0x52,0x74,0x56,0x76,0x5e,0x78,0x16,0x75,0x61,0x6e,0x6a,0x69,0x6e,0x67, -0xa2,0x7c,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x7c,0x10,0x65,0xa3,0x70, -0x12,0x68,0x61,0x6d,0xa3,0xae,0x12,0x69,0x65,0x74,0xa3,0xb7,0x11,0x72,0x69,0xa3, -0xdc,0x11,0x69,0x6c,0x49,0x13,0x6c,0x75,0x67,0x75,0x4b,0x10,0x61,1,0x61,0x24, -0x69,0x53,0x11,0x6e,0x61,0x3d,2,0x62,0x34,0x66,0x3c,0x72,0x13,0x68,0x75,0x74, -0x61,0xa3,0xfb,0x13,0x65,0x74,0x61,0x6e,0x57,0x14,0x69,0x6e,0x61,0x67,0x68,0xa3, -0x90,2,0x63,0x82,0x67,0x92,0x6e,0x1f,0x69,0x66,0x69,0x65,0x64,0x63,0x61,0x6e, -0x61,0x64,0x69,0x61,0x6e,0x61,0x62,0x6f,0x1f,0x72,0x69,0x67,0x69,0x6e,0x61,0x6c, -0x73,0x79,0x6c,0x6c,0x61,0x62,0x69,0x63,0x73,0x62,0x17,0x65,0x78,0x74,0x65,0x6e, -0x64,0x65,0x64,0xa3,0xad,0x11,0x61,0x73,0x62,0x12,0x65,0x78,0x74,0xa3,0xad,0x15, -0x61,0x72,0x69,0x74,0x69,0x63,0xa3,0x78,0x70,0xc2,0xf5,0x70,0xa6,0xb,0x72,0xa6, 
-0xc7,0x73,7,0x6f,0xc1,0x7f,0x6f,0x76,0x70,0xa2,0x47,0x75,0xa2,0x66,0x79,1, -0x6c,0x4c,0x72,0x12,0x69,0x61,0x63,0x3a,0x12,0x73,0x75,0x70,0xa4,0x17,0x16,0x70, -0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x17,0x17,0x6f,0x74,0x69,0x6e,0x61,0x67,0x72, -0x69,0xa3,0x8f,2,0x67,0x34,0x72,0x3e,0x79,0x13,0x6f,0x6d,0x62,0x6f,0xa5,0x16, -0x13,0x64,0x69,0x61,0x6e,0xa5,0x23,0x17,0x61,0x73,0x6f,0x6d,0x70,0x65,0x6e,0x67, -0xa3,0xda,1,0x61,0x32,0x65,0x14,0x63,0x69,0x61,0x6c,0x73,0xa3,0x56,0x12,0x63, -0x69,0x6e,0x1f,0x67,0x6d,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x6c,0x65,0x74,0x74, -0x65,0x72,0x73,0x2d,2,0x6e,0x48,0x70,0x76,0x74,0x1d,0x74,0x6f,0x6e,0x73,0x69, -0x67,0x6e,0x77,0x72,0x69,0x74,0x69,0x6e,0x67,0xa5,6,0x15,0x64,0x61,0x6e,0x65, -0x73,0x65,0xa2,0x9b,0x12,0x73,0x75,0x70,0xa2,0xdb,0x16,0x70,0x6c,0x65,0x6d,0x65, -0x6e,0x74,0xa3,0xdb,4,0x61,0xa2,0xa8,0x65,0x5c,0x6d,0x9e,0x70,0xa2,0x4b,0x73, -0x13,0x79,0x6d,0x62,0x6f,0x1f,0x6c,0x73,0x61,0x6e,0x64,0x70,0x69,0x63,0x74,0x6f, -0x67,0x72,0x61,0x70,0x68,0x73,0xa5,5,0x10,0x72,1,0x61,0x4e,0x73,0x12,0x63, -0x72,0x69,0x1f,0x70,0x74,0x73,0x61,0x6e,0x64,0x73,0x75,0x62,0x73,0x63,0x72,0x69, -0x70,0x74,0x73,0x73,0x14,0x6e,0x64,0x73,0x75,0x62,0x73,0x1b,0x61,0x74,0x68,0x6f, -0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x73,0xa3,0x6a,1,0x6c,0x40,0x75,1,0x61, -0x6e,0x6e,0x17,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x8e,0x15,0x65,0x6d, -0x65,0x6e,0x74,0x61,1,0x6c,0x50,0x72,0x1e,0x79,0x70,0x72,0x69,0x76,0x61,0x74, -0x65,0x75,0x73,0x65,0x61,0x72,0x65,0x61,1,0x61,0xa3,0x6d,0x62,0xa3,0x6e,3, -0x61,0x5c,0x6d,0x78,0x70,0xa2,0x41,0x73,0x13,0x79,0x6d,0x62,0x6f,0x1f,0x6c,0x73, -0x61,0x6e,0x64,0x70,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa5,5, -0x14,0x72,0x72,0x6f,0x77,0x73,2,0x61,0xa3,0x67,0x62,0xa3,0x68,0x63,0xa3,0xfa, -0x13,0x61,0x74,0x68,0x65,0x1f,0x6d,0x61,0x74,0x69,0x63,0x61,0x6c,0x6f,0x70,0x65, -0x72,0x61,0x74,0x6f,0x72,0x73,0xa3,0x6a,0x19,0x75,0x6e,0x63,0x74,0x75,0x61,0x74, 
-0x69,0x6f,0x6e,0xa3,0x8e,0x61,0x5a,0x68,0x84,0x69,0xa2,0x5b,0x6d,0x16,0x61,0x6c, -0x6c,0x66,0x6f,0x72,0x6d,1,0x73,0xa3,0x54,0x76,0x16,0x61,0x72,0x69,0x61,0x6e, -0x74,0x73,0xa3,0x54,1,0x6d,0x36,0x75,0x16,0x72,0x61,0x73,0x68,0x74,0x72,0x61, -0xa3,0xa1,0x15,0x61,0x72,0x69,0x74,0x61,0x6e,0xa3,0xac,1,0x61,0x52,0x6f,0x13, -0x72,0x74,0x68,0x61,0x1f,0x6e,0x64,0x66,0x6f,0x72,0x6d,0x61,0x74,0x63,0x6f,0x6e, -0x74,0x72,0x6f,0x6c,0x73,0xa3,0xf7,1,0x72,0x2e,0x76,0x12,0x69,0x61,0x6e,0xa3, -0x79,0x12,0x61,0x64,0x61,0xa3,0xd9,1,0x64,0x50,0x6e,0x13,0x68,0x61,0x6c,0x61, -0x50,0x1d,0x61,0x72,0x63,0x68,0x61,0x69,0x63,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73, -0xa3,0xf9,0x13,0x64,0x68,0x61,0x6d,0xa3,0xf8,5,0x72,0x35,0x72,0x44,0x73,0x64, -0x75,1,0x61,0xa3,0x4e,0x6e,0x17,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x71, -0x17,0x69,0x76,0x61,0x74,0x65,0x75,0x73,0x65,0xa2,0x4e,0x13,0x61,0x72,0x65,0x61, -0xa3,0x4e,0x1b,0x61,0x6c,0x74,0x65,0x72,0x70,0x61,0x68,0x6c,0x61,0x76,0x69,0xa3, -0xf6,0x61,0x40,0x68,0x82,0x6c,0x19,0x61,0x79,0x69,0x6e,0x67,0x63,0x61,0x72,0x64, -0x73,0xa3,0xcc,2,0x68,0x38,0x6c,0x4a,0x75,0x15,0x63,0x69,0x6e,0x68,0x61,0x75, -0xa3,0xf5,0x17,0x61,0x77,0x68,0x68,0x6d,0x6f,0x6e,0x67,0xa3,0xf3,0x15,0x6d,0x79, -0x72,0x65,0x6e,0x65,0xa3,0xf4,1,0x61,0x8e,0x6f,1,0x65,0x74,0x6e,0x16,0x65, -0x74,0x69,0x63,0x65,0x78,0x74,0xa2,0x72,1,0x65,0x2c,0x73,0x11,0x75,0x70,0xa3, -0x8d,0x15,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa2,0x72,0x19,0x73,0x75,0x70,0x70,0x6c, -0x65,0x6d,0x65,0x6e,0x74,0xa3,0x8d,0x15,0x6e,0x69,0x63,0x69,0x61,0x6e,0xa3,0x97, -1,0x67,0x3e,0x69,0x13,0x73,0x74,0x6f,0x73,0xa2,0xa6,0x13,0x64,0x69,0x73,0x63, -0xa3,0xa6,0x12,0x73,0x70,0x61,0xa3,0x96,1,0x65,0x5c,0x75,1,0x6d,0x2a,0x6e, -0x11,0x69,0x63,0x67,0x10,0x69,0xa2,0xc0,0x1d,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c, -0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xc0,0x13,0x6a,0x61,0x6e,0x67,0xa3,0xa3, -0x6d,0xa2,0xce,0x6e,0xa8,1,0x6f,5,0x70,0x4b,0x70,0x46,0x72,0x7a,0x73,1, -0x61,0x30,0x6d,0x13,0x61,0x6e,0x79,0x61,0xa3,0x7a,0x11,0x67,0x65,0xa5,0xf,0x18, 
-0x74,0x69,0x63,0x61,0x6c,0x63,0x68,0x61,0x72,0x1f,0x61,0x63,0x74,0x65,0x72,0x72, -0x65,0x63,0x6f,0x67,0x6e,0x69,0x74,0x69,0x6f,0x6e,0x85,1,0x69,0x46,0x6e,0x1e, -0x61,0x6d,0x65,0x6e,0x74,0x61,0x6c,0x64,0x69,0x6e,0x67,0x62,0x61,0x74,0x73,0xa3, -0xf2,0x11,0x79,0x61,0x47,0x63,0xa2,0x71,0x67,0xa2,0x71,0x6c,1,0x63,0xa2,0x62, -0x64,5,0x70,0x38,0x70,0x36,0x73,0x56,0x74,0x14,0x75,0x72,0x6b,0x69,0x63,0xa3, -0xbf,0x11,0x65,0x72,1,0x6d,0x2e,0x73,0x12,0x69,0x61,0x6e,0xa3,0x8c,0x11,0x69, -0x63,0xa3,0xf1,0x10,0x6f,1,0x67,0x3a,0x75,0x18,0x74,0x68,0x61,0x72,0x61,0x62, -0x69,0x61,0x6e,0xa3,0xbb,0x13,0x64,0x69,0x61,0x6e,0xa5,0x22,0x68,0x42,0x69,0x54, -0x6e,0x1a,0x6f,0x72,0x74,0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e,0xa3,0xf0,0x17, -0x75,0x6e,0x67,0x61,0x72,0x69,0x61,0x6e,0xa5,4,0x14,0x74,0x61,0x6c,0x69,0x63, -0xa3,0x58,0x13,0x68,0x69,0x6b,0x69,0xa3,0x9d,0x10,0x72,0x85,0x12,0x68,0x61,0x6d, -0x65,6,0x6f,0x86,0x6f,0x6c,0x72,0xa2,0x61,0x75,0xa2,0x62,0x79,0x14,0x61,0x6e, -0x6d,0x61,0x72,0x58,0x12,0x65,0x78,0x74,2,0x61,0xa3,0xb6,0x62,0xa3,0xee,0x65, -0x13,0x6e,0x64,0x65,0x64,1,0x61,0xa3,0xb6,0x62,0xa3,0xee,1,0x64,0x52,0x6e, -0x15,0x67,0x6f,0x6c,0x69,0x61,0x6e,0x6a,0x12,0x73,0x75,0x70,0xa4,0xd,0x16,0x70, -0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0xd,0x10,0x69,0xa2,0xec,0x13,0x66,0x69,0x65, -0x72,1,0x6c,0x3c,0x74,0x19,0x6f,0x6e,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x73, -0xa3,0x8a,0x15,0x65,0x74,0x74,0x65,0x72,0x73,0x2d,0x10,0x6f,0xa3,0xed,1,0x6c, -0x44,0x73,0x11,0x69,0x63,0xa2,0x5c,0x18,0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c, -0x73,0xa3,0x5c,0x13,0x74,0x61,0x6e,0x69,0xa5,3,0x61,0xa2,0x9b,0x65,0xa4,0x4c, -0x69,1,0x61,0xa2,0x8f,0x73,0x10,0x63,5,0x70,0x18,0x70,0xa2,0x71,0x73,0x36, -0x74,0x17,0x65,0x63,0x68,0x6e,0x69,0x63,0x61,0x6c,0x81,0x15,0x79,0x6d,0x62,0x6f, -0x6c,0x73,0x8f,0x61,0xa2,0x66,0x65,0x46,0x6d,0x19,0x61,0x74,0x68,0x73,0x79,0x6d, -0x62,0x6f,0x6c,0x73,1,0x61,0xa3,0x66,0x62,0xa3,0x69,0x17,0x6c,0x6c,0x61,0x6e, -0x65,0x6f,0x75,0x73,2,0x6d,0x3a,0x73,0x6c,0x74,0x17,0x65,0x63,0x68,0x6e,0x69, 
-0x63,0x61,0x6c,0x81,0x11,0x61,0x74,0x1f,0x68,0x65,0x6d,0x61,0x74,0x69,0x63,0x61, -0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,1,0x61,0xa3,0x66,0x62,0xa3,0x69,0x15, -0x79,0x6d,0x62,0x6f,0x6c,0x73,0x8e,0x12,0x61,0x6e,0x64,1,0x61,0x3c,0x70,0x19, -0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa3,0xcd,0x14,0x72,0x72,0x6f, -0x77,0x73,0xa3,0x73,0x10,0x6f,0xa3,0xd8,7,0x72,0x6f,0x72,0x44,0x73,0x4e,0x74, -0x62,0x79,0x19,0x61,0x6e,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73,0xa5,0x20,0x13, -0x63,0x68,0x65,0x6e,0xa5,0xc,0x18,0x61,0x72,0x61,0x6d,0x67,0x6f,0x6e,0x64,0x69, -0xa5,0x14,0x10,0x68,2,0x61,0x3a,0x65,0x4a,0x6f,0x17,0x70,0x65,0x72,0x61,0x74, -0x6f,0x72,0x73,0x7f,0x16,0x6c,0x70,0x68,0x61,0x6e,0x75,0x6d,0xa3,0x5d,0x16,0x6d, -0x61,0x74,0x69,0x63,0x61,0x6c,1,0x61,0x36,0x6f,0x17,0x70,0x65,0x72,0x61,0x74, -0x6f,0x72,0x73,0x7f,0x11,0x6c,0x70,0x1f,0x68,0x61,0x6e,0x75,0x6d,0x65,0x72,0x69, -0x63,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x5d,0x68,0x50,0x6b,0x7e,0x6c,0x88, -0x6e,1,0x64,0x34,0x69,0x15,0x63,0x68,0x61,0x65,0x61,0x6e,0xa3,0xea,0x12,0x61, -0x69,0x63,0xa3,0xc6,1,0x61,0x3e,0x6a,0x12,0x6f,0x6e,0x67,0xa2,0xaa,0x14,0x74, -0x69,0x6c,0x65,0x73,0xa3,0xaa,0x13,0x6a,0x61,0x6e,0x69,0xa3,0xe9,0x13,0x61,0x73, -0x61,0x72,0xa5,0x1f,0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,0x4f,3,0x64,0x6c,0x65, -0x7e,0x6e,0xa2,0x47,0x72,0x14,0x6f,0x69,0x74,0x69,0x63,1,0x63,0x3c,0x68,0x19, -0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0xd7,0x15,0x75,0x72,0x73, -0x69,0x76,0x65,0xa3,0xd6,0x17,0x65,0x66,0x61,0x69,0x64,0x72,0x69,0x6e,0xa5,0x21, -0x17,0x74,0x65,0x69,0x6d,0x61,0x79,0x65,0x6b,0xa2,0xb8,0x12,0x65,0x78,0x74,0xa2, -0xd5,0x16,0x65,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa3,0xd5,0x18,0x64,0x65,0x6b,0x69, -0x6b,0x61,0x6b,0x75,0x69,0xa3,0xeb,5,0x6b,0x23,0x6b,0x4c,0x6f,0x50,0x75,1, -0x6d,0x2c,0x73,0x11,0x68,0x75,0xa5,0x15,0x17,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d, -0x73,0x7b,0x10,0x6f,0xa3,0x92,0x14,0x62,0x6c,0x6f,0x63,0x6b,0x21,0x61,0x44,0x62, 
-0x21,0x65,0x10,0x77,1,0x61,0xa5,0xe,0x74,0x14,0x61,0x69,0x6c,0x75,0x65,0xa3, -0x8b,0x16,0x62,0x61,0x74,0x61,0x65,0x61,0x6e,0xa3,0xef,0x67,0xc4,0xe,0x6a,0xc1, -0x95,0x6a,0xa2,0xc5,0x6b,0xa2,0xde,0x6c,4,0x61,0x54,0x65,0xa2,0x61,0x69,0xa2, -0x78,0x6f,0xa2,0xa7,0x79,1,0x63,0x2e,0x64,0x12,0x69,0x61,0x6e,0xa3,0xa9,0x12, -0x69,0x61,0x6e,0xa3,0xa7,1,0x6f,0x55,0x74,0x11,0x69,0x6e,1,0x31,0x82,0x65, -0x11,0x78,0x74,4,0x61,0x5c,0x62,0x29,0x63,0xa3,0x94,0x64,0xa3,0x95,0x65,0xa2, -0xe7,0x13,0x6e,0x64,0x65,0x64,4,0x61,0x36,0x62,0x29,0x63,0xa3,0x94,0x64,0xa3, -0x95,0x65,0xa3,0xe7,0x26,0x18,0x64,0x64,0x69,0x74,0x69,0x6f,0x6e,0x61,0x6c,0x6d, -0x24,0x12,0x73,0x75,0x70,0x24,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x25,1, -0x70,0x42,0x74,0x1d,0x74,0x65,0x72,0x6c,0x69,0x6b,0x65,0x73,0x79,0x6d,0x62,0x6f, -0x6c,0x73,0x79,0x12,0x63,0x68,0x61,0xa3,0x9c,2,0x6d,0x2e,0x6e,0x34,0x73,0x10, -0x75,0xa3,0xb0,0x11,0x62,0x75,0xa3,0x6f,0x12,0x65,0x61,0x72,1,0x61,0xa3,0xe8, -0x62,1,0x69,0x38,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62,0x61,0x72,0x79,0xa3,0x75, -0x17,0x64,0x65,0x6f,0x67,0x72,0x61,0x6d,0x73,0xa3,0x76,0x1a,0x77,0x73,0x75,0x72, -0x72,0x6f,0x67,0x61,0x74,0x65,0x73,0xa3,0x4d,0x10,0x61,1,0x6d,0x32,0x76,0x14, -0x61,0x6e,0x65,0x73,0x65,0xa3,0xb5,0x10,0x6f,0x5c,0x12,0x65,0x78,0x74,1,0x61, -0xa3,0xb4,0x62,0xa3,0xb9,1,0x61,0x80,0x68,3,0x61,0x3c,0x6d,0x4c,0x6f,0x64, -0x75,0x15,0x64,0x61,0x77,0x61,0x64,0x69,0xa3,0xe6,0x16,0x72,0x6f,0x73,0x68,0x74, -0x68,0x69,0xa3,0x89,0x11,0x65,0x72,0x68,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73, -0xa3,0x71,0x12,0x6a,0x6b,0x69,0xa3,0xe5,3,0x69,0x3a,0x6e,0x42,0x74,0xa2,0x51, -0x79,0x13,0x61,0x68,0x6c,0x69,0xa3,0xa2,0x12,0x74,0x68,0x69,0xa3,0xc1,3,0x61, -0x34,0x62,0x76,0x67,0x7c,0x6e,0x12,0x61,0x64,0x61,0x4d,1,0x65,0x40,0x73,0x11, -0x75,0x70,0xa2,0xcb,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xcb,0x11,0x78, -0x74,1,0x61,0xa5,0x13,0x65,0x14,0x6e,0x64,0x65,0x64,0x61,0xa5,0x13,0x11,0x75, -0x6e,0xa3,0x42,0x11,0x78,0x69,0x96,0x17,0x72,0x61,0x64,0x69,0x63,0x61,0x6c,0x73, 
-0x97,0x14,0x61,0x6b,0x61,0x6e,0x61,0x9e,1,0x65,0x4c,0x70,0x10,0x68,0x1f,0x6f, -0x6e,0x65,0x74,0x69,0x63,0x65,0x78,0x74,0x65,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa3, -0x6b,0x11,0x78,0x74,0xa3,0x6b,0x67,0xa2,0xb5,0x68,0xa4,0x84,0x69,3,0x64,0x4c, -0x6d,0xa2,0x55,0x6e,0xa2,0x62,0x70,0x13,0x61,0x65,0x78,0x74,0x2a,0x16,0x65,0x6e, -0x73,0x69,0x6f,0x6e,0x73,0x2b,1,0x63,0x99,0x65,0x17,0x6f,0x67,0x72,0x61,0x70, -0x68,0x69,0x63,1,0x64,0x56,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa4,0xb, -0x1d,0x61,0x6e,0x64,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa5, -0xb,0x13,0x65,0x73,0x63,0x72,0x1f,0x69,0x70,0x74,0x69,0x6f,0x6e,0x63,0x68,0x61, -0x72,0x61,0x63,0x74,0x65,0x72,0x73,0x99,0x1c,0x70,0x65,0x72,0x69,0x61,0x6c,0x61, -0x72,0x61,0x6d,0x61,0x69,0x63,0xa3,0xba,1,0x64,0x62,0x73,0x1b,0x63,0x72,0x69, -0x70,0x74,0x69,0x6f,0x6e,0x61,0x6c,0x70,0x61,1,0x68,0x32,0x72,0x14,0x74,0x68, -0x69,0x61,0x6e,0xa3,0xbd,0x13,0x6c,0x61,0x76,0x69,0xa3,0xbe,0x11,0x69,0x63,1, -0x6e,0x3e,0x73,0x1a,0x69,0x79,0x61,0x71,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa5, -0x1e,0x19,0x75,0x6d,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d,0x73,0xa3,0xb2,4,0x65, -0x74,0x6c,0xa2,0x82,0x6f,0xa2,0x9a,0x72,0xa2,0x9e,0x75,2,0x6a,0x34,0x6e,0x3e, -0x72,0x14,0x6d,0x75,0x6b,0x68,0x69,0x43,0x14,0x61,0x72,0x61,0x74,0x69,0x45,0x18, -0x6a,0x61,0x6c,0x61,0x67,0x6f,0x6e,0x64,0x69,0xa5,0x1c,1,0x6e,0xa2,0x46,0x6f, -1,0x6d,0x6e,0x72,0x13,0x67,0x69,0x61,0x6e,0x5a,1,0x65,0x40,0x73,0x11,0x75, -0x70,0xa2,0x87,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x87,0x11,0x78,0x74, -0xa4,0x1b,0x14,0x65,0x6e,0x64,0x65,0x64,0xa5,0x1b,0x1a,0x65,0x74,0x72,0x69,0x63, -0x73,0x68,0x61,0x70,0x65,0x73,0x8c,0x12,0x65,0x78,0x74,0xa2,0xe3,0x14,0x65,0x6e, -0x64,0x65,0x64,0xa3,0xe3,0x1e,0x65,0x72,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75, -0x61,0x74,0x69,0x6f,0x6e,0x71,0x17,0x61,0x67,0x6f,0x6c,0x69,0x74,0x69,0x63,0xa2, -0x88,0x12,0x73,0x75,0x70,0xa4,0xa,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5, 
-0xa,0x13,0x74,0x68,0x69,0x63,0xa3,0x59,1,0x61,0x5c,0x65,0x11,0x65,0x6b,0x30, -1,0x61,0x38,0x65,0x11,0x78,0x74,0x6e,0x14,0x65,0x6e,0x64,0x65,0x64,0x6f,0x17, -0x6e,0x64,0x63,0x6f,0x70,0x74,0x69,0x63,0x31,0x13,0x6e,0x74,0x68,0x61,0xa3,0xe4, -2,0x61,0xa2,0x48,0x65,0xa2,0xdf,0x69,1,0x67,0x30,0x72,0x14,0x61,0x67,0x61, -0x6e,0x61,0x9d,0x10,0x68,1,0x70,0x3a,0x73,0x18,0x75,0x72,0x72,0x6f,0x67,0x61, -0x74,0x65,0x73,0xa3,0x4b,1,0x72,0x3c,0x75,0x19,0x73,0x75,0x72,0x72,0x6f,0x67, -0x61,0x74,0x65,0x73,0xa3,0x4c,0x11,0x69,0x76,0x1f,0x61,0x74,0x65,0x75,0x73,0x65, -0x73,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x73,0xa3,0x4c,2,0x6c,0x32,0x6e, -0x9a,0x74,0x12,0x72,0x61,0x6e,0xa5,2,0x10,0x66,2,0x61,0x58,0x6d,0x70,0x77, -0x14,0x69,0x64,0x74,0x68,0x61,0x1f,0x6e,0x64,0x66,0x75,0x6c,0x6c,0x77,0x69,0x64, -0x74,0x68,0x66,0x6f,0x72,0x6d,0x73,0xa3,0x57,0x1a,0x6e,0x64,0x66,0x75,0x6c,0x6c, -0x66,0x6f,0x72,0x6d,0x73,0xa3,0x57,0x13,0x61,0x72,0x6b,0x73,0xa3,0x52,2,0x67, -0x34,0x69,0xa2,0x45,0x75,0x12,0x6e,0x6f,0x6f,0xa3,0x63,0x11,0x75,0x6c,0xa2,0x4a, -2,0x63,0x3c,0x6a,0x5e,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x73,0xa3, -0x4a,0x1f,0x6f,0x6d,0x70,0x61,0x74,0x69,0x62,0x69,0x6c,0x69,0x74,0x79,0x6a,0x61, -0x6d,0x6f,0xa3,0x41,0x12,0x61,0x6d,0x6f,0x5c,0x17,0x65,0x78,0x74,0x65,0x6e,0x64, -0x65,0x64,1,0x61,0xa3,0xb4,0x62,0xa3,0xb9,0x19,0x66,0x69,0x72,0x6f,0x68,0x69, -0x6e,0x67,0x79,0x61,0xa5,0x1d,0x13,0x62,0x72,0x65,0x77,0x37,0x61,0xa2,0xe9,0x62, -0xa6,0x29,0x63,0xa6,0xfe,0x64,0xac,0x8a,0x65,5,0x6d,0xa2,0x6d,0x86,0x6e,0x96, -0x74,0x15,0x68,0x69,0x6f,0x70,0x69,0x63,0x5e,1,0x65,0x40,0x73,0x11,0x75,0x70, -0xa2,0x86,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x86,0x11,0x78,0x74,0xa2, -0x85,1,0x61,0xa3,0xc8,0x65,0x13,0x6e,0x64,0x65,0x64,0xa2,0x85,0x10,0x61,0xa3, -0xc8,0x16,0x6f,0x74,0x69,0x63,0x6f,0x6e,0x73,0xa3,0xce,0x15,0x63,0x6c,0x6f,0x73, -0x65,0x64,2,0x61,0x5a,0x63,0x9e,0x69,0x1c,0x64,0x65,0x6f,0x67,0x72,0x61,0x70, 
-0x68,0x69,0x63,0x73,0x75,0x70,0xa2,0xc4,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74, -0xa3,0xc4,0x16,0x6c,0x70,0x68,0x61,0x6e,0x75,0x6d,0x86,1,0x65,0x2c,0x73,0x11, -0x75,0x70,0xa3,0xc3,0x13,0x72,0x69,0x63,0x73,0x86,0x18,0x75,0x70,0x70,0x6c,0x65, -0x6d,0x65,0x6e,0x74,0xa3,0xc3,0x11,0x6a,0x6b,0xa2,0x44,0x1f,0x6c,0x65,0x74,0x74, -0x65,0x72,0x73,0x61,0x6e,0x64,0x6d,0x6f,0x6e,0x74,0x68,0x73,0xa3,0x44,0x61,0x36, -0x67,0x62,0x6c,0x14,0x62,0x61,0x73,0x61,0x6e,0xa3,0xe2,0x13,0x72,0x6c,0x79,0x64, -0x1f,0x79,0x6e,0x61,0x73,0x74,0x69,0x63,0x63,0x75,0x6e,0x65,0x69,0x66,0x6f,0x72, -0x6d,0xa5,1,0x10,0x79,0x1f,0x70,0x74,0x69,0x61,0x6e,0x68,0x69,0x65,0x72,0x6f, -0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0xc2,7,0x6e,0xc0,0xe5,0x6e,0x3e,0x72,0xa2, -0x5d,0x73,0xa2,0xd8,0x76,0x14,0x65,0x73,0x74,0x61,0x6e,0xa3,0xbc,1,0x61,0x92, -0x63,0x13,0x69,0x65,0x6e,0x74,1,0x67,0x34,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c, -0x73,0xa3,0xa5,0x13,0x72,0x65,0x65,0x6b,1,0x6d,0x34,0x6e,0x15,0x75,0x6d,0x62, -0x65,0x72,0x73,0xa3,0x7f,0x13,0x75,0x73,0x69,0x63,0xa2,0x7e,0x19,0x61,0x6c,0x6e, -0x6f,0x74,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x7e,0x10,0x74,0x1f,0x6f,0x6c,0x69,0x61, -0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0xfe,2,0x61, -0x32,0x6d,0xa2,0x71,0x72,0x12,0x6f,0x77,0x73,0x7d,0x12,0x62,0x69,0x63,0x38,3, -0x65,0x4a,0x6d,0x66,0x70,0xa2,0x43,0x73,0x11,0x75,0x70,0xa2,0x80,0x16,0x70,0x6c, -0x65,0x6d,0x65,0x6e,0x74,0xa3,0x80,0x11,0x78,0x74,1,0x61,0xa3,0xd2,0x65,0x14, -0x6e,0x64,0x65,0x64,0x61,0xa3,0xd2,0x12,0x61,0x74,0x68,0xa2,0xd3,0x18,0x65,0x6d, -0x61,0x74,0x69,0x63,0x61,0x6c,0x61,0x1f,0x6c,0x70,0x68,0x61,0x62,0x65,0x74,0x69, -0x63,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xd3,1,0x66,0x42,0x72,0x1e,0x65, -0x73,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0x66,0x6f,0x72,0x6d,0x73,1,0x61, -0xa3,0x51,0x62,0xa3,0x55,0x14,0x65,0x6e,0x69,0x61,0x6e,0x35,0x12,0x63,0x69,0x69, -0x23,0x64,0x9e,0x65,0xa2,0x42,0x68,0xa2,0x4d,0x6c,1,0x63,0x62,0x70,0x17,0x68, 
-0x61,0x62,0x65,0x74,0x69,0x63,0x70,1,0x66,0xa3,0x50,0x72,0x1e,0x65,0x73,0x65, -0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0x66,0x6f,0x72,0x6d,0x73,0xa3,0x50,0x16,0x68, -0x65,0x6d,0x69,0x63,0x61,0x6c,0xa2,0xd0,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73, -0xa3,0xd0,0x12,0x6c,0x61,0x6d,0xa5,7,0x1a,0x67,0x65,0x61,0x6e,0x6e,0x75,0x6d, -0x62,0x65,0x72,0x73,0xa3,0x77,0x11,0x6f,0x6d,0xa3,0xfd,7,0x6f,0x71,0x6f,0x64, -0x72,0xa2,0x41,0x75,0xa2,0x58,0x79,0x1b,0x7a,0x61,0x6e,0x74,0x69,0x6e,0x65,0x6d, -0x75,0x73,0x69,0x63,0xa2,0x5b,0x18,0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73, -0xa3,0x5b,1,0x70,0x34,0x78,0x16,0x64,0x72,0x61,0x77,0x69,0x6e,0x67,0x89,0x14, -0x6f,0x6d,0x6f,0x66,0x6f,0xa0,0x12,0x65,0x78,0x74,0xa2,0x43,0x14,0x65,0x6e,0x64, -0x65,0x64,0xa3,0x43,0x10,0x61,1,0x68,0x40,0x69,0x12,0x6c,0x6c,0x65,0x92,0x17, -0x70,0x61,0x74,0x74,0x65,0x72,0x6e,0x73,0x93,0x11,0x6d,0x69,0xa3,0xc9,1,0x67, -0x2c,0x68,0x11,0x69,0x64,0xa3,0x64,0x14,0x69,0x6e,0x65,0x73,0x65,0xa3,0x81,0x61, -0x48,0x65,0xa2,0x4e,0x68,0xa2,0x52,0x6c,0x1a,0x6f,0x63,0x6b,0x65,0x6c,0x65,0x6d, -0x65,0x6e,0x74,0x73,0x8b,3,0x6c,0x34,0x6d,0x40,0x73,0x66,0x74,0x11,0x61,0x6b, -0xa3,0xc7,0x14,0x69,0x6e,0x65,0x73,0x65,0xa3,0x93,0x11,0x75,0x6d,0xa2,0xb1,0x12, -0x73,0x75,0x70,0xa2,0xca,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xca,1, -0x69,0x30,0x73,0x13,0x61,0x76,0x61,0x68,0xa3,0xdd,0x15,0x63,0x6c,0x61,0x74,0x69, -0x6e,0x23,0x14,0x6e,0x67,0x61,0x6c,0x69,0x41,0x16,0x61,0x69,0x6b,0x73,0x75,0x6b, -0x69,0xa5,8,5,0x6f,0xc1,0x4c,0x6f,0xa2,0x55,0x75,0xa4,0x10,0x79,1,0x70, -0x9c,0x72,0x14,0x69,0x6c,0x6c,0x69,0x63,0x32,1,0x65,0x4c,0x73,0x11,0x75,0x70, -0xa2,0x61,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa2,0x61,0x12,0x61,0x72,0x79, -0xa3,0x61,0x11,0x78,0x74,3,0x61,0xa3,0x9e,0x62,0xa3,0xa0,0x63,0xa5,9,0x65, -0x13,0x6e,0x64,0x65,0x64,2,0x61,0xa3,0x9e,0x62,0xa3,0xa0,0x63,0xa5,9,0x1c, -0x72,0x69,0x6f,0x74,0x73,0x79,0x6c,0x6c,0x61,0x62,0x61,0x72,0x79,0xa3,0x7b,3, -0x6d,0x5a,0x6e,0xa2,0x95,0x70,0xa2,0xa0,0x75,0x17,0x6e,0x74,0x69,0x6e,0x67,0x72, 
-0x6f,0x64,0xa2,0x9a,0x17,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73,0xa3,0x9a,2, -0x62,0x3a,0x6d,0xa2,0x5f,0x70,0x15,0x61,0x74,0x6a,0x61,0x6d,0x6f,0xa3,0x41,0x14, -0x69,0x6e,0x69,0x6e,0x67,2,0x64,0x46,0x68,0x9e,0x6d,0x1d,0x61,0x72,0x6b,0x73, -0x66,0x6f,0x72,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x77,0x1e,0x69,0x61,0x63,0x72, -0x69,0x74,0x69,0x63,0x61,0x6c,0x6d,0x61,0x72,0x6b,0x73,0x2e,2,0x65,0x40,0x66, -0xa6,0x2a,0x73,0x18,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x83,0x16, -0x78,0x74,0x65,0x6e,0x64,0x65,0x64,0xa3,0xe0,0x17,0x61,0x6c,0x66,0x6d,0x61,0x72, -0x6b,0x73,0xa3,0x52,0x11,0x6f,0x6e,0x1f,0x69,0x6e,0x64,0x69,0x63,0x6e,0x75,0x6d, -0x62,0x65,0x72,0x66,0x6f,0x72,0x6d,0x73,0xa3,0xb2,0x1b,0x74,0x72,0x6f,0x6c,0x70, -0x69,0x63,0x74,0x75,0x72,0x65,0x73,0x83,0x12,0x74,0x69,0x63,0xa2,0x84,0x1b,0x65, -0x70,0x61,0x63,0x74,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0xdf,1,0x6e,0x3e, -0x72,0x1b,0x72,0x65,0x6e,0x63,0x79,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x75,0x15, -0x65,0x69,0x66,0x6f,0x72,0x6d,0xa2,0x98,0x16,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73, -0xa2,0x99,0x1d,0x61,0x6e,0x64,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f, -0x6e,0xa3,0x99,0x61,0xa2,0xdb,0x68,0xa4,5,0x6a,0x10,0x6b,0xa2,0x47,4,0x63, -0x86,0x65,0xa2,0x7d,0x72,0xa2,0x92,0x73,0xa2,0xa4,0x75,0x1f,0x6e,0x69,0x66,0x69, -0x65,0x64,0x69,0x64,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa2,0x47,0x18,0x65, -0x78,0x74,0x65,0x6e,0x73,0x69,0x6f,0x6e,5,0x64,0x65,0x64,0xa3,0xd1,0x65,0xa5, -0,0x66,0xa5,0x12,0x14,0x6f,0x6d,0x70,0x61,0x74,0xa2,0x45,1,0x66,0x96,0x69, -1,0x62,0x44,0x64,0x17,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa2,0x4f,0x12, -0x73,0x75,0x70,0xa3,0x5f,0x14,0x69,0x6c,0x69,0x74,0x79,0xa2,0x45,1,0x66,0x54, -0x69,0x18,0x64,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa2,0x4f,0x19,0x73,0x75, -0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x5f,0x13,0x6f,0x72,0x6d,0x73,0xa3, -0x53,0x11,0x78,0x74,5,0x64,9,0x64,0xa3,0xd1,0x65,0xa5,0,0x66,0xa5,0x12, 
-0x61,0xa3,0x46,0x62,0xa3,0x5e,0x63,0xa3,0xc5,0x19,0x61,0x64,0x69,0x63,0x61,0x6c, -0x73,0x73,0x75,0x70,0x94,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x95,1,0x74, -0x50,0x79,0x14,0x6d,0x62,0x6f,0x6c,0x73,0x9a,0x1d,0x61,0x6e,0x64,0x70,0x75,0x6e, -0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x9b,0x14,0x72,0x6f,0x6b,0x65,0x73,0xa3, -0x82,2,0x6e,0x48,0x72,0x64,0x75,0x1d,0x63,0x61,0x73,0x69,0x61,0x6e,0x61,0x6c, -0x62,0x61,0x6e,0x69,0x61,0x6e,0xa3,0xde,0x1d,0x61,0x64,0x69,0x61,0x6e,0x73,0x79, -0x6c,0x6c,0x61,0x62,0x69,0x63,0x73,0x63,0x12,0x69,0x61,0x6e,0xa3,0xa8,1,0x61, -0x6c,0x65,1,0x72,0x38,0x73,0x17,0x73,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa5, -0x19,0x13,0x6f,0x6b,0x65,0x65,0x60,0x12,0x73,0x75,0x70,0xa2,0xff,0x16,0x70,0x6c, -0x65,0x6d,0x65,0x6e,0x74,0xa3,0xff,1,0x6b,0x26,0x6d,0xa3,0xa4,0x11,0x6d,0x61, -0xa3,0xd4,3,0x65,0x3e,0x69,0x7e,0x6f,0xa2,0x5d,0x75,0x15,0x70,0x6c,0x6f,0x79, -0x61,0x6e,0xa3,0xe1,1,0x73,0x50,0x76,0x16,0x61,0x6e,0x61,0x67,0x61,0x72,0x69, -0x3e,0x12,0x65,0x78,0x74,0xa2,0xb3,0x14,0x65,0x6e,0x64,0x65,0x64,0xa3,0xb3,0x13, -0x65,0x72,0x65,0x74,0xa3,0x5a,1,0x61,0x30,0x6e,0x14,0x67,0x62,0x61,0x74,0x73, -0x91,0x18,0x63,0x72,0x69,0x74,0x69,0x63,0x61,0x6c,0x73,0x2e,2,0x65,0x30,0x66, -0x36,0x73,0x11,0x75,0x70,0xa3,0x83,0x11,0x78,0x74,0xa3,0xe0,0x18,0x6f,0x72,0x73, -0x79,0x6d,0x62,0x6f,0x6c,0x73,0x77,1,0x67,0x3e,0x6d,0x12,0x69,0x6e,0x6f,0xa2, -0xab,0x14,0x74,0x69,0x6c,0x65,0x73,0xa3,0xab,0x11,0x72,0x61,0xa5,0x1a,8,0x6d, -0x5f,0x6d,0x3a,0x6e,0x48,0x73,0x7a,0x76,0xa2,0x4b,0x77,0x12,0x69,0x64,0x65,0x43, -0x11,0x65,0x64,0x32,0x12,0x69,0x61,0x6c,0x33,2,0x61,0x40,0x62,0x37,0x6f,1, -0x62,0x28,0x6e,0x10,0x65,0x21,0x13,0x72,0x65,0x61,0x6b,0x37,0x10,0x72,0x34,0x12, -0x72,0x6f,0x77,0x35,2,0x6d,0x38,0x71,0x46,0x75,1,0x62,0x3d,0x70,0x3e,0x11, -0x65,0x72,0x3f,1,0x61,0x24,0x6c,0x39,0x11,0x6c,0x6c,0x39,1,0x72,0x3b,0x75, -0x12,0x61,0x72,0x65,0x3b,0x12,0x65,0x72,0x74,0x40,0x13,0x69,0x63,0x61,0x6c,0x41, -0x63,0x58,0x65,0x92,0x66,0x96,0x69,1,0x6e,0x36,0x73,0x10,0x6f,0x30,0x14,0x6c, 
-0x61,0x74,0x65,0x64,0x31,0x11,0x69,0x74,0x2e,0x12,0x69,0x61,0x6c,0x2f,2,0x61, -0x36,0x69,0x48,0x6f,0x10,0x6d,0x24,0x12,0x70,0x61,0x74,0x25,0x10,0x6e,0x22,0x15, -0x6f,0x6e,0x69,0x63,0x61,0x6c,0x23,0x13,0x72,0x63,0x6c,0x65,0x27,0x11,0x6e,0x63, -0x27,2,0x69,0x3a,0x6f,0x44,0x72,0x10,0x61,0x2c,0x14,0x63,0x74,0x69,0x6f,0x6e, -0x2d,0x10,0x6e,0x28,0x11,0x61,0x6c,0x29,0x11,0x6e,0x74,0x2b,4,0x61,0x3a,0x66, -0x4c,0x68,0x5e,0x6e,0x70,0x77,0x2a,0x12,0x69,0x64,0x65,0x2b,0x22,0x17,0x6d,0x62, -0x69,0x67,0x75,0x6f,0x75,0x73,0x23,0x26,0x17,0x75,0x6c,0x6c,0x77,0x69,0x64,0x74, -0x68,0x27,0x24,0x17,0x61,0x6c,0x66,0x77,0x69,0x64,0x74,0x68,0x25,0x20,1,0x61, -0x30,0x65,0x14,0x75,0x74,0x72,0x61,0x6c,0x21,0x28,0x13,0x72,0x72,0x6f,0x77,0x29, -0xd,0x6e,0xc0,0xfb,0x73,0x6d,0x73,0x3a,0x74,0x98,0x75,0xa2,0x49,0x7a,2,0x6c, -0x3b,0x70,0x3d,0x73,0x39,5,0x6f,0x28,0x6f,0x57,0x70,0x34,0x75,0x16,0x72,0x72, -0x6f,0x67,0x61,0x74,0x65,0x45,0x11,0x61,0x63,1,0x65,0x32,0x69,0x15,0x6e,0x67, -0x6d,0x61,0x72,0x6b,0x31,0x18,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x39, -0x63,0x53,0x6b,0x55,0x6d,0x51,0x1d,0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x6c, -0x65,0x74,0x74,0x65,0x72,0x27,1,0x6e,0x40,0x70,0x1c,0x70,0x65,0x72,0x63,0x61, -0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x23,0x17,0x61,0x73,0x73,0x69,0x67,0x6e, -0x65,0x64,0x21,0x6e,0x8a,0x6f,0xa2,0x47,0x70,8,0x66,0x14,0x66,0x5b,0x69,0x59, -0x6f,0x4f,0x72,0x24,0x73,0x49,0x17,0x69,0x76,0x61,0x74,0x65,0x75,0x73,0x65,0x43, -0x61,0x2c,0x63,0x4d,0x64,0x47,0x65,0x4b,0x1f,0x72,0x61,0x67,0x72,0x61,0x70,0x68, -0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x3d,2,0x64,0x33,0x6c,0x35,0x6f, -0x36,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x2d,1, -0x70,0x7c,0x74,0x12,0x68,0x65,0x72,3,0x6c,0x38,0x6e,0x42,0x70,0x4c,0x73,0x14, -0x79,0x6d,0x62,0x6f,0x6c,0x57,0x14,0x65,0x74,0x74,0x65,0x72,0x2b,0x14,0x75,0x6d, -0x62,0x65,0x72,0x37,0x19,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x4f, 
-0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x49,0x66, -0x9e,0x66,0x88,0x69,0xa2,0x4b,0x6c,0xa2,0x5c,0x6d,4,0x61,0x60,0x63,0x31,0x65, -0x2f,0x6e,0x2d,0x6f,0x15,0x64,0x69,0x66,0x69,0x65,0x72,1,0x6c,0x30,0x73,0x14, -0x79,0x6d,0x62,0x6f,0x6c,0x55,0x14,0x65,0x74,0x74,0x65,0x72,0x29,0x17,0x74,0x68, -0x73,0x79,0x6d,0x62,0x6f,0x6c,0x51,1,0x69,0x2e,0x6f,0x13,0x72,0x6d,0x61,0x74, -0x41,0x1d,0x6e,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e, -0x5b,0x10,0x6e,0x1f,0x69,0x74,0x69,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61, -0x74,0x69,0x6f,0x6e,0x59,6,0x6d,0x18,0x6d,0x29,0x6f,0x28,0x74,0x27,0x75,0x23, -0x2a,0x1c,0x77,0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x25, -0x65,0x28,0x69,0x3c,0x6c,0x25,0x19,0x74,0x74,0x65,0x72,0x6e,0x75,0x6d,0x62,0x65, -0x72,0x35,0x1a,0x6e,0x65,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x3b,0x63, -0x44,0x64,0xa2,0x60,0x65,0x1b,0x6e,0x63,0x6c,0x6f,0x73,0x69,0x6e,0x67,0x6d,0x61, -0x72,0x6b,0x2f,6,0x6e,0x39,0x6e,0x46,0x6f,0x4e,0x73,0x45,0x75,0x1b,0x72,0x72, -0x65,0x6e,0x63,0x79,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x53,0x20,0x12,0x74,0x72,0x6c, -0x3f,0x42,0x10,0x6e,1,0x6e,0x2c,0x74,0x12,0x72,0x6f,0x6c,0x3f,0x1f,0x65,0x63, -0x74,0x6f,0x72,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x4d,0x63, -0x3f,0x66,0x41,0x6c,0x1d,0x6f,0x73,0x65,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74, -0x69,0x6f,0x6e,0x4b,2,0x61,0x30,0x65,0x4a,0x69,0x12,0x67,0x69,0x74,0x33,0x1c, -0x73,0x68,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x47,0x1a,0x63, -0x69,0x6d,0x61,0x6c,0x6e,0x75,0x6d,0x62,0x65,0x72,0x33,0,0x12,0x6d,0xc2,0x3f, -0x73,0xa1,0x73,0x4e,0x74,0xa2,0x56,0x77,0xa2,0x72,0x79,0xa2,0x73,0x7a,1,0x61, -0x2c,0x68,0x12,0x61,0x69,0x6e,0x8b,0x11,0x69,0x6e,0x85,5,0x74,0x22,0x74,0x38, -0x77,0x4c,0x79,0x16,0x72,0x69,0x61,0x63,0x77,0x61,0x77,0x6f,0x18,0x72,0x61,0x69, -0x67,0x68,0x74,0x77,0x61,0x77,0xa3,0x55,0x15,0x61,0x73,0x68,0x6b,0x61,0x66,0x6d, 
-0x61,0x2e,0x65,0x38,0x68,0x11,0x69,0x6e,0x6b,0x10,0x64,0x62,0x11,0x68,0x65,0x65, -1,0x65,0x2e,0x6d,0x13,0x6b,0x61,0x74,0x68,0x69,0x10,0x6e,0x67,1,0x61,0x4e, -0x65,1,0x68,0x28,0x74,0x10,0x68,0x77,0x16,0x6d,0x61,0x72,0x62,0x75,0x74,0x61, -0x74,0x13,0x67,0x6f,0x61,0x6c,0x3d,1,0x68,0x71,0x77,0x73,0x11,0x61,0x77,0x79, -1,0x65,0x32,0x75,0x11,0x64,0x68,0x80,0x11,0x68,0x65,0x83,0x10,0x68,0x7a,1, -0x62,0x34,0x77,0x16,0x69,0x74,0x68,0x74,0x61,0x69,0x6c,0x7f,0x14,0x61,0x72,0x72, -0x65,0x65,0x7d,0x6d,0x6c,0x6e,0xa4,0x6b,0x70,0xa4,0x88,0x71,0xa4,0x88,0x72,1, -0x65,0x38,0x6f,0x18,0x68,0x69,0x6e,0x67,0x79,0x61,0x79,0x65,0x68,0x93,1,0x68, -0x5f,0x76,0x16,0x65,0x72,0x73,0x65,0x64,0x70,0x65,0x61,2,0x61,0x2e,0x65,0xa4, -0x3e,0x69,0x10,0x6d,0x53,1,0x6c,0xa2,0xe7,0x6e,0x16,0x69,0x63,0x68,0x61,0x65, -0x61,0x6e,0,0x12,0x6e,0x76,0x73,0x51,0x73,0x3e,0x74,0x5c,0x77,0xa0,0x79,0xa2, -0x42,0x7a,0x13,0x61,0x79,0x69,0x6e,0xa3,0x54,0x10,0x61,1,0x64,0x2e,0x6d,0x12, -0x65,0x6b,0x68,0xa3,0x4c,0x11,0x68,0x65,0xa3,0x4b,3,0x61,0x38,0x65,0x3c,0x68, -0x4a,0x77,0x13,0x65,0x6e,0x74,0x79,0xa3,0x51,0x10,0x77,0xa3,0x4d,1,0x6e,0xa3, -0x4e,0x74,0x10,0x68,0xa3,0x4f,0x14,0x61,0x6d,0x65,0x64,0x68,0xa3,0x50,0x11,0x61, -0x77,0xa3,0x52,0x12,0x6f,0x64,0x68,0xa3,0x53,0x6e,0x3a,0x6f,0x40,0x70,0x46,0x71, -0x4a,0x72,0x12,0x65,0x73,0x68,0xa3,0x4a,0x11,0x75,0x6e,0xa3,0x46,0x11,0x6e,0x65, -0xa3,0x47,0x10,0x65,0xa3,0x48,0x12,0x6f,0x70,0x68,0xa3,0x49,0x67,0x33,0x67,0x38, -0x68,0x40,0x6b,0x5e,0x6c,0x66,0x6d,0x11,0x65,0x6d,0xa3,0x45,0x13,0x69,0x6d,0x65, -0x6c,0xa1,1,0x65,0x32,0x75,0x14,0x6e,0x64,0x72,0x65,0x64,0xa3,0x42,0x11,0x74, -0x68,0xa3,0x41,0x12,0x61,0x70,0x68,0xa3,0x43,0x14,0x61,0x6d,0x65,0x64,0x68,0xa3, -0x44,0x61,0x34,0x62,0x4a,0x64,0x50,0x66,0x12,0x69,0x76,0x65,0x9f,1,0x6c,0x2a, -0x79,0x11,0x69,0x6e,0x97,0x12,0x65,0x70,0x68,0x95,0x12,0x65,0x74,0x68,0x99,1, -0x61,0x30,0x68,0x14,0x61,0x6d,0x65,0x64,0x68,0x9d,0x13,0x6c,0x65,0x74,0x68,0x9b, -0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,6,0x6e,0x2c,0x6e,0x34,0x72,0x5e,0x73,0x62, 
-0x74,0x11,0x74,0x61,0xa3,0x63,2,0x67,0x2e,0x6e,0x32,0x79,0x10,0x61,0xa3,0x60, -0x10,0x61,0xa3,0x5d,1,0x61,0xa3,0x5e,0x6e,0x10,0x61,0xa3,0x5f,0x10,0x61,0xa3, -0x61,0x11,0x73,0x61,0xa3,0x62,0x62,0x3c,0x6a,0x42,0x6c,0x10,0x6c,1,0x61,0xa3, -0x5b,0x6c,0x10,0x61,0xa3,0x5c,0x11,0x68,0x61,0xa3,0x59,0x10,0x61,0xa3,0x5a,0x11, -0x65,0x6d,0x51,2,0x6f,0x2c,0x75,0x50,0x79,0x10,0x61,0x91,1,0x6a,0x28,0x6f, -0x10,0x6e,0x55,0x1a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x67,0x72,0x6f,0x75,0x70,0x21, -0x10,0x6e,0x57,0x10,0x65,0x59,0x10,0x61,1,0x66,0x5b,0x70,0x10,0x68,0x5d,0x66, -0x9a,0x66,0x42,0x67,0x7a,0x68,0x8a,0x6b,0xa2,0x75,0x6c,0x11,0x61,0x6d,0x4c,0x12, -0x61,0x64,0x68,0x4f,2,0x61,0x3e,0x65,0x4a,0x69,0x19,0x6e,0x61,0x6c,0x73,0x65, -0x6d,0x6b,0x61,0x74,0x68,0x35,0x15,0x72,0x73,0x69,0x79,0x65,0x68,0x8f,0x86,0x10, -0x68,0x33,0x10,0x61,1,0x66,0x37,0x6d,0x11,0x61,0x6c,0x39,1,0x61,0x40,0x65, -0x3e,1,0x68,0x28,0x74,0x10,0x68,0x45,0x40,0x13,0x67,0x6f,0x61,0x6c,0x43,2, -0x68,0x3b,0x6d,0x5c,0x6e,0x1a,0x69,0x66,0x69,0x72,0x6f,0x68,0x69,0x6e,0x67,0x79, -0x61,1,0x6b,0x2a,0x70,0x10,0x61,0xa3,0x65,0x15,0x69,0x6e,0x6e,0x61,0x79,0x61, -0xa3,0x64,0x1a,0x7a,0x61,0x6f,0x6e,0x68,0x65,0x68,0x67,0x6f,0x61,0x6c,0x3d,2, -0x61,0x3a,0x68,0x44,0x6e,0x17,0x6f,0x74,0x74,0x65,0x64,0x68,0x65,0x68,0x4b,1, -0x66,0x47,0x70,0x10,0x68,0x49,0x12,0x61,0x70,0x68,0x89,0x61,0x2e,0x62,0x8a,0x64, -0xa2,0x51,0x65,0x31,2,0x66,0x3c,0x69,0x70,0x6c,1,0x61,0x28,0x65,0x10,0x66, -0x27,0x11,0x70,0x68,0x25,0x14,0x72,0x69,0x63,0x61,0x6e,2,0x66,0x30,0x6e,0x36, -0x71,0x11,0x61,0x66,0xa3,0x58,0x11,0x65,0x68,0xa3,0x56,0x12,0x6f,0x6f,0x6e,0xa3, -0x57,0x10,0x6e,0x23,1,0x65,0x4a,0x75,0x10,0x72,0x1f,0x75,0x73,0x68,0x61,0x73, -0x6b,0x69,0x79,0x65,0x68,0x62,0x61,0x72,0x72,0x65,0x65,0x8d,1,0x68,0x29,0x74, -0x10,0x68,0x2b,0x11,0x61,0x6c,0x2c,0x16,0x61,0x74,0x68,0x72,0x69,0x73,0x68,0x2f, -7,0x6e,0x2e,0x6e,0x2c,0x72,0x3e,0x74,0x56,0x75,0x21,0x18,0x6f,0x6e,0x6a,0x6f, -0x69,0x6e,0x69,0x6e,0x67,0x21,0x28,0x1a,0x69,0x67,0x68,0x74,0x6a,0x6f,0x69,0x6e, 
-0x69,0x6e,0x67,0x29,0x2a,0x19,0x72,0x61,0x6e,0x73,0x70,0x61,0x72,0x65,0x6e,0x74, -0x2b,0x63,0x23,0x64,0x40,0x6a,0x56,0x6c,0x26,0x19,0x65,0x66,0x74,0x6a,0x6f,0x69, -0x6e,0x69,0x6e,0x67,0x27,0x24,0x19,0x75,0x61,0x6c,0x6a,0x6f,0x69,0x6e,0x69,0x6e, -0x67,0x25,0x19,0x6f,0x69,0x6e,0x63,0x61,0x75,0x73,0x69,0x6e,0x67,0x23,0,0x13, -0x6e,0xc0,0xd0,0x73,0x49,0x73,0x48,0x75,0x78,0x77,0x84,0x78,0x9c,0x7a,0x10,0x77, -0x58,1,0x6a,0x75,0x73,0x13,0x70,0x61,0x63,0x65,0x59,4,0x61,0x51,0x67,0x53, -0x70,0x28,0x75,0x30,0x79,0x57,0x54,0x12,0x61,0x63,0x65,0x55,0x16,0x72,0x72,0x6f, -0x67,0x61,0x74,0x65,0x53,0x15,0x6e,0x6b,0x6e,0x6f,0x77,0x6e,0x21,1,0x6a,0x5d, -0x6f,0x17,0x72,0x64,0x6a,0x6f,0x69,0x6e,0x65,0x72,0x5d,0x10,0x78,0x21,0x6e,0x60, -0x6f,0xa2,0x41,0x70,0xa2,0x50,0x71,0xa2,0x6e,0x72,1,0x65,0x24,0x69,0x6f,0x1e, -0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,0x69,0x63,0x61,0x74,0x6f,0x72,0x6f, -4,0x65,0x3e,0x6c,0x5b,0x6f,0x46,0x73,0x45,0x75,0x46,0x14,0x6d,0x65,0x72,0x69, -0x63,0x47,0x15,0x78,0x74,0x6c,0x69,0x6e,0x65,0x5b,0x17,0x6e,0x73,0x74,0x61,0x72, -0x74,0x65,0x72,0x45,0x10,0x70,0x48,0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63,0x74,0x75, -0x61,0x74,0x69,0x6f,0x6e,0x49,1,0x6f,0x3e,0x72,0x4c,0x1a,0x65,0x66,0x69,0x78, -0x6e,0x75,0x6d,0x65,0x72,0x69,0x63,0x4d,0x4a,0x1b,0x73,0x74,0x66,0x69,0x78,0x6e, -0x75,0x6d,0x65,0x72,0x69,0x63,0x4b,0x10,0x75,0x4e,0x16,0x6f,0x74,0x61,0x74,0x69, -0x6f,0x6e,0x4f,0x68,0x7b,0x68,0x50,0x69,0x86,0x6a,0xa2,0x61,0x6c,0xa2,0x65,0x6d, -0x1c,0x61,0x6e,0x64,0x61,0x74,0x6f,0x72,0x79,0x62,0x72,0x65,0x61,0x6b,0x2d,4, -0x32,0x5f,0x33,0x61,0x65,0x34,0x6c,0x6d,0x79,0x3a,0x13,0x70,0x68,0x65,0x6e,0x3b, -0x19,0x62,0x72,0x65,0x77,0x6c,0x65,0x74,0x74,0x65,0x72,0x6d,2,0x64,0x28,0x6e, -0x3c,0x73,0x41,0x3c,0x18,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63,0x3d,0x3e, -1,0x66,0x3e,0x73,0x11,0x65,0x70,1,0x61,0x22,0x65,0x14,0x72,0x61,0x62,0x6c, -0x65,0x3f,0x18,0x69,0x78,0x6e,0x75,0x6d,0x65,0x72,0x69,0x63,0x41,2,0x6c,0x63, 
-0x74,0x65,0x76,0x67,1,0x66,0x43,0x69,0x15,0x6e,0x65,0x66,0x65,0x65,0x64,0x43, -0x61,0x40,0x62,0x70,0x63,0xa2,0x55,0x65,0xa2,0xdb,0x67,0x10,0x6c,0x38,0x11,0x75, -0x65,0x39,2,0x69,0x23,0x6c,0x34,0x6d,0x16,0x62,0x69,0x67,0x75,0x6f,0x75,0x73, -0x23,0x24,0x17,0x70,0x68,0x61,0x62,0x65,0x74,0x69,0x63,0x25,4,0x32,0x27,0x61, -0x29,0x62,0x2b,0x6b,0x2d,0x72,0x12,0x65,0x61,0x6b,2,0x61,0x36,0x62,0x3e,0x73, -0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x57,0x13,0x66,0x74,0x65,0x72,0x29,1,0x65, -0x2a,0x6f,0x11,0x74,0x68,0x27,0x13,0x66,0x6f,0x72,0x65,0x2b,7,0x6d,0x51,0x6d, -0x33,0x6f,0x28,0x70,0x69,0x72,0x35,1,0x6d,0x76,0x6e,1,0x64,0x3c,0x74,0x1a, -0x69,0x6e,0x67,0x65,0x6e,0x74,0x62,0x72,0x65,0x61,0x6b,0x2f,0x15,0x69,0x74,0x69, -0x6f,0x6e,0x61,0x1f,0x6c,0x6a,0x61,0x70,0x61,0x6e,0x65,0x73,0x65,0x73,0x74,0x61, -0x72,0x74,0x65,0x72,0x6b,1,0x62,0x3a,0x70,0x19,0x6c,0x65,0x78,0x63,0x6f,0x6e, -0x74,0x65,0x78,0x74,0x51,0x18,0x69,0x6e,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x33, -0x61,0x6a,0x62,0x2f,0x6a,0x6b,0x6c,0x30,0x13,0x6f,0x73,0x65,0x70,1,0x61,0x38, -0x75,0x18,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x31,0x18,0x72,0x65,0x6e, -0x74,0x68,0x65,0x73,0x69,0x73,0x69,0x1b,0x72,0x72,0x69,0x61,0x67,0x65,0x72,0x65, -0x74,0x75,0x72,0x6e,0x35,2,0x62,0x3e,0x6d,0x46,0x78,0x36,0x18,0x63,0x6c,0x61, -0x6d,0x61,0x74,0x69,0x6f,0x6e,0x37,0x70,0x12,0x61,0x73,0x65,0x71,0x72,0x16,0x6f, -0x64,0x69,0x66,0x69,0x65,0x72,0x73,1,0x64,0x42,0x6e,1,0x6f,0x32,0x75,0x26, -0x14,0x6d,0x65,0x72,0x69,0x63,0x27,0x11,0x6e,0x65,0x21,1,0x65,0x2e,0x69,0x24, -0x12,0x67,0x69,0x74,0x25,0x22,0x14,0x63,0x69,0x6d,0x61,0x6c,0x23,0,0x18,0x6e, -0xc3,0xe6,0x74,0xc1,0x51,0x77,0x7a,0x77,0xa2,0x4c,0x78,0xa2,0x60,0x79,0xa2,0x6a, -0x7a,6,0x73,0x1e,0x73,0x34,0x78,0x42,0x79,0x48,0x7a,0x11,0x7a,0x7a,0xa3,0x67, -0x10,0x79,1,0x65,0xa3,0xae,0x6d,0xa3,0x81,0x11,0x78,0x78,0xa3,0x66,0x11,0x79, -0x79,0x21,0x61,0x30,0x69,0x58,0x6d,0x11,0x74,0x68,0xa3,0x80,0x10,0x6e,1,0x61, -0x26,0x62,0xa3,0xb1,0x1a,0x62,0x61,0x7a,0x61,0x72,0x73,0x71,0x75,0x61,0x72,0x65, 
-0xa3,0xb1,0x11,0x6e,0x68,0x23,1,0x61,0x2c,0x6f,0x11,0x6c,0x65,0xa3,0x9b,0x11, -0x72,0x61,0xa2,0x92,0x15,0x6e,0x67,0x63,0x69,0x74,0x69,0xa3,0x92,1,0x70,0x2c, -0x73,0x11,0x75,0x78,0xa3,0x65,0x11,0x65,0x6f,0x9b,0x10,0x69,0x72,0x11,0x69,0x69, -0x73,0x74,0x4a,0x75,0xa2,0xba,0x76,1,0x61,0x2c,0x69,0x11,0x73,0x70,0xa3,0x64, -0x10,0x69,0xa2,0x63,0x10,0x69,0xa3,0x63,5,0x67,0x36,0x67,0x68,0x68,0x6c,0x69, -2,0x62,0x3a,0x66,0x4a,0x72,0x10,0x68,0xa2,0x9e,0x12,0x75,0x74,0x61,0xa3,0x9e, -1,0x65,0x24,0x74,0x6f,0x12,0x74,0x61,0x6e,0x6f,0x14,0x69,0x6e,0x61,0x67,0x68, -0x99,0x11,0x6c,0x67,0x75,0x10,0x61,1,0x61,0x24,0x69,0x6d,0x6a,0x11,0x6e,0x61, -0x6b,0x61,0x30,0x65,0xa2,0x5b,0x66,0x11,0x6e,0x67,0x99,6,0x6c,0x21,0x6c,0x32, -0x6d,0x38,0x6e,0x44,0x76,0x10,0x74,0xa3,0x7f,1,0x65,0x89,0x75,0x97,1,0x69, -0x24,0x6c,0x67,0x10,0x6c,0x67,0x10,0x67,0xa2,0x9a,0x11,0x75,0x74,0xa3,0x9a,0x67, -0x36,0x69,0x52,0x6b,0x10,0x72,0xa2,0x99,0x10,0x69,0xa3,0x99,1,0x61,0x30,0x62, -0x7a,0x13,0x61,0x6e,0x77,0x61,0x7b,0x12,0x6c,0x6f,0x67,0x75,2,0x6c,0x32,0x74, -0x34,0x76,0x12,0x69,0x65,0x74,0xa3,0x7f,0x10,0x65,0x89,0x12,0x68,0x61,0x6d,0xa3, -0x6a,1,0x6c,0x2a,0x6e,0x10,0x67,0xa3,0x62,0x10,0x75,0x68,0x11,0x67,0x75,0x69, -1,0x67,0x32,0x6e,0x14,0x6b,0x6e,0x6f,0x77,0x6e,0xa3,0x67,0x11,0x61,0x72,0x8a, -0x13,0x69,0x74,0x69,0x63,0x8b,0x71,0xc1,0x13,0x71,0xa2,0xde,0x72,0xa2,0xe3,0x73, -6,0x69,0x8a,0x69,0x72,0x6f,0xa2,0x4c,0x75,0xa2,0x75,0x79,1,0x6c,0x46,0x72, -4,0x63,0x65,0x65,0xa3,0x5f,0x69,0x2c,0x6a,0xa3,0x60,0x6e,0xa3,0x61,0x11,0x61, -0x63,0x65,0x10,0x6f,0x94,0x16,0x74,0x69,0x6e,0x61,0x67,0x72,0x69,0x95,2,0x64, -0x3c,0x67,0x4c,0x6e,1,0x64,0xa3,0x91,0x68,0x62,0x12,0x61,0x6c,0x61,0x63,0x10, -0x64,0xa2,0xa6,0x12,0x68,0x61,0x6d,0xa3,0xa6,0x17,0x6e,0x77,0x72,0x69,0x74,0x69, -0x6e,0x67,0xa3,0x70,2,0x67,0x3a,0x72,0x52,0x79,0x10,0x6f,0xa2,0xb0,0x12,0x6d, -0x62,0x6f,0xa3,0xb0,1,0x64,0x26,0x6f,0xa3,0xb8,0xa2,0xb7,0x12,0x69,0x61,0x6e, -0xa3,0xb7,0x10,0x61,0xa2,0x98,0x16,0x73,0x6f,0x6d,0x70,0x65,0x6e,0x67,0xa3,0x98, 
-0x11,0x6e,0x64,0xa2,0x71,0x14,0x61,0x6e,0x65,0x73,0x65,0xa3,0x71,0x61,0x5c,0x67, -0xa2,0x43,0x68,1,0x61,0x2a,0x72,0x10,0x64,0xa3,0x97,2,0x72,0x28,0x76,0x30, -0x77,0x87,0x12,0x61,0x64,0x61,0xa3,0x97,0x12,0x69,0x61,0x6e,0x87,2,0x6d,0x40, -0x72,0x58,0x75,0x10,0x72,0xa2,0x6f,0x15,0x61,0x73,0x68,0x74,0x72,0x61,0xa3,0x6f, -1,0x61,0x26,0x72,0xa3,0x7e,0x14,0x72,0x69,0x74,0x61,0x6e,0xa3,0x7e,1,0x61, -0xa3,0x5e,0x62,0xa3,0x85,0x11,0x6e,0x77,0xa3,0x70,0x11,0x61,0x61,1,0x63,0x2f, -0x69,0x23,3,0x65,0x3e,0x6a,0x48,0x6f,0x4e,0x75,0x10,0x6e,1,0x69,0x24,0x72, -0x61,0x10,0x63,0x61,0x13,0x6a,0x61,0x6e,0x67,0xa3,0x6e,0x11,0x6e,0x67,0xa3,0x6e, -1,0x68,0x2a,0x72,0x10,0x6f,0xa3,0x5d,0x10,0x67,0xa3,0xb6,0x6e,0xa2,0x83,0x6f, -0xa2,0xca,0x70,5,0x6c,0x1e,0x6c,0x44,0x72,0x4a,0x73,0x1b,0x61,0x6c,0x74,0x65, -0x72,0x70,0x61,0x68,0x6c,0x61,0x76,0x69,0xa3,0x7b,0x11,0x72,0x64,0xa3,0x5c,0x11, -0x74,0x69,0xa3,0x7d,0x61,0x7c,0x65,0xa2,0x54,0x68,3,0x61,0x3e,0x6c,0x4e,0x6e, -0x5e,0x6f,0x16,0x65,0x6e,0x69,0x63,0x69,0x61,0x6e,0xa3,0x5b,0x10,0x67,0xa2,0x5a, -0x12,0x73,0x70,0x61,0xa3,0x5a,2,0x69,0xa3,0x7a,0x70,0xa3,0x7b,0x76,0xa3,0x7c, -0x10,0x78,0xa3,0x5b,2,0x68,0x3e,0x6c,0x50,0x75,0x10,0x63,0xa2,0xa5,0x14,0x69, -0x6e,0x68,0x61,0x75,0xa3,0xa5,0x17,0x61,0x77,0x68,0x68,0x6d,0x6f,0x6e,0x67,0xa3, -0x4b,0x10,0x6d,0xa2,0x90,0x14,0x79,0x72,0x65,0x6e,0x65,0xa3,0x90,0x11,0x72,0x6d, -0xa3,0x59,5,0x6b,0x1e,0x6b,0x32,0x73,0x4a,0x75,0x12,0x73,0x68,0x75,0xa3,0x96, -1,0x67,0x2e,0x6f,0xa2,0x57,0x10,0x6f,0xa3,0x57,0x10,0x62,0xa3,0x84,0x11,0x68, -0x75,0xa3,0x96,0x61,0x42,0x62,0x60,0x65,0x10,0x77,1,0x61,0xa3,0xaa,0x74,0x14, -0x61,0x69,0x6c,0x75,0x65,0x97,1,0x62,0x2a,0x72,0x10,0x62,0xa3,0x8e,0x15,0x61, -0x74,0x61,0x65,0x61,0x6e,0xa3,0x8f,0x11,0x61,0x74,0xa3,0x8f,3,0x67,0x5a,0x6c, -0x6c,0x72,0xa2,0x93,0x73,2,0x61,0x36,0x67,0x3c,0x6d,0x10,0x61,0x84,0x12,0x6e, -0x79,0x61,0x85,0x11,0x67,0x65,0xa3,0xab,0x10,0x65,0xa3,0xab,1,0x61,0x2a,0x68, -0x11,0x61,0x6d,0x5b,0x10,0x6d,0x5b,1,0x63,0xa2,0x60,0x64,5,0x70,0x37,0x70, 
-0x36,0x73,0x54,0x74,0x14,0x75,0x72,0x6b,0x69,0x63,0xa3,0x58,0x11,0x65,0x72,1, -0x6d,0x2c,0x73,0x12,0x69,0x61,0x6e,0x9b,0x11,0x69,0x63,0xa3,0x59,0x10,0x6f,1, -0x67,0x3a,0x75,0x18,0x74,0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e,0xa3,0x85,0x13, -0x64,0x69,0x61,0x6e,0xa3,0xb8,0x68,0x42,0x69,0x54,0x6e,0x1a,0x6f,0x72,0x74,0x68, -0x61,0x72,0x61,0x62,0x69,0x61,0x6e,0xa3,0x8e,0x17,0x75,0x6e,0x67,0x61,0x72,0x69, -0x61,0x6e,0xa3,0x4c,0x14,0x74,0x61,0x6c,0x69,0x63,0x5d,1,0x68,0x26,0x6b,0xa3, -0x6d,0x12,0x69,0x6b,0x69,0xa3,0x6d,2,0x69,0x2c,0x6b,0x30,0x79,0x10,0x61,0x5f, -0x11,0x79,0x61,0x5f,0x10,0x68,0xa3,0x58,0x68,0xc2,0xef,0x6b,0xc2,0xa,0x6b,0xa4, -0x17,0x6c,0xa4,0x98,0x6d,8,0x6f,0x46,0x6f,0x48,0x72,0x74,0x74,0x80,0x75,0x86, -0x79,1,0x61,0x28,0x6d,0x10,0x72,0x59,0x13,0x6e,0x6d,0x61,0x72,0x59,2,0x64, -0x2e,0x6e,0x32,0x6f,0x10,0x6e,0xa3,0x72,0x10,0x69,0xa3,0xa3,0x10,0x67,0x56,0x14, -0x6f,0x6c,0x69,0x61,0x6e,0x57,0x10,0x6f,0xa2,0x95,0x10,0x6f,0xa3,0x95,0x11,0x65, -0x69,0xa3,0x73,0x11,0x6c,0x74,0xa2,0xa4,0x12,0x61,0x6e,0x69,0xa3,0xa4,0x61,0x36, -0x65,0xa2,0x67,0x69,0xa2,0xbd,0x6c,0x11,0x79,0x6d,0x55,6,0x6e,0x38,0x6e,0x32, -0x72,0x5c,0x73,0x6c,0x79,0x10,0x61,0xa3,0x55,1,0x64,0x38,0x69,0xa2,0x79,0x15, -0x63,0x68,0x61,0x65,0x61,0x6e,0xa3,0x79,0xa2,0x54,0x12,0x61,0x69,0x63,0xa3,0x54, -0x10,0x63,0xa2,0xa9,0x12,0x68,0x65,0x6e,0xa3,0xa9,0x18,0x61,0x72,0x61,0x6d,0x67, -0x6f,0x6e,0x64,0x69,0xa3,0xaf,0x68,0x36,0x6b,0x4c,0x6c,0x15,0x61,0x79,0x61,0x6c, -0x61,0x6d,0x55,1,0x61,0x26,0x6a,0xa3,0xa0,0x13,0x6a,0x61,0x6e,0x69,0xa3,0xa0, -0x10,0x61,0xa2,0xb4,0x12,0x73,0x61,0x72,0xa3,0xb4,3,0x64,0x78,0x65,0x94,0x6e, -0xa2,0x42,0x72,1,0x63,0xa3,0x8d,0x6f,0xa2,0x56,0x13,0x69,0x74,0x69,0x63,1, -0x63,0x3c,0x68,0x19,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x56, -0x15,0x75,0x72,0x73,0x69,0x76,0x65,0xa3,0x8d,1,0x65,0x26,0x66,0xa3,0xb5,0x16, -0x66,0x61,0x69,0x64,0x72,0x69,0x6e,0xa3,0xb5,0x17,0x74,0x65,0x69,0x6d,0x61,0x79, 
-0x65,0x6b,0xa3,0x73,0x10,0x64,0xa2,0x8c,0x17,0x65,0x6b,0x69,0x6b,0x61,0x6b,0x75, -0x69,0xa3,0x8c,0x11,0x61,0x6f,0xa3,0x5c,5,0x6f,0x14,0x6f,0x30,0x70,0x36,0x74, -0x11,0x68,0x69,0xa3,0x78,0x11,0x72,0x65,0xa3,0x77,0x11,0x65,0x6c,0xa3,0x8a,0x61, -0x2e,0x68,0x98,0x6e,0x11,0x64,0x61,0x4b,4,0x69,0x3c,0x6c,0x44,0x6e,0x48,0x74, -0x56,0x79,0x13,0x61,0x68,0x6c,0x69,0xa3,0x4f,0x12,0x74,0x68,0x69,0xa3,0x78,0x10, -0x69,0xa3,0x4f,1,0x61,0x4d,0x6e,0x12,0x61,0x64,0x61,0x4b,0x14,0x61,0x6b,0x61, -0x6e,0x61,0x4c,0x19,0x6f,0x72,0x68,0x69,0x72,0x61,0x67,0x61,0x6e,0x61,0x8d,3, -0x61,0x3c,0x6d,0x4e,0x6f,0x5a,0x75,0x15,0x64,0x61,0x77,0x61,0x64,0x69,0xa3,0x91, -0x10,0x72,0x92,0x15,0x6f,0x73,0x68,0x74,0x68,0x69,0x93,1,0x65,0x24,0x72,0x4f, -0x10,0x72,0x4f,0x10,0x6a,0xa2,0x9d,0x11,0x6b,0x69,0xa3,0x9d,4,0x61,0x5c,0x65, -0x90,0x69,0xa0,0x6f,0xa2,0x5d,0x79,1,0x63,0x34,0x64,0x10,0x69,0xa2,0x6c,0x11, -0x61,0x6e,0xa3,0x6c,0x10,0x69,0xa2,0x6b,0x11,0x61,0x6e,0xa3,0x6b,2,0x6e,0x42, -0x6f,0x46,0x74,3,0x66,0xa3,0x50,0x67,0xa3,0x51,0x69,0x24,0x6e,0x53,0x10,0x6e, -0x53,0x10,0x61,0xa3,0x6a,0x50,0x10,0x6f,0x51,0x11,0x70,0x63,0xa2,0x52,0x11,0x68, -0x61,0xa3,0x52,2,0x6d,0x2e,0x6e,0x36,0x73,0x10,0x75,0xa3,0x83,0x10,0x62,0x80, -0x10,0x75,0x81,2,0x61,0xa3,0x53,0x62,0x83,0x65,0x11,0x61,0x72,1,0x61,0xa3, -0x53,0x62,0x83,0x11,0x6d,0x61,0xa3,0x8b,0x68,0x6e,0x69,0xa2,0x91,0x6a,2,0x61, -0x30,0x70,0x52,0x75,0x11,0x72,0x63,0xa3,0x94,1,0x6d,0x38,0x76,0x10,0x61,0xa2, -0x4e,0x13,0x6e,0x65,0x73,0x65,0xa3,0x4e,0x10,0x6f,0xa3,0xad,0x11,0x61,0x6e,0xa3, -0x69,6,0x6c,0x1a,0x6c,0x34,0x6d,0x3a,0x72,0x40,0x75,0x11,0x6e,0x67,0xa3,0x4c, -0x11,0x75,0x77,0xa3,0x9c,0x11,0x6e,0x67,0xa3,0x4b,0x11,0x6b,0x74,0x8d,0x61,0x3c, -0x65,0xa2,0x43,0x69,0x11,0x72,0x61,0x48,0x13,0x67,0x61,0x6e,0x61,0x49,1,0x6e, -0x34,0x74,0x10,0x72,0xa2,0xa2,0x11,0x61,0x6e,0xa3,0xa2,0x42,6,0x6f,0xe,0x6f, -0x77,0x73,0xa3,0x49,0x74,0xa3,0x4a,0x75,0x12,0x6e,0x6f,0x6f,0x77,0x62,0xa3,0xac, -0x67,0x3e,0x69,0x42,0x19,0x66,0x69,0x72,0x6f,0x68,0x69,0x6e,0x67,0x79,0x61,0xa3, 
-0xb6,0x44,0x11,0x75,0x6c,0x45,0x11,0x62,0x72,0x46,0x11,0x65,0x77,0x47,2,0x6d, -0x2e,0x6e,0x4a,0x74,0x11,0x61,0x6c,0x5d,0x1c,0x70,0x65,0x72,0x69,0x61,0x6c,0x61, -0x72,0x61,0x6d,0x61,0x69,0x63,0xa3,0x74,2,0x64,0x66,0x68,0x6a,0x73,0x1b,0x63, -0x72,0x69,0x70,0x74,0x69,0x6f,0x6e,0x61,0x6c,0x70,0x61,1,0x68,0x32,0x72,0x14, -0x74,0x68,0x69,0x61,0x6e,0xa3,0x7d,0x13,0x6c,0x61,0x76,0x69,0xa3,0x7a,0x10,0x73, -0xa3,0x4d,0x15,0x65,0x72,0x69,0x74,0x65,0x64,0x23,0x64,0xc0,0xec,0x64,0xa2,0x7a, -0x65,0xa2,0xad,0x67,4,0x65,0x82,0x6c,0x9a,0x6f,0xa2,0x46,0x72,0xa2,0x55,0x75, -2,0x6a,0x3c,0x6e,0x4e,0x72,1,0x6d,0x24,0x75,0x41,0x13,0x75,0x6b,0x68,0x69, -0x41,1,0x61,0x24,0x72,0x3f,0x13,0x72,0x61,0x74,0x69,0x3f,0x18,0x6a,0x61,0x6c, -0x61,0x67,0x6f,0x6e,0x64,0x69,0xa3,0xb3,0x10,0x6f,1,0x6b,0xa3,0x48,0x72,0x38, -0x13,0x67,0x69,0x61,0x6e,0x39,0x11,0x61,0x67,0x90,0x15,0x6f,0x6c,0x69,0x74,0x69, -0x63,0x91,1,0x6e,0x30,0x74,0x10,0x68,0x3a,0x11,0x69,0x63,0x3b,1,0x67,0xa3, -0xb3,0x6d,0xa3,0xaf,1,0x61,0x32,0x65,1,0x65,0x24,0x6b,0x3d,0x10,0x6b,0x3d, -0x10,0x6e,0xa2,0x89,0x12,0x74,0x68,0x61,0xa3,0x89,3,0x65,0x42,0x6f,0x68,0x73, -0x76,0x75,0x11,0x70,0x6c,0xa2,0x87,0x13,0x6f,0x79,0x61,0x6e,0xa3,0x87,1,0x73, -0x38,0x76,0x10,0x61,0x34,0x15,0x6e,0x61,0x67,0x61,0x72,0x69,0x35,0x13,0x65,0x72, -0x65,0x74,0x33,0x11,0x67,0x72,0xa2,0xb2,0x10,0x61,0xa3,0xb2,0x11,0x72,0x74,0x33, -2,0x67,0x3a,0x6c,0x72,0x74,0x11,0x68,0x69,0x36,0x13,0x6f,0x70,0x69,0x63,0x37, -0x10,0x79,2,0x64,0xa3,0x45,0x68,0xa3,0x46,0x70,0xa2,0x47,0x1e,0x74,0x69,0x61, -0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x47,0x11,0x62, -0x61,0xa2,0x88,0x12,0x73,0x61,0x6e,0xa3,0x88,0x61,0xa2,0xa2,0x62,0xa4,7,0x63, -6,0x6f,0x3d,0x6f,0x5a,0x70,0x76,0x75,0x7a,0x79,1,0x70,0x3e,0x72,2,0x69, -0x2a,0x6c,0x31,0x73,0xa3,0x44,0x13,0x6c,0x6c,0x69,0x63,0x31,0x13,0x72,0x69,0x6f, -0x74,0x7f,1,0x6d,0x30,0x70,0x10,0x74,0x2e,0x11,0x69,0x63,0x2f,0x12,0x6d,0x6f, -0x6e,0x21,0x11,0x72,0x74,0x7f,0x16,0x6e,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa3,0x65, 
-0x61,0x32,0x68,0xa2,0x41,0x69,0x11,0x72,0x74,0xa3,0x43,3,0x6b,0x4c,0x6e,0x50, -0x72,0x76,0x75,0x1d,0x63,0x61,0x73,0x69,0x61,0x6e,0x61,0x6c,0x62,0x61,0x6e,0x69, -0x61,0x6e,0xa3,0x9f,0x10,0x6d,0xa3,0x76,1,0x61,0x24,0x73,0x71,0x1d,0x64,0x69, -0x61,0x6e,0x61,0x62,0x6f,0x72,0x69,0x67,0x69,0x6e,0x61,0x6c,0x71,0x10,0x69,0xa2, -0x68,0x11,0x61,0x6e,0xa3,0x68,1,0x61,0x34,0x65,0x10,0x72,0x2c,0x13,0x6f,0x6b, -0x65,0x65,0x2d,1,0x6b,0x26,0x6d,0xa3,0x42,0x11,0x6d,0x61,0xa3,0x76,6,0x68, -0x4a,0x68,0x48,0x6e,0x4e,0x72,0x76,0x76,1,0x65,0x2a,0x73,0x10,0x74,0xa3,0x75, -0x13,0x73,0x74,0x61,0x6e,0xa3,0x75,0x11,0x6f,0x6d,0xa3,0xa1,0x11,0x61,0x74,0x1f, -0x6f,0x6c,0x69,0x61,0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73, -0xa3,0x9c,1,0x61,0x3e,0x6d,2,0x65,0x2a,0x69,0xa3,0x74,0x6e,0x27,0x13,0x6e, -0x69,0x61,0x6e,0x27,0x10,0x62,0x24,0x11,0x69,0x63,0x25,0x64,0x30,0x66,0x44,0x67, -0x11,0x68,0x62,0xa3,0x9f,0x10,0x6c,1,0x61,0x26,0x6d,0xa3,0xa7,0x10,0x6d,0xa3, -0xa7,0x11,0x61,0x6b,0xa3,0x93,6,0x6c,0x3c,0x6c,0x52,0x6f,0x56,0x72,0x66,0x75, -1,0x67,0x30,0x68,1,0x64,0x79,0x69,0x10,0x64,0x79,0x10,0x69,0x8e,0x13,0x6e, -0x65,0x73,0x65,0x8f,0x11,0x69,0x73,0xa1,0x11,0x70,0x6f,0x2a,0x13,0x6d,0x6f,0x66, -0x6f,0x2b,0x10,0x61,1,0x68,0x2e,0x69,0x7c,0x12,0x6c,0x6c,0x65,0x7d,0xa2,0x41, -0x11,0x6d,0x69,0xa3,0x41,0x61,0x48,0x65,0x9c,0x68,1,0x61,0x2a,0x6b,0x10,0x73, -0xa3,0xa8,0x15,0x69,0x6b,0x73,0x75,0x6b,0x69,0xa3,0xa8,3,0x6c,0x3a,0x6d,0x48, -0x73,0x54,0x74,1,0x61,0x24,0x6b,0x9f,0x10,0x6b,0x9f,0x10,0x69,0x9c,0x13,0x6e, -0x65,0x73,0x65,0x9d,0x10,0x75,0xa2,0x82,0x10,0x6d,0xa3,0x82,0x10,0x73,0xa2,0x86, -0x13,0x61,0x76,0x61,0x68,0xa3,0x86,0x11,0x6e,0x67,0x28,0x12,0x61,0x6c,0x69,0x29, -3,0x6c,0x42,0x6e,0x90,0x74,0xa2,0x46,0x76,0x24,0x17,0x6f,0x77,0x65,0x6c,0x6a, -0x61,0x6d,0x6f,0x25,0x22,1,0x65,0x54,0x76,0x28,1,0x73,0x38,0x74,0x2a,0x17, -0x73,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x2b,0x16,0x79,0x6c,0x6c,0x61,0x62,0x6c, -0x65,0x29,0x18,0x61,0x64,0x69,0x6e,0x67,0x6a,0x61,0x6d,0x6f,0x23,1,0x61,0x21, 
-0x6f,0x1a,0x74,0x61,0x70,0x70,0x6c,0x69,0x63,0x61,0x62,0x6c,0x65,0x21,0x26,0x1a, -0x72,0x61,0x69,0x6c,0x69,0x6e,0x67,0x6a,0x61,0x6d,0x6f,0x27,1,0x6e,0x2c,0x79, -0x22,0x11,0x65,0x73,0x23,0x20,0x10,0x6f,0x21,1,0x6e,0x2c,0x79,0x22,0x11,0x65, -0x73,0x23,0x20,0x10,0x6f,0x21,2,0x6d,0x30,0x6e,0x3a,0x79,0x22,0x11,0x65,0x73, -0x23,0x24,0x13,0x61,0x79,0x62,0x65,0x25,0x20,0x10,0x6f,0x21,2,0x6d,0x30,0x6e, -0x3a,0x79,0x22,0x11,0x65,0x73,0x23,0x24,0x13,0x61,0x79,0x62,0x65,0x25,0x20,0x10, -0x6f,0x21,0xb,0x72,0x39,0x76,0xc,0x76,0x33,0x78,0x2a,0x7a,0x11,0x77,0x6a,0x43, -0x10,0x78,0x21,0x72,0x28,0x73,0x50,0x74,0x31,1,0x65,0x24,0x69,0x39,0x1e,0x67, -0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,0x69,0x63,0x61,0x74,0x6f,0x72,0x39,1, -0x6d,0x35,0x70,0x18,0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x35,0x6c,0x1f, -0x6c,0x3c,0x6f,0x4a,0x70,1,0x70,0x37,0x72,0x14,0x65,0x70,0x65,0x6e,0x64,0x37, -0x28,1,0x66,0x2b,0x76,0x2c,0x10,0x74,0x2f,0x13,0x74,0x68,0x65,0x72,0x21,0x63, -0x4c,0x65,0x64,0x67,1,0x61,0x3a,0x6c,0x19,0x75,0x65,0x61,0x66,0x74,0x65,0x72, -0x7a,0x77,0x6a,0x41,0x10,0x7a,0x41,2,0x6e,0x23,0x6f,0x24,0x72,0x25,0x14,0x6e, -0x74,0x72,0x6f,0x6c,0x23,2,0x62,0x34,0x6d,0x4e,0x78,0x26,0x13,0x74,0x65,0x6e, -0x64,0x27,0x3a,1,0x61,0x24,0x67,0x3d,0x11,0x73,0x65,0x3a,0x12,0x67,0x61,0x7a, -0x3d,0x3e,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x3f,9,0x6e,0x4a,0x6e,0x34, -0x6f,0x44,0x73,0x60,0x75,0x94,0x78,0x10,0x78,0x21,0x10,0x75,0x2a,0x14,0x6d,0x65, -0x72,0x69,0x63,0x2b,1,0x6c,0x2c,0x74,0x12,0x68,0x65,0x72,0x21,0x14,0x65,0x74, -0x74,0x65,0x72,0x2d,3,0x63,0x36,0x65,0x46,0x70,0x31,0x74,0x32,0x12,0x65,0x72, -0x6d,0x33,0x3c,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x3d,0x2e,0x10,0x70,0x2f, -0x10,0x70,0x34,0x12,0x70,0x65,0x72,0x35,0x61,0x46,0x63,0x52,0x65,0x64,0x66,0x72, -0x6c,2,0x65,0x2d,0x66,0x3b,0x6f,0x28,0x12,0x77,0x65,0x72,0x29,0x10,0x74,0x22, -0x12,0x65,0x72,0x6d,0x23,1,0x6c,0x24,0x72,0x37,0x24,0x12,0x6f,0x73,0x65,0x25, -0x10,0x78,0x38,0x13,0x74,0x65,0x6e,0x64,0x39,0x10,0x6f,0x26,0x13,0x72,0x6d,0x61, 
-0x74,0x27,0,0x10,0x6c,0x88,0x72,0x40,0x72,0x36,0x73,0x5e,0x77,0x7a,0x78,0x8a, -0x7a,0x11,0x77,0x6a,0x4b,1,0x65,0x24,0x69,0x3b,0x1e,0x67,0x69,0x6f,0x6e,0x61, -0x6c,0x69,0x6e,0x64,0x69,0x63,0x61,0x74,0x6f,0x72,0x3b,1,0x69,0x24,0x71,0x3f, -0x18,0x6e,0x67,0x6c,0x65,0x71,0x75,0x6f,0x74,0x65,0x3f,0x17,0x73,0x65,0x67,0x73, -0x70,0x61,0x63,0x65,0x4d,0x10,0x78,0x21,0x6c,0x36,0x6d,0x3c,0x6e,0x76,0x6f,0x13, -0x74,0x68,0x65,0x72,0x21,1,0x65,0x23,0x66,0x35,3,0x62,0x37,0x69,0x28,0x6c, -0x29,0x6e,0x2b,0x10,0x64,1,0x6c,0x34,0x6e,0x11,0x75,0x6d,0x2a,0x12,0x6c,0x65, -0x74,0x37,0x14,0x65,0x74,0x74,0x65,0x72,0x29,2,0x65,0x36,0x6c,0x39,0x75,0x2c, -0x14,0x6d,0x65,0x72,0x69,0x63,0x2d,0x14,0x77,0x6c,0x69,0x6e,0x65,0x39,0x66,0x3f, -0x66,0x40,0x67,0x4e,0x68,0x70,0x6b,0x10,0x61,0x26,0x15,0x74,0x61,0x6b,0x61,0x6e, -0x61,0x27,0x10,0x6f,0x24,0x13,0x72,0x6d,0x61,0x74,0x25,1,0x61,0x3a,0x6c,0x19, -0x75,0x65,0x61,0x66,0x74,0x65,0x72,0x7a,0x77,0x6a,0x49,0x10,0x7a,0x49,1,0x65, -0x24,0x6c,0x3d,0x19,0x62,0x72,0x65,0x77,0x6c,0x65,0x74,0x74,0x65,0x72,0x3d,0x61, -0x86,0x63,0x92,0x64,0x94,0x65,2,0x62,0x44,0x6d,0x5e,0x78,0x2e,0x13,0x74,0x65, -0x6e,0x64,0x32,0x15,0x6e,0x75,0x6d,0x6c,0x65,0x74,0x2f,0x42,1,0x61,0x24,0x67, -0x45,0x11,0x73,0x65,0x42,0x12,0x67,0x61,0x7a,0x45,0x46,0x16,0x6f,0x64,0x69,0x66, -0x69,0x65,0x72,0x47,0x15,0x6c,0x65,0x74,0x74,0x65,0x72,0x23,0x10,0x72,0x31,1, -0x6f,0x24,0x71,0x41,0x18,0x75,0x62,0x6c,0x65,0x71,0x75,0x6f,0x74,0x65,0x41,2, -0x63,0x32,0x6e,0x3c,0x6f,0x22,0x12,0x70,0x65,0x6e,0x23,0x24,0x13,0x6c,0x6f,0x73, -0x65,0x25,0x20,0x12,0x6f,0x6e,0x65,0x21,6,0x6f,0x5c,0x6f,0x4a,0x72,0x5c,0x74, -0x64,0x76,0x1d,0x69,0x73,0x75,0x61,0x6c,0x6f,0x72,0x64,0x65,0x72,0x6c,0x65,0x66, -0x74,0x3d,0x18,0x76,0x65,0x72,0x73,0x74,0x72,0x75,0x63,0x6b,0x2d,0x13,0x69,0x67, -0x68,0x74,0x2f,0x11,0x6f,0x70,0x30,0x12,0x61,0x6e,0x64,2,0x62,0x32,0x6c,0x50, -0x72,0x13,0x69,0x67,0x68,0x74,0x3b,0x14,0x6f,0x74,0x74,0x6f,0x6d,0x32,0x17,0x61, 
-0x6e,0x64,0x72,0x69,0x67,0x68,0x74,0x35,0x12,0x65,0x66,0x74,0x36,0x17,0x61,0x6e, -0x64,0x72,0x69,0x67,0x68,0x74,0x39,0x62,0x2c,0x6c,0x5c,0x6e,0x10,0x61,0x21,0x14, -0x6f,0x74,0x74,0x6f,0x6d,0x22,0x12,0x61,0x6e,0x64,1,0x6c,0x2e,0x72,0x13,0x69, -0x67,0x68,0x74,0x27,0x12,0x65,0x66,0x74,0x25,0x12,0x65,0x66,0x74,0x28,0x17,0x61, -0x6e,0x64,0x72,0x69,0x67,0x68,0x74,0x2b,0xd,0x6e,0xaa,0x72,0x70,0x72,0x92,0x73, -0xa2,0x46,0x74,0xa2,0x54,0x76,1,0x69,0x60,0x6f,0x12,0x77,0x65,0x6c,0x62,1, -0x64,0x3a,0x69,0x19,0x6e,0x64,0x65,0x70,0x65,0x6e,0x64,0x65,0x6e,0x74,0x67,0x17, -0x65,0x70,0x65,0x6e,0x64,0x65,0x6e,0x74,0x65,1,0x72,0x2e,0x73,0x13,0x61,0x72, -0x67,0x61,0x61,0x12,0x61,0x6d,0x61,0x5f,0x1d,0x65,0x67,0x69,0x73,0x74,0x65,0x72, -0x73,0x68,0x69,0x66,0x74,0x65,0x72,0x57,0x1e,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65, -0x6d,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x59,0x12,0x6f,0x6e,0x65,1,0x6c,0x2c, -0x6d,0x12,0x61,0x72,0x6b,0x5d,0x14,0x65,0x74,0x74,0x65,0x72,0x5b,0x6e,0x3c,0x6f, -0x7c,0x70,0x18,0x75,0x72,0x65,0x6b,0x69,0x6c,0x6c,0x65,0x72,0x55,1,0x6f,0x4c, -0x75,1,0x6b,0x3c,0x6d,0x12,0x62,0x65,0x72,0x50,0x15,0x6a,0x6f,0x69,0x6e,0x65, -0x72,0x53,0x11,0x74,0x61,0x4f,0x16,0x6e,0x6a,0x6f,0x69,0x6e,0x65,0x72,0x4d,0x13, -0x74,0x68,0x65,0x72,0x21,0x67,0x3e,0x67,0x4a,0x69,0x64,0x6a,0x82,0x6d,0x1d,0x6f, -0x64,0x69,0x66,0x79,0x69,0x6e,0x67,0x6c,0x65,0x74,0x74,0x65,0x72,0x4b,0x1c,0x65, -0x6d,0x69,0x6e,0x61,0x74,0x69,0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x45,0x1e,0x6e,0x76, -0x69,0x73,0x69,0x62,0x6c,0x65,0x73,0x74,0x61,0x63,0x6b,0x65,0x72,0x47,0x14,0x6f, -0x69,0x6e,0x65,0x72,0x49,0x61,0xa2,0xba,0x62,0xa2,0xc0,0x63,1,0x61,0xa2,0xa2, -0x6f,0x16,0x6e,0x73,0x6f,0x6e,0x61,0x6e,0x74,0x2a,8,0x6b,0x67,0x6b,0x48,0x6d, -0x52,0x70,0x5c,0x73,0xa2,0x42,0x77,0x19,0x69,0x74,0x68,0x73,0x74,0x61,0x63,0x6b, -0x65,0x72,0x43,0x14,0x69,0x6c,0x6c,0x65,0x72,0x35,0x14,0x65,0x64,0x69,0x61,0x6c, -0x37,1,0x6c,0x52,0x72,0x10,0x65,1,0x63,0x2e,0x66,0x13,0x69,0x78,0x65,0x64, 
-0x3d,0x19,0x65,0x64,0x69,0x6e,0x67,0x72,0x65,0x70,0x68,0x61,0x3b,0x18,0x61,0x63, -0x65,0x68,0x6f,0x6c,0x64,0x65,0x72,0x39,0x10,0x75,1,0x62,0x3e,0x63,0x1b,0x63, -0x65,0x65,0x64,0x69,0x6e,0x67,0x72,0x65,0x70,0x68,0x61,0x41,0x15,0x6a,0x6f,0x69, -0x6e,0x65,0x64,0x3f,0x64,0x4c,0x66,0x52,0x68,0x5a,0x69,0x1e,0x6e,0x69,0x74,0x69, -0x61,0x6c,0x70,0x6f,0x73,0x74,0x66,0x69,0x78,0x65,0x64,0x33,0x12,0x65,0x61,0x64, -0x2d,0x13,0x69,0x6e,0x61,0x6c,0x2f,0x18,0x65,0x61,0x64,0x6c,0x65,0x74,0x74,0x65, -0x72,0x31,0x1d,0x6e,0x74,0x69,0x6c,0x6c,0x61,0x74,0x69,0x6f,0x6e,0x6d,0x61,0x72, -0x6b,0x29,0x16,0x76,0x61,0x67,0x72,0x61,0x68,0x61,0x23,1,0x69,0x4a,0x72,0x10, -0x61,0x1f,0x68,0x6d,0x69,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x6e,0x75,0x6d,0x62, -0x65,0x72,0x27,0x12,0x6e,0x64,0x75,0x25,2,0x72,0x38,0x74,0x46,0x75,0x26,0x15, -0x70,0x72,0x69,0x67,0x68,0x74,0x27,0x20,0x15,0x6f,0x74,0x61,0x74,0x65,0x64,0x21, -1,0x72,0x24,0x75,0x25,0x22,0x18,0x61,0x6e,0x73,0x66,0x6f,0x72,0x6d,0x65,0x64, -1,0x72,0x32,0x75,0x15,0x70,0x72,0x69,0x67,0x68,0x74,0x25,0x15,0x6f,0x74,0x61, -0x74,0x65,0x64,0x23,0xd,0x6e,0xc1,0x86,0x73,0xa8,0x73,0x4c,0x74,0xa2,0x76,0x75, -0xa2,0x83,0x7a,0xd8,0x70,0,2,0x6c,0xd9,0x20,0,0x70,0xd9,0x40,0,0x73, -0xc3,0,0xfe,0xf,0,0,0,7,0x6f,0x3c,0x6f,0xff,8,0,0,0, -0x70,0x3a,0x75,0x6e,0x79,0x13,0x6d,0x62,0x6f,0x6c,0xff,0xf,0,0,0,0x11, -0x61,0x63,1,0x65,0x34,0x69,0x15,0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa5,0,0x18, -0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xc3,0,0x16,0x72,0x72,0x6f,0x67, -0x61,0x74,0x65,0xe1,0,0,0x63,0xff,2,0,0,0,0x65,0x38,0x6b,0xff, -4,0,0,0,0x6d,0xff,1,0,0,0,0x16,0x70,0x61,0x72,0x61,0x74, -0x6f,0x72,0xd9,0x70,0,0x1d,0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x6c,0x65, -0x74,0x74,0x65,0x72,0x31,1,0x6e,0x40,0x70,0x1c,0x70,0x65,0x72,0x63,0x61,0x73, -0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x25,0x17,0x61,0x73,0x73,0x69,0x67,0x6e,0x65, -0x64,0x23,0x6e,0xa2,0x69,0x6f,0xa2,0x89,0x70,0xfe,0x30,0xf8,0,0,9,0x69, -0x33,0x69,0xff,0x10,0,0,0,0x6f,0xfd,0x80,0,0,0x72,0x54,0x73,0xf9, 
-0,0,0x75,0x12,0x6e,0x63,0x74,0xfe,0x30,0xf8,0,0,0x15,0x75,0x61,0x74, -0x69,0x6f,0x6e,0xff,0x30,0xf8,0,0,0x17,0x69,0x76,0x61,0x74,0x65,0x75,0x73, -0x65,0xdd,0,0,0x61,0x48,0x63,0xfd,0x40,0,0,0x64,0xe9,0,0,0x65, -0xfd,0x20,0,0,0x66,0xff,0x20,0,0,0,0x1f,0x72,0x61,0x67,0x72,0x61, -0x70,0x68,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xd9,0x40,0,0xbe,0, -3,0x64,0xa7,0,0x6c,0xab,0,0x6f,0x30,0x75,0x13,0x6d,0x62,0x65,0x72,0xbf, -0,0xb2,0,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b, -0xa1,1,0x70,0x92,0x74,0x12,0x68,0x65,0x72,0xe6,0x80,1,3,0x6c,0x40,0x6e, -0x4a,0x70,0x56,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0xff,8,0,0,0,0x14, -0x65,0x74,0x74,0x65,0x72,0x61,0x14,0x75,0x6d,0x62,0x65,0x72,0xb3,0,0x19,0x75, -0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x80,0,0,0x1c,0x65,0x6e, -0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xf9,0,0,0x66,0xc0, -0xc4,0x66,0xa2,0x47,0x69,0xa2,0x64,0x6c,0xa2,0x79,0x6d,0xa4,0xc0,4,0x61,0x6c, -0x63,0xa5,0,0x65,0xa3,0x80,0x6e,0xa1,0x6f,0x15,0x64,0x69,0x66,0x69,0x65,0x72, -1,0x6c,0x38,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0xff,4,0,0,0,0x14, -0x65,0x74,0x74,0x65,0x72,0x41,1,0x72,0x3c,0x74,0x16,0x68,0x73,0x79,0x6d,0x62, -0x6f,0x6c,0xff,1,0,0,0,0x10,0x6b,0xa5,0xc0,1,0x69,0x32,0x6f,0x13, -0x72,0x6d,0x61,0x74,0xdb,0,0,0x1d,0x6e,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74, -0x75,0x61,0x74,0x69,0x6f,0x6e,0xff,0x20,0,0,0,0x10,0x6e,0x1f,0x69,0x74, -0x69,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xff,0x10, -0,0,0,0x9c,7,0x6d,0x18,0x6d,0x41,0x6f,0x28,0x74,0x31,0x75,0x25,0x60, -0x1c,0x77,0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x29,0x63, -0x3d,0x65,0x28,0x69,0x42,0x6c,0x29,0x13,0x74,0x74,0x65,0x72,0x9c,0x15,0x6e,0x75, -0x6d,0x62,0x65,0x72,0xab,0,0x1a,0x6e,0x65,0x73,0x65,0x70,0x61,0x72,0x61,0x74, -0x6f,0x72,0xd9,0x20,0,0x63,0x46,0x64,0xa2,0x96,0x65,0x1b,0x6e,0x63,0x6c,0x6f, -0x73,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa3,0x80,0xe6,0x80,1,7,0x6e,0x57, 
-0x6e,0x52,0x6f,0x5e,0x73,0xe1,0,0,0x75,0x1b,0x72,0x72,0x65,0x6e,0x63,0x79, -0x73,0x79,0x6d,0x62,0x6f,0x6c,0xff,2,0,0,0,0x22,0x12,0x74,0x72,0x6c, -0xd9,0x80,0,0xdc,0,0,1,0x6d,0x62,0x6e,1,0x6e,0x30,0x74,0x12,0x72, -0x6f,0x6c,0xd9,0x80,0,0x1f,0x65,0x63,0x74,0x6f,0x72,0x70,0x75,0x6e,0x63,0x74, -0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x40,0,0,0x19,0x62,0x69,0x6e,0x69,0x6e, -0x67,0x6d,0x61,0x72,0x6b,0xa5,0xc0,0x61,0x58,0x63,0xd9,0x80,0,0x66,0xdb,0, -0,0x6c,0x1d,0x6f,0x73,0x65,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f, -0x6e,0xfd,0x20,0,0,0x18,0x73,0x65,0x64,0x6c,0x65,0x74,0x74,0x65,0x72,0x3d, -2,0x61,0x32,0x65,0x50,0x69,0x12,0x67,0x69,0x74,0xa7,0,0x1c,0x73,0x68,0x70, -0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xe9,0,0,0x1a,0x63,0x69, -0x6d,0x61,0x6c,0x6e,0x75,0x6d,0x62,0x65,0x72,0xa7,0 -}; - -const char PropNameData::nameGroups[21265]={ -2,'A','l','p','h','a',0,'A','l','p','h','a','b','e','t','i','c',0, -4,'N',0,'N','o',0,'F',0,'F','a','l','s','e',0,4,'Y',0,'Y','e','s',0,'T',0,'T','r','u','e',0, -2,'N','R',0,'N','o','t','_','R','e','o','r','d','e','r','e','d',0, -2,'O','V',0,'O','v','e','r','l','a','y',0,2,'N','K',0,'N','u','k','t','a',0, -2,'K','V',0,'K','a','n','a','_','V','o','i','c','i','n','g',0, -2,'V','R',0,'V','i','r','a','m','a',0,2,'C','C','C','1','0',0,'C','C','C','1','0',0, -2,'C','C','C','1','1',0,'C','C','C','1','1',0,2,'C','C','C','1','2',0,'C','C','C','1','2',0, -2,'C','C','C','1','3',0,'C','C','C','1','3',0,2,'C','C','C','1','4',0,'C','C','C','1','4',0, -2,'C','C','C','1','5',0,'C','C','C','1','5',0,2,'C','C','C','1','6',0,'C','C','C','1','6',0, -2,'C','C','C','1','7',0,'C','C','C','1','7',0,2,'C','C','C','1','8',0,'C','C','C','1','8',0, -2,'C','C','C','1','9',0,'C','C','C','1','9',0,2,'C','C','C','2','0',0,'C','C','C','2','0',0, -2,'C','C','C','2','1',0,'C','C','C','2','1',0,2,'C','C','C','2','2',0,'C','C','C','2','2',0, -2,'C','C','C','2','3',0,'C','C','C','2','3',0,2,'C','C','C','2','4',0,'C','C','C','2','4',0, 
-2,'C','C','C','2','5',0,'C','C','C','2','5',0,2,'C','C','C','2','6',0,'C','C','C','2','6',0, -2,'C','C','C','2','7',0,'C','C','C','2','7',0,2,'C','C','C','2','8',0,'C','C','C','2','8',0, -2,'C','C','C','2','9',0,'C','C','C','2','9',0,2,'C','C','C','3','0',0,'C','C','C','3','0',0, -2,'C','C','C','3','1',0,'C','C','C','3','1',0,2,'C','C','C','3','2',0,'C','C','C','3','2',0, -2,'C','C','C','3','3',0,'C','C','C','3','3',0,2,'C','C','C','3','4',0,'C','C','C','3','4',0, -2,'C','C','C','3','5',0,'C','C','C','3','5',0,2,'C','C','C','3','6',0,'C','C','C','3','6',0, -2,'C','C','C','8','4',0,'C','C','C','8','4',0,2,'C','C','C','9','1',0,'C','C','C','9','1',0, -2,'C','C','C','1','0','3',0,'C','C','C','1','0','3',0,2,'C','C','C','1','0','7',0,'C','C','C','1','0','7',0, -2,'C','C','C','1','1','8',0,'C','C','C','1','1','8',0,2,'C','C','C','1','2','2',0,'C','C','C','1','2','2',0, -2,'C','C','C','1','2','9',0,'C','C','C','1','2','9',0,2,'C','C','C','1','3','0',0,'C','C','C','1','3','0',0, -2,'C','C','C','1','3','2',0,'C','C','C','1','3','2',0,2,'C','C','C','1','3','3',0,'C','C','C','1','3','3',0, -2,'A','T','B','L',0,'A','t','t','a','c','h','e','d','_','B','e','l','o','w','_','L','e','f','t',0, -2,'A','T','B',0,'A','t','t','a','c','h','e','d','_','B','e','l','o','w',0, -2,'A','T','A',0,'A','t','t','a','c','h','e','d','_','A','b','o','v','e',0, -2,'A','T','A','R',0,'A','t','t','a','c','h','e','d','_','A','b','o','v','e','_','R','i','g','h','t',0, -2,'B','L',0,'B','e','l','o','w','_','L','e','f','t',0,2,'B',0,'B','e','l','o','w',0, -2,'B','R',0,'B','e','l','o','w','_','R','i','g','h','t',0, -2,'L',0,'L','e','f','t',0,2,'R',0,'R','i','g','h','t',0, -2,'A','L',0,'A','b','o','v','e','_','L','e','f','t',0,2,'A',0,'A','b','o','v','e',0, -2,'A','R',0,'A','b','o','v','e','_','R','i','g','h','t',0, -2,'D','B',0,'D','o','u','b','l','e','_','B','e','l','o','w',0, -2,'D','A',0,'D','o','u','b','l','e','_','A','b','o','v','e',0, 
-2,'I','S',0,'I','o','t','a','_','S','u','b','s','c','r','i','p','t',0, -2,'A','H','e','x',0,'A','S','C','I','I','_','H','e','x','_','D','i','g','i','t',0, -2,'B','i','d','i','_','C',0,'B','i','d','i','_','C','o','n','t','r','o','l',0, -2,'B','i','d','i','_','M',0,'B','i','d','i','_','M','i','r','r','o','r','e','d',0, -2,'D','a','s','h',0,'D','a','s','h',0,2,'D','I',0,'D','e','f','a','u','l','t','_','I','g','n','o','r','a','b','l','e', -'_','C','o','d','e','_','P','o','i','n','t',0,2,'D','e','p',0,'D','e','p','r','e','c','a','t','e','d',0, -2,'D','i','a',0,'D','i','a','c','r','i','t','i','c',0,2,'E','x','t',0,'E','x','t','e','n','d','e','r',0, -2,'C','o','m','p','_','E','x',0,'F','u','l','l','_','C','o','m','p','o','s','i','t','i','o','n','_','E','x','c','l','u','s', -'i','o','n',0,2,'G','r','_','B','a','s','e',0,'G','r','a','p','h','e','m','e','_','B','a','s','e',0, -2,'G','r','_','E','x','t',0,'G','r','a','p','h','e','m','e','_','E','x','t','e','n','d',0, -2,'G','r','_','L','i','n','k',0,'G','r','a','p','h','e','m','e','_','L','i','n','k',0, -2,'H','e','x',0,'H','e','x','_','D','i','g','i','t',0,2,'H','y','p','h','e','n',0,'H','y','p','h','e','n',0, -2,'I','D','C',0,'I','D','_','C','o','n','t','i','n','u','e',0, -2,'I','D','S',0,'I','D','_','S','t','a','r','t',0,2,'I','d','e','o',0,'I','d','e','o','g','r','a','p','h','i','c',0, -2,'I','D','S','B',0,'I','D','S','_','B','i','n','a','r','y','_','O','p','e','r','a','t','o','r',0, -2,'I','D','S','T',0,'I','D','S','_','T','r','i','n','a','r','y','_','O','p','e','r','a','t','o','r',0, -2,'J','o','i','n','_','C',0,'J','o','i','n','_','C','o','n','t','r','o','l',0, -2,'L','O','E',0,'L','o','g','i','c','a','l','_','O','r','d','e','r','_','E','x','c','e','p','t','i','o','n',0, -2,'L','o','w','e','r',0,'L','o','w','e','r','c','a','s','e',0, -2,'M','a','t','h',0,'M','a','t','h',0,2,'N','C','h','a','r',0,'N','o','n','c','h','a','r','a','c','t','e','r','_','C', 
-'o','d','e','_','P','o','i','n','t',0,2,'Q','M','a','r','k',0,'Q','u','o','t','a','t','i','o','n','_','M','a','r','k',0, -2,'R','a','d','i','c','a','l',0,'R','a','d','i','c','a','l',0, -2,'S','D',0,'S','o','f','t','_','D','o','t','t','e','d',0, -2,'T','e','r','m',0,'T','e','r','m','i','n','a','l','_','P','u','n','c','t','u','a','t','i','o','n',0, -2,'U','I','d','e','o',0,'U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',0, -2,'U','p','p','e','r',0,'U','p','p','e','r','c','a','s','e',0, -3,'W','S','p','a','c','e',0,'W','h','i','t','e','_','S','p','a','c','e',0,'s','p','a','c','e',0, -2,'X','I','D','C',0,'X','I','D','_','C','o','n','t','i','n','u','e',0, -2,'X','I','D','S',0,'X','I','D','_','S','t','a','r','t',0, -2,'S','e','n','s','i','t','i','v','e',0,'C','a','s','e','_','S','e','n','s','i','t','i','v','e',0, -2,'S','T','e','r','m',0,'S','e','n','t','e','n','c','e','_','T','e','r','m','i','n','a','l',0, -2,'V','S',0,'V','a','r','i','a','t','i','o','n','_','S','e','l','e','c','t','o','r',0, -2,'n','f','d','i','n','e','r','t',0,'N','F','D','_','I','n','e','r','t',0, -2,'n','f','k','d','i','n','e','r','t',0,'N','F','K','D','_','I','n','e','r','t',0, -2,'n','f','c','i','n','e','r','t',0,'N','F','C','_','I','n','e','r','t',0, -2,'n','f','k','c','i','n','e','r','t',0,'N','F','K','C','_','I','n','e','r','t',0, -2,'s','e','g','s','t','a','r','t',0,'S','e','g','m','e','n','t','_','S','t','a','r','t','e','r',0, -2,'P','a','t','_','S','y','n',0,'P','a','t','t','e','r','n','_','S','y','n','t','a','x',0, -2,'P','a','t','_','W','S',0,'P','a','t','t','e','r','n','_','W','h','i','t','e','_','S','p','a','c','e',0, -2,0,'a','l','n','u','m',0,2,0,'b','l','a','n','k',0, -2,0,'g','r','a','p','h',0,2,0,'p','r','i','n','t',0, -2,0,'x','d','i','g','i','t',0,2,'C','a','s','e','d',0,'C','a','s','e','d',0, -2,'C','I',0,'C','a','s','e','_','I','g','n','o','r','a','b','l','e',0, 
-2,'C','W','L',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','L','o','w','e','r','c','a','s','e','d',0, -2,'C','W','U',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','U','p','p','e','r','c','a','s','e','d',0, -2,'C','W','T',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','T','i','t','l','e','c','a','s','e','d',0, -2,'C','W','C','F',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','C','a','s','e','f','o','l','d','e','d',0, -2,'C','W','C','M',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','C','a','s','e','m','a','p','p','e','d',0, -2,'C','W','K','C','F',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','N','F','K','C','_','C','a','s','e','f','o','l', -'d','e','d',0,2,'E','m','o','j','i',0,'E','m','o','j','i',0, -2,'E','P','r','e','s',0,'E','m','o','j','i','_','P','r','e','s','e','n','t','a','t','i','o','n',0, -2,'E','M','o','d',0,'E','m','o','j','i','_','M','o','d','i','f','i','e','r',0, -2,'E','B','a','s','e',0,'E','m','o','j','i','_','M','o','d','i','f','i','e','r','_','B','a','s','e',0, -2,'E','C','o','m','p',0,'E','m','o','j','i','_','C','o','m','p','o','n','e','n','t',0, -2,'R','I',0,'R','e','g','i','o','n','a','l','_','I','n','d','i','c','a','t','o','r',0, -2,'P','C','M',0,'P','r','e','p','e','n','d','e','d','_','C','o','n','c','a','t','e','n','a','t','i','o','n','_','M','a','r', -'k',0,2,'E','x','t','P','i','c','t',0,'E','x','t','e','n','d','e','d','_','P','i','c','t','o','g','r','a','p','h','i','c', -0,2,'b','c',0,'B','i','d','i','_','C','l','a','s','s',0, -2,'L',0,'L','e','f','t','_','T','o','_','R','i','g','h','t',0, -2,'R',0,'R','i','g','h','t','_','T','o','_','L','e','f','t',0, -2,'E','N',0,'E','u','r','o','p','e','a','n','_','N','u','m','b','e','r',0, -2,'E','S',0,'E','u','r','o','p','e','a','n','_','S','e','p','a','r','a','t','o','r',0, -2,'E','T',0,'E','u','r','o','p','e','a','n','_','T','e','r','m','i','n','a','t','o','r',0, -2,'A','N',0,'A','r','a','b','i','c','_','N','u','m','b','e','r',0, 
-2,'C','S',0,'C','o','m','m','o','n','_','S','e','p','a','r','a','t','o','r',0, -2,'B',0,'P','a','r','a','g','r','a','p','h','_','S','e','p','a','r','a','t','o','r',0, -2,'S',0,'S','e','g','m','e','n','t','_','S','e','p','a','r','a','t','o','r',0, -2,'W','S',0,'W','h','i','t','e','_','S','p','a','c','e',0, -2,'O','N',0,'O','t','h','e','r','_','N','e','u','t','r','a','l',0, -2,'L','R','E',0,'L','e','f','t','_','T','o','_','R','i','g','h','t','_','E','m','b','e','d','d','i','n','g',0, -2,'L','R','O',0,'L','e','f','t','_','T','o','_','R','i','g','h','t','_','O','v','e','r','r','i','d','e',0, -2,'A','L',0,'A','r','a','b','i','c','_','L','e','t','t','e','r',0, -2,'R','L','E',0,'R','i','g','h','t','_','T','o','_','L','e','f','t','_','E','m','b','e','d','d','i','n','g',0, -2,'R','L','O',0,'R','i','g','h','t','_','T','o','_','L','e','f','t','_','O','v','e','r','r','i','d','e',0, -2,'P','D','F',0,'P','o','p','_','D','i','r','e','c','t','i','o','n','a','l','_','F','o','r','m','a','t',0, -2,'N','S','M',0,'N','o','n','s','p','a','c','i','n','g','_','M','a','r','k',0, -2,'B','N',0,'B','o','u','n','d','a','r','y','_','N','e','u','t','r','a','l',0, -2,'F','S','I',0,'F','i','r','s','t','_','S','t','r','o','n','g','_','I','s','o','l','a','t','e',0, -2,'L','R','I',0,'L','e','f','t','_','T','o','_','R','i','g','h','t','_','I','s','o','l','a','t','e',0, -2,'R','L','I',0,'R','i','g','h','t','_','T','o','_','L','e','f','t','_','I','s','o','l','a','t','e',0, -2,'P','D','I',0,'P','o','p','_','D','i','r','e','c','t','i','o','n','a','l','_','I','s','o','l','a','t','e',0, -2,'b','l','k',0,'B','l','o','c','k',0,2,'N','B',0,'N','o','_','B','l','o','c','k',0, -2,'A','S','C','I','I',0,'B','a','s','i','c','_','L','a','t','i','n',0, -3,'L','a','t','i','n','_','1','_','S','u','p',0,'L','a','t','i','n','_','1','_','S','u','p','p','l','e','m','e','n','t',0, -'L','a','t','i','n','_','1',0,2,'L','a','t','i','n','_','E','x','t','_','A',0,'L','a','t','i','n','_','E','x','t','e','n', 
-'d','e','d','_','A',0,2,'L','a','t','i','n','_','E','x','t','_','B',0,'L','a','t','i','n','_','E','x','t','e','n','d','e', -'d','_','B',0,2,'I','P','A','_','E','x','t',0,'I','P','A','_','E','x','t','e','n','s','i','o','n','s',0, -2,'M','o','d','i','f','i','e','r','_','L','e','t','t','e','r','s',0,'S','p','a','c','i','n','g','_','M','o','d','i','f','i', -'e','r','_','L','e','t','t','e','r','s',0,2,'D','i','a','c','r','i','t','i','c','a','l','s',0, -'C','o','m','b','i','n','i','n','g','_','D','i','a','c','r','i','t','i','c','a','l','_','M','a','r','k','s',0, -2,'G','r','e','e','k',0,'G','r','e','e','k','_','A','n','d','_','C','o','p','t','i','c',0, -2,'C','y','r','i','l','l','i','c',0,'C','y','r','i','l','l','i','c',0, -2,'A','r','m','e','n','i','a','n',0,'A','r','m','e','n','i','a','n',0, -2,'H','e','b','r','e','w',0,'H','e','b','r','e','w',0,2,'A','r','a','b','i','c',0,'A','r','a','b','i','c',0, -2,'S','y','r','i','a','c',0,'S','y','r','i','a','c',0,2,'T','h','a','a','n','a',0,'T','h','a','a','n','a',0, -2,'D','e','v','a','n','a','g','a','r','i',0,'D','e','v','a','n','a','g','a','r','i',0, -2,'B','e','n','g','a','l','i',0,'B','e','n','g','a','l','i',0, -2,'G','u','r','m','u','k','h','i',0,'G','u','r','m','u','k','h','i',0, -2,'G','u','j','a','r','a','t','i',0,'G','u','j','a','r','a','t','i',0, -2,'O','r','i','y','a',0,'O','r','i','y','a',0,2,'T','a','m','i','l',0,'T','a','m','i','l',0, -2,'T','e','l','u','g','u',0,'T','e','l','u','g','u',0,2,'K','a','n','n','a','d','a',0, -'K','a','n','n','a','d','a',0,2,'M','a','l','a','y','a','l','a','m',0,'M','a','l','a','y','a','l','a','m',0, -2,'S','i','n','h','a','l','a',0,'S','i','n','h','a','l','a',0, -2,'T','h','a','i',0,'T','h','a','i',0,2,'L','a','o',0,'L','a','o',0, -2,'T','i','b','e','t','a','n',0,'T','i','b','e','t','a','n',0, -2,'M','y','a','n','m','a','r',0,'M','y','a','n','m','a','r',0, -2,'G','e','o','r','g','i','a','n',0,'G','e','o','r','g','i','a','n',0, 
-2,'J','a','m','o',0,'H','a','n','g','u','l','_','J','a','m','o',0, -2,'E','t','h','i','o','p','i','c',0,'E','t','h','i','o','p','i','c',0, -2,'C','h','e','r','o','k','e','e',0,'C','h','e','r','o','k','e','e',0, -3,'U','C','A','S',0,'U','n','i','f','i','e','d','_','C','a','n','a','d','i','a','n','_','A','b','o','r','i','g','i','n','a', -'l','_','S','y','l','l','a','b','i','c','s',0,'C','a','n','a','d','i','a','n','_','S','y','l','l','a','b','i','c','s',0, -2,'O','g','h','a','m',0,'O','g','h','a','m',0,2,'R','u','n','i','c',0,'R','u','n','i','c',0, -2,'K','h','m','e','r',0,'K','h','m','e','r',0,2,'M','o','n','g','o','l','i','a','n',0, -'M','o','n','g','o','l','i','a','n',0,2,'L','a','t','i','n','_','E','x','t','_','A','d','d','i','t','i','o','n','a','l',0, -'L','a','t','i','n','_','E','x','t','e','n','d','e','d','_','A','d','d','i','t','i','o','n','a','l',0, -2,'G','r','e','e','k','_','E','x','t',0,'G','r','e','e','k','_','E','x','t','e','n','d','e','d',0, -2,'P','u','n','c','t','u','a','t','i','o','n',0,'G','e','n','e','r','a','l','_','P','u','n','c','t','u','a','t','i','o','n', -0,2,'S','u','p','e','r','_','A','n','d','_','S','u','b',0,'S','u','p','e','r','s','c','r','i','p','t','s','_','A','n','d', -'_','S','u','b','s','c','r','i','p','t','s',0,2,'C','u','r','r','e','n','c','y','_','S','y','m','b','o','l','s',0, -'C','u','r','r','e','n','c','y','_','S','y','m','b','o','l','s',0, -3,'D','i','a','c','r','i','t','i','c','a','l','s','_','F','o','r','_','S','y','m','b','o','l','s',0, -'C','o','m','b','i','n','i','n','g','_','D','i','a','c','r','i','t','i','c','a','l','_','M','a','r','k','s','_','F','o','r','_', -'S','y','m','b','o','l','s',0,'C','o','m','b','i','n','i','n','g','_','M','a','r','k','s','_','F','o','r','_','S','y','m','b', -'o','l','s',0,2,'L','e','t','t','e','r','l','i','k','e','_','S','y','m','b','o','l','s',0, -'L','e','t','t','e','r','l','i','k','e','_','S','y','m','b','o','l','s',0, 
-2,'N','u','m','b','e','r','_','F','o','r','m','s',0,'N','u','m','b','e','r','_','F','o','r','m','s',0, -2,'A','r','r','o','w','s',0,'A','r','r','o','w','s',0,2,'M','a','t','h','_','O','p','e','r','a','t','o','r','s',0, -'M','a','t','h','e','m','a','t','i','c','a','l','_','O','p','e','r','a','t','o','r','s',0, -2,'M','i','s','c','_','T','e','c','h','n','i','c','a','l',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_','T','e', -'c','h','n','i','c','a','l',0,2,'C','o','n','t','r','o','l','_','P','i','c','t','u','r','e','s',0, -'C','o','n','t','r','o','l','_','P','i','c','t','u','r','e','s',0, -2,'O','C','R',0,'O','p','t','i','c','a','l','_','C','h','a','r','a','c','t','e','r','_','R','e','c','o','g','n','i','t','i', -'o','n',0,2,'E','n','c','l','o','s','e','d','_','A','l','p','h','a','n','u','m',0,'E','n','c','l','o','s','e','d','_','A', -'l','p','h','a','n','u','m','e','r','i','c','s',0,2,'B','o','x','_','D','r','a','w','i','n','g',0, -'B','o','x','_','D','r','a','w','i','n','g',0,2,'B','l','o','c','k','_','E','l','e','m','e','n','t','s',0, -'B','l','o','c','k','_','E','l','e','m','e','n','t','s',0,2,'G','e','o','m','e','t','r','i','c','_','S','h','a','p','e','s', -0,'G','e','o','m','e','t','r','i','c','_','S','h','a','p','e','s',0, -2,'M','i','s','c','_','S','y','m','b','o','l','s',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_','S','y','m','b', -'o','l','s',0,2,'D','i','n','g','b','a','t','s',0,'D','i','n','g','b','a','t','s',0, -2,'B','r','a','i','l','l','e',0,'B','r','a','i','l','l','e','_','P','a','t','t','e','r','n','s',0, -2,'C','J','K','_','R','a','d','i','c','a','l','s','_','S','u','p',0,'C','J','K','_','R','a','d','i','c','a','l','s','_','S', -'u','p','p','l','e','m','e','n','t',0,2,'K','a','n','g','x','i',0,'K','a','n','g','x','i','_','R','a','d','i','c','a','l', -'s',0,2,'I','D','C',0,'I','d','e','o','g','r','a','p','h','i','c','_','D','e','s','c','r','i','p','t','i','o','n','_','C', 
-'h','a','r','a','c','t','e','r','s',0,2,'C','J','K','_','S','y','m','b','o','l','s',0,'C','J','K','_','S','y','m','b','o', -'l','s','_','A','n','d','_','P','u','n','c','t','u','a','t','i','o','n',0, -2,'H','i','r','a','g','a','n','a',0,'H','i','r','a','g','a','n','a',0, -2,'K','a','t','a','k','a','n','a',0,'K','a','t','a','k','a','n','a',0, -2,'B','o','p','o','m','o','f','o',0,'B','o','p','o','m','o','f','o',0, -2,'C','o','m','p','a','t','_','J','a','m','o',0,'H','a','n','g','u','l','_','C','o','m','p','a','t','i','b','i','l','i','t', -'y','_','J','a','m','o',0,2,'K','a','n','b','u','n',0,'K','a','n','b','u','n',0, -2,'B','o','p','o','m','o','f','o','_','E','x','t',0,'B','o','p','o','m','o','f','o','_','E','x','t','e','n','d','e','d',0, -2,'E','n','c','l','o','s','e','d','_','C','J','K',0,'E','n','c','l','o','s','e','d','_','C','J','K','_','L','e','t','t','e', -'r','s','_','A','n','d','_','M','o','n','t','h','s',0,2,'C','J','K','_','C','o','m','p','a','t',0, -'C','J','K','_','C','o','m','p','a','t','i','b','i','l','i','t','y',0, -2,'C','J','K','_','E','x','t','_','A',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h', -'s','_','E','x','t','e','n','s','i','o','n','_','A',0,2,'C','J','K',0,'C','J','K','_','U','n','i','f','i','e','d','_','I', -'d','e','o','g','r','a','p','h','s',0,2,'Y','i','_','S','y','l','l','a','b','l','e','s',0, -'Y','i','_','S','y','l','l','a','b','l','e','s',0,2,'Y','i','_','R','a','d','i','c','a','l','s',0, -'Y','i','_','R','a','d','i','c','a','l','s',0,2,'H','a','n','g','u','l',0,'H','a','n','g','u','l','_','S','y','l','l','a', -'b','l','e','s',0,2,'H','i','g','h','_','S','u','r','r','o','g','a','t','e','s',0,'H','i','g','h','_','S','u','r','r','o', -'g','a','t','e','s',0,2,'H','i','g','h','_','P','U','_','S','u','r','r','o','g','a','t','e','s',0, -'H','i','g','h','_','P','r','i','v','a','t','e','_','U','s','e','_','S','u','r','r','o','g','a','t','e','s',0, 
-2,'L','o','w','_','S','u','r','r','o','g','a','t','e','s',0,'L','o','w','_','S','u','r','r','o','g','a','t','e','s',0, -3,'P','U','A',0,'P','r','i','v','a','t','e','_','U','s','e','_','A','r','e','a',0,'P','r','i','v','a','t','e','_','U','s', -'e',0,2,'C','J','K','_','C','o','m','p','a','t','_','I','d','e','o','g','r','a','p','h','s',0, -'C','J','K','_','C','o','m','p','a','t','i','b','i','l','i','t','y','_','I','d','e','o','g','r','a','p','h','s',0, -2,'A','l','p','h','a','b','e','t','i','c','_','P','F',0,'A','l','p','h','a','b','e','t','i','c','_','P','r','e','s','e','n', -'t','a','t','i','o','n','_','F','o','r','m','s',0,3,'A','r','a','b','i','c','_','P','F','_','A',0, -'A','r','a','b','i','c','_','P','r','e','s','e','n','t','a','t','i','o','n','_','F','o','r','m','s','_','A',0, -'A','r','a','b','i','c','_','P','r','e','s','e','n','t','a','t','i','o','n','_','F','o','r','m','s','-','A',0, -2,'H','a','l','f','_','M','a','r','k','s',0,'C','o','m','b','i','n','i','n','g','_','H','a','l','f','_','M','a','r','k','s', -0,2,'C','J','K','_','C','o','m','p','a','t','_','F','o','r','m','s',0,'C','J','K','_','C','o','m','p','a','t','i','b','i', -'l','i','t','y','_','F','o','r','m','s',0,2,'S','m','a','l','l','_','F','o','r','m','s',0, -'S','m','a','l','l','_','F','o','r','m','_','V','a','r','i','a','n','t','s',0, -2,'A','r','a','b','i','c','_','P','F','_','B',0,'A','r','a','b','i','c','_','P','r','e','s','e','n','t','a','t','i','o','n', -'_','F','o','r','m','s','_','B',0,2,'S','p','e','c','i','a','l','s',0,'S','p','e','c','i','a','l','s',0, -2,'H','a','l','f','_','A','n','d','_','F','u','l','l','_','F','o','r','m','s',0,'H','a','l','f','w','i','d','t','h','_','A', -'n','d','_','F','u','l','l','w','i','d','t','h','_','F','o','r','m','s',0, -2,'O','l','d','_','I','t','a','l','i','c',0,'O','l','d','_','I','t','a','l','i','c',0, -2,'G','o','t','h','i','c',0,'G','o','t','h','i','c',0,2,'D','e','s','e','r','e','t',0, 
-'D','e','s','e','r','e','t',0,2,'B','y','z','a','n','t','i','n','e','_','M','u','s','i','c',0, -'B','y','z','a','n','t','i','n','e','_','M','u','s','i','c','a','l','_','S','y','m','b','o','l','s',0, -2,'M','u','s','i','c',0,'M','u','s','i','c','a','l','_','S','y','m','b','o','l','s',0, -2,'M','a','t','h','_','A','l','p','h','a','n','u','m',0,'M','a','t','h','e','m','a','t','i','c','a','l','_','A','l','p','h', -'a','n','u','m','e','r','i','c','_','S','y','m','b','o','l','s',0, -2,'C','J','K','_','E','x','t','_','B',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h', -'s','_','E','x','t','e','n','s','i','o','n','_','B',0,2,'C','J','K','_','C','o','m','p','a','t','_','I','d','e','o','g','r', -'a','p','h','s','_','S','u','p',0,'C','J','K','_','C','o','m','p','a','t','i','b','i','l','i','t','y','_','I','d','e','o','g', -'r','a','p','h','s','_','S','u','p','p','l','e','m','e','n','t',0, -2,'T','a','g','s',0,'T','a','g','s',0,3,'C','y','r','i','l','l','i','c','_','S','u','p',0, -'C','y','r','i','l','l','i','c','_','S','u','p','p','l','e','m','e','n','t',0,'C','y','r','i','l','l','i','c','_','S','u','p', -'p','l','e','m','e','n','t','a','r','y',0,2,'T','a','g','a','l','o','g',0,'T','a','g','a','l','o','g',0, -2,'H','a','n','u','n','o','o',0,'H','a','n','u','n','o','o',0, -2,'B','u','h','i','d',0,'B','u','h','i','d',0,2,'T','a','g','b','a','n','w','a',0,'T','a','g','b','a','n','w','a',0, -2,'M','i','s','c','_','M','a','t','h','_','S','y','m','b','o','l','s','_','A',0,'M','i','s','c','e','l','l','a','n','e','o', -'u','s','_','M','a','t','h','e','m','a','t','i','c','a','l','_','S','y','m','b','o','l','s','_','A',0, -2,'S','u','p','_','A','r','r','o','w','s','_','A',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','A','r','r','o','w', -'s','_','A',0,2,'S','u','p','_','A','r','r','o','w','s','_','B',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','A', 
-'r','r','o','w','s','_','B',0,2,'M','i','s','c','_','M','a','t','h','_','S','y','m','b','o','l','s','_','B',0, -'M','i','s','c','e','l','l','a','n','e','o','u','s','_','M','a','t','h','e','m','a','t','i','c','a','l','_','S','y','m','b','o', -'l','s','_','B',0,2,'S','u','p','_','M','a','t','h','_','O','p','e','r','a','t','o','r','s',0, -'S','u','p','p','l','e','m','e','n','t','a','l','_','M','a','t','h','e','m','a','t','i','c','a','l','_','O','p','e','r','a','t', -'o','r','s',0,2,'K','a','t','a','k','a','n','a','_','E','x','t',0,'K','a','t','a','k','a','n','a','_','P','h','o','n','e', -'t','i','c','_','E','x','t','e','n','s','i','o','n','s',0,2,'V','S',0,'V','a','r','i','a','t','i','o','n','_','S','e','l', -'e','c','t','o','r','s',0,2,'S','u','p','_','P','U','A','_','A',0,'S','u','p','p','l','e','m','e','n','t','a','r','y','_', -'P','r','i','v','a','t','e','_','U','s','e','_','A','r','e','a','_','A',0, -2,'S','u','p','_','P','U','A','_','B',0,'S','u','p','p','l','e','m','e','n','t','a','r','y','_','P','r','i','v','a','t','e', -'_','U','s','e','_','A','r','e','a','_','B',0,2,'L','i','m','b','u',0,'L','i','m','b','u',0, -2,'T','a','i','_','L','e',0,'T','a','i','_','L','e',0,2,'K','h','m','e','r','_','S','y','m','b','o','l','s',0, -'K','h','m','e','r','_','S','y','m','b','o','l','s',0,2,'P','h','o','n','e','t','i','c','_','E','x','t',0, -'P','h','o','n','e','t','i','c','_','E','x','t','e','n','s','i','o','n','s',0, -2,'M','i','s','c','_','A','r','r','o','w','s',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_','S','y','m','b','o', -'l','s','_','A','n','d','_','A','r','r','o','w','s',0,2,'Y','i','j','i','n','g',0,'Y','i','j','i','n','g','_','H','e','x', -'a','g','r','a','m','_','S','y','m','b','o','l','s',0,2,'L','i','n','e','a','r','_','B','_','S','y','l','l','a','b','a','r', -'y',0,'L','i','n','e','a','r','_','B','_','S','y','l','l','a','b','a','r','y',0, 
-2,'L','i','n','e','a','r','_','B','_','I','d','e','o','g','r','a','m','s',0,'L','i','n','e','a','r','_','B','_','I','d','e', -'o','g','r','a','m','s',0,2,'A','e','g','e','a','n','_','N','u','m','b','e','r','s',0,'A','e','g','e','a','n','_','N','u', -'m','b','e','r','s',0,2,'U','g','a','r','i','t','i','c',0,'U','g','a','r','i','t','i','c',0, -2,'S','h','a','v','i','a','n',0,'S','h','a','v','i','a','n',0, -2,'O','s','m','a','n','y','a',0,'O','s','m','a','n','y','a',0, -2,'C','y','p','r','i','o','t','_','S','y','l','l','a','b','a','r','y',0,'C','y','p','r','i','o','t','_','S','y','l','l','a', -'b','a','r','y',0,2,'T','a','i','_','X','u','a','n','_','J','i','n','g',0,'T','a','i','_','X','u','a','n','_','J','i','n', -'g','_','S','y','m','b','o','l','s',0,2,'V','S','_','S','u','p',0,'V','a','r','i','a','t','i','o','n','_','S','e','l','e', -'c','t','o','r','s','_','S','u','p','p','l','e','m','e','n','t',0, -2,'A','n','c','i','e','n','t','_','G','r','e','e','k','_','M','u','s','i','c',0,'A','n','c','i','e','n','t','_','G','r','e', -'e','k','_','M','u','s','i','c','a','l','_','N','o','t','a','t','i','o','n',0, -2,'A','n','c','i','e','n','t','_','G','r','e','e','k','_','N','u','m','b','e','r','s',0,'A','n','c','i','e','n','t','_','G', -'r','e','e','k','_','N','u','m','b','e','r','s',0,2,'A','r','a','b','i','c','_','S','u','p',0, -'A','r','a','b','i','c','_','S','u','p','p','l','e','m','e','n','t',0, -2,'B','u','g','i','n','e','s','e',0,'B','u','g','i','n','e','s','e',0, -2,'C','J','K','_','S','t','r','o','k','e','s',0,'C','J','K','_','S','t','r','o','k','e','s',0, -2,'D','i','a','c','r','i','t','i','c','a','l','s','_','S','u','p',0,'C','o','m','b','i','n','i','n','g','_','D','i','a','c', -'r','i','t','i','c','a','l','_','M','a','r','k','s','_','S','u','p','p','l','e','m','e','n','t',0, -2,'C','o','p','t','i','c',0,'C','o','p','t','i','c',0,2,'E','t','h','i','o','p','i','c','_','E','x','t',0, -'E','t','h','i','o','p','i','c','_','E','x','t','e','n','d','e','d',0, 
-2,'E','t','h','i','o','p','i','c','_','S','u','p',0,'E','t','h','i','o','p','i','c','_','S','u','p','p','l','e','m','e','n', -'t',0,2,'G','e','o','r','g','i','a','n','_','S','u','p',0,'G','e','o','r','g','i','a','n','_','S','u','p','p','l','e','m', -'e','n','t',0,2,'G','l','a','g','o','l','i','t','i','c',0,'G','l','a','g','o','l','i','t','i','c',0, -2,'K','h','a','r','o','s','h','t','h','i',0,'K','h','a','r','o','s','h','t','h','i',0, -2,'M','o','d','i','f','i','e','r','_','T','o','n','e','_','L','e','t','t','e','r','s',0,'M','o','d','i','f','i','e','r','_', -'T','o','n','e','_','L','e','t','t','e','r','s',0,2,'N','e','w','_','T','a','i','_','L','u','e',0, -'N','e','w','_','T','a','i','_','L','u','e',0,2,'O','l','d','_','P','e','r','s','i','a','n',0, -'O','l','d','_','P','e','r','s','i','a','n',0,2,'P','h','o','n','e','t','i','c','_','E','x','t','_','S','u','p',0, -'P','h','o','n','e','t','i','c','_','E','x','t','e','n','s','i','o','n','s','_','S','u','p','p','l','e','m','e','n','t',0, -2,'S','u','p','_','P','u','n','c','t','u','a','t','i','o','n',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','P','u', -'n','c','t','u','a','t','i','o','n',0,2,'S','y','l','o','t','i','_','N','a','g','r','i',0, -'S','y','l','o','t','i','_','N','a','g','r','i',0,2,'T','i','f','i','n','a','g','h',0,'T','i','f','i','n','a','g','h',0, -2,'V','e','r','t','i','c','a','l','_','F','o','r','m','s',0,'V','e','r','t','i','c','a','l','_','F','o','r','m','s',0, -2,'N','K','o',0,'N','K','o',0,2,'B','a','l','i','n','e','s','e',0,'B','a','l','i','n','e','s','e',0, -2,'L','a','t','i','n','_','E','x','t','_','C',0,'L','a','t','i','n','_','E','x','t','e','n','d','e','d','_','C',0, -2,'L','a','t','i','n','_','E','x','t','_','D',0,'L','a','t','i','n','_','E','x','t','e','n','d','e','d','_','D',0, -2,'P','h','a','g','s','_','P','a',0,'P','h','a','g','s','_','P','a',0, -2,'P','h','o','e','n','i','c','i','a','n',0,'P','h','o','e','n','i','c','i','a','n',0, 
-2,'C','u','n','e','i','f','o','r','m',0,'C','u','n','e','i','f','o','r','m',0, -2,'C','u','n','e','i','f','o','r','m','_','N','u','m','b','e','r','s',0,'C','u','n','e','i','f','o','r','m','_','N','u','m', -'b','e','r','s','_','A','n','d','_','P','u','n','c','t','u','a','t','i','o','n',0, -2,'C','o','u','n','t','i','n','g','_','R','o','d',0,'C','o','u','n','t','i','n','g','_','R','o','d','_','N','u','m','e','r', -'a','l','s',0,2,'S','u','n','d','a','n','e','s','e',0,'S','u','n','d','a','n','e','s','e',0, -2,'L','e','p','c','h','a',0,'L','e','p','c','h','a',0,2,'O','l','_','C','h','i','k','i',0, -'O','l','_','C','h','i','k','i',0,2,'C','y','r','i','l','l','i','c','_','E','x','t','_','A',0, -'C','y','r','i','l','l','i','c','_','E','x','t','e','n','d','e','d','_','A',0, -2,'V','a','i',0,'V','a','i',0,2,'C','y','r','i','l','l','i','c','_','E','x','t','_','B',0, -'C','y','r','i','l','l','i','c','_','E','x','t','e','n','d','e','d','_','B',0, -2,'S','a','u','r','a','s','h','t','r','a',0,'S','a','u','r','a','s','h','t','r','a',0, -2,'K','a','y','a','h','_','L','i',0,'K','a','y','a','h','_','L','i',0, -2,'R','e','j','a','n','g',0,'R','e','j','a','n','g',0,2,'C','h','a','m',0,'C','h','a','m',0, -2,'A','n','c','i','e','n','t','_','S','y','m','b','o','l','s',0,'A','n','c','i','e','n','t','_','S','y','m','b','o','l','s', -0,2,'P','h','a','i','s','t','o','s',0,'P','h','a','i','s','t','o','s','_','D','i','s','c',0, -2,'L','y','c','i','a','n',0,'L','y','c','i','a','n',0,2,'C','a','r','i','a','n',0,'C','a','r','i','a','n',0, -2,'L','y','d','i','a','n',0,'L','y','d','i','a','n',0,2,'M','a','h','j','o','n','g',0, -'M','a','h','j','o','n','g','_','T','i','l','e','s',0,2,'D','o','m','i','n','o',0,'D','o','m','i','n','o','_','T','i','l', -'e','s',0,2,'S','a','m','a','r','i','t','a','n',0,'S','a','m','a','r','i','t','a','n',0, -2,'U','C','A','S','_','E','x','t',0,'U','n','i','f','i','e','d','_','C','a','n','a','d','i','a','n','_','A','b','o','r','i', 
-'g','i','n','a','l','_','S','y','l','l','a','b','i','c','s','_','E','x','t','e','n','d','e','d',0, -2,'T','a','i','_','T','h','a','m',0,'T','a','i','_','T','h','a','m',0, -2,'V','e','d','i','c','_','E','x','t',0,'V','e','d','i','c','_','E','x','t','e','n','s','i','o','n','s',0, -2,'L','i','s','u',0,'L','i','s','u',0,2,'B','a','m','u','m',0,'B','a','m','u','m',0, -2,'I','n','d','i','c','_','N','u','m','b','e','r','_','F','o','r','m','s',0,'C','o','m','m','o','n','_','I','n','d','i','c', -'_','N','u','m','b','e','r','_','F','o','r','m','s',0,2,'D','e','v','a','n','a','g','a','r','i','_','E','x','t',0, -'D','e','v','a','n','a','g','a','r','i','_','E','x','t','e','n','d','e','d',0, -2,'J','a','m','o','_','E','x','t','_','A',0,'H','a','n','g','u','l','_','J','a','m','o','_','E','x','t','e','n','d','e','d', -'_','A',0,2,'J','a','v','a','n','e','s','e',0,'J','a','v','a','n','e','s','e',0, -2,'M','y','a','n','m','a','r','_','E','x','t','_','A',0,'M','y','a','n','m','a','r','_','E','x','t','e','n','d','e','d','_', -'A',0,2,'T','a','i','_','V','i','e','t',0,'T','a','i','_','V','i','e','t',0, -2,'M','e','e','t','e','i','_','M','a','y','e','k',0,'M','e','e','t','e','i','_','M','a','y','e','k',0, -2,'J','a','m','o','_','E','x','t','_','B',0,'H','a','n','g','u','l','_','J','a','m','o','_','E','x','t','e','n','d','e','d', -'_','B',0,2,'I','m','p','e','r','i','a','l','_','A','r','a','m','a','i','c',0,'I','m','p','e','r','i','a','l','_','A','r', -'a','m','a','i','c',0,2,'O','l','d','_','S','o','u','t','h','_','A','r','a','b','i','a','n',0, -'O','l','d','_','S','o','u','t','h','_','A','r','a','b','i','a','n',0, -2,'A','v','e','s','t','a','n',0,'A','v','e','s','t','a','n',0, -2,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','r','t','h','i','a','n',0, -'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','r','t','h','i','a','n',0, -2,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','h','l','a','v','i',0,'I','n','s','c','r','i','p','t','i', 
-'o','n','a','l','_','P','a','h','l','a','v','i',0,2,'O','l','d','_','T','u','r','k','i','c',0, -'O','l','d','_','T','u','r','k','i','c',0,2,'R','u','m','i',0,'R','u','m','i','_','N','u','m','e','r','a','l','_','S','y', -'m','b','o','l','s',0,2,'K','a','i','t','h','i',0,'K','a','i','t','h','i',0, -2,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g','l','y','p','h','s',0,'E','g','y','p','t','i','a','n','_','H', -'i','e','r','o','g','l','y','p','h','s',0,2,'E','n','c','l','o','s','e','d','_','A','l','p','h','a','n','u','m','_','S','u', -'p',0,'E','n','c','l','o','s','e','d','_','A','l','p','h','a','n','u','m','e','r','i','c','_','S','u','p','p','l','e','m','e', -'n','t',0,2,'E','n','c','l','o','s','e','d','_','I','d','e','o','g','r','a','p','h','i','c','_','S','u','p',0, -'E','n','c','l','o','s','e','d','_','I','d','e','o','g','r','a','p','h','i','c','_','S','u','p','p','l','e','m','e','n','t',0, -2,'C','J','K','_','E','x','t','_','C',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h', -'s','_','E','x','t','e','n','s','i','o','n','_','C',0,2,'M','a','n','d','a','i','c',0,'M','a','n','d','a','i','c',0, -2,'B','a','t','a','k',0,'B','a','t','a','k',0,2,'E','t','h','i','o','p','i','c','_','E','x','t','_','A',0, -'E','t','h','i','o','p','i','c','_','E','x','t','e','n','d','e','d','_','A',0, -2,'B','r','a','h','m','i',0,'B','r','a','h','m','i',0,2,'B','a','m','u','m','_','S','u','p',0, -'B','a','m','u','m','_','S','u','p','p','l','e','m','e','n','t',0, -2,'K','a','n','a','_','S','u','p',0,'K','a','n','a','_','S','u','p','p','l','e','m','e','n','t',0, -2,'P','l','a','y','i','n','g','_','C','a','r','d','s',0,'P','l','a','y','i','n','g','_','C','a','r','d','s',0, -2,'M','i','s','c','_','P','i','c','t','o','g','r','a','p','h','s',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_', -'S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h','s',0, 
-2,'E','m','o','t','i','c','o','n','s',0,'E','m','o','t','i','c','o','n','s',0, -2,'T','r','a','n','s','p','o','r','t','_','A','n','d','_','M','a','p',0,'T','r','a','n','s','p','o','r','t','_','A','n','d', -'_','M','a','p','_','S','y','m','b','o','l','s',0,2,'A','l','c','h','e','m','i','c','a','l',0, -'A','l','c','h','e','m','i','c','a','l','_','S','y','m','b','o','l','s',0, -2,'C','J','K','_','E','x','t','_','D',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h', -'s','_','E','x','t','e','n','s','i','o','n','_','D',0,2,'A','r','a','b','i','c','_','E','x','t','_','A',0, -'A','r','a','b','i','c','_','E','x','t','e','n','d','e','d','_','A',0, -2,'A','r','a','b','i','c','_','M','a','t','h',0,'A','r','a','b','i','c','_','M','a','t','h','e','m','a','t','i','c','a','l', -'_','A','l','p','h','a','b','e','t','i','c','_','S','y','m','b','o','l','s',0, -2,'C','h','a','k','m','a',0,'C','h','a','k','m','a',0,2,'M','e','e','t','e','i','_','M','a','y','e','k','_','E','x','t', -0,'M','e','e','t','e','i','_','M','a','y','e','k','_','E','x','t','e','n','s','i','o','n','s',0, -2,'M','e','r','o','i','t','i','c','_','C','u','r','s','i','v','e',0,'M','e','r','o','i','t','i','c','_','C','u','r','s','i', -'v','e',0,2,'M','e','r','o','i','t','i','c','_','H','i','e','r','o','g','l','y','p','h','s',0, -'M','e','r','o','i','t','i','c','_','H','i','e','r','o','g','l','y','p','h','s',0, -2,'M','i','a','o',0,'M','i','a','o',0,2,'S','h','a','r','a','d','a',0,'S','h','a','r','a','d','a',0, -2,'S','o','r','a','_','S','o','m','p','e','n','g',0,'S','o','r','a','_','S','o','m','p','e','n','g',0, -2,'S','u','n','d','a','n','e','s','e','_','S','u','p',0,'S','u','n','d','a','n','e','s','e','_','S','u','p','p','l','e','m', -'e','n','t',0,2,'T','a','k','r','i',0,'T','a','k','r','i',0, -2,'B','a','s','s','a','_','V','a','h',0,'B','a','s','s','a','_','V','a','h',0, 
-2,'C','a','u','c','a','s','i','a','n','_','A','l','b','a','n','i','a','n',0,'C','a','u','c','a','s','i','a','n','_','A','l', -'b','a','n','i','a','n',0,2,'C','o','p','t','i','c','_','E','p','a','c','t','_','N','u','m','b','e','r','s',0, -'C','o','p','t','i','c','_','E','p','a','c','t','_','N','u','m','b','e','r','s',0, -2,'D','i','a','c','r','i','t','i','c','a','l','s','_','E','x','t',0,'C','o','m','b','i','n','i','n','g','_','D','i','a','c', -'r','i','t','i','c','a','l','_','M','a','r','k','s','_','E','x','t','e','n','d','e','d',0, -2,'D','u','p','l','o','y','a','n',0,'D','u','p','l','o','y','a','n',0, -2,'E','l','b','a','s','a','n',0,'E','l','b','a','s','a','n',0, -2,'G','e','o','m','e','t','r','i','c','_','S','h','a','p','e','s','_','E','x','t',0,'G','e','o','m','e','t','r','i','c','_', -'S','h','a','p','e','s','_','E','x','t','e','n','d','e','d',0, -2,'G','r','a','n','t','h','a',0,'G','r','a','n','t','h','a',0, -2,'K','h','o','j','k','i',0,'K','h','o','j','k','i',0,2,'K','h','u','d','a','w','a','d','i',0, -'K','h','u','d','a','w','a','d','i',0,2,'L','a','t','i','n','_','E','x','t','_','E',0,'L','a','t','i','n','_','E','x','t', -'e','n','d','e','d','_','E',0,2,'L','i','n','e','a','r','_','A',0,'L','i','n','e','a','r','_','A',0, -2,'M','a','h','a','j','a','n','i',0,'M','a','h','a','j','a','n','i',0, -2,'M','a','n','i','c','h','a','e','a','n',0,'M','a','n','i','c','h','a','e','a','n',0, -2,'M','e','n','d','e','_','K','i','k','a','k','u','i',0,'M','e','n','d','e','_','K','i','k','a','k','u','i',0, -2,'M','o','d','i',0,'M','o','d','i',0,2,'M','r','o',0,'M','r','o',0, -2,'M','y','a','n','m','a','r','_','E','x','t','_','B',0,'M','y','a','n','m','a','r','_','E','x','t','e','n','d','e','d','_', -'B',0,2,'N','a','b','a','t','a','e','a','n',0,'N','a','b','a','t','a','e','a','n',0, -2,'O','l','d','_','N','o','r','t','h','_','A','r','a','b','i','a','n',0,'O','l','d','_','N','o','r','t','h','_','A','r','a', 
-'b','i','a','n',0,2,'O','l','d','_','P','e','r','m','i','c',0,'O','l','d','_','P','e','r','m','i','c',0, -2,'O','r','n','a','m','e','n','t','a','l','_','D','i','n','g','b','a','t','s',0,'O','r','n','a','m','e','n','t','a','l','_', -'D','i','n','g','b','a','t','s',0,2,'P','a','h','a','w','h','_','H','m','o','n','g',0,'P','a','h','a','w','h','_','H','m', -'o','n','g',0,2,'P','a','l','m','y','r','e','n','e',0,'P','a','l','m','y','r','e','n','e',0, -2,'P','a','u','_','C','i','n','_','H','a','u',0,'P','a','u','_','C','i','n','_','H','a','u',0, -2,'P','s','a','l','t','e','r','_','P','a','h','l','a','v','i',0,'P','s','a','l','t','e','r','_','P','a','h','l','a','v','i', -0,2,'S','h','o','r','t','h','a','n','d','_','F','o','r','m','a','t','_','C','o','n','t','r','o','l','s',0, -'S','h','o','r','t','h','a','n','d','_','F','o','r','m','a','t','_','C','o','n','t','r','o','l','s',0, -2,'S','i','d','d','h','a','m',0,'S','i','d','d','h','a','m',0, -2,'S','i','n','h','a','l','a','_','A','r','c','h','a','i','c','_','N','u','m','b','e','r','s',0, -'S','i','n','h','a','l','a','_','A','r','c','h','a','i','c','_','N','u','m','b','e','r','s',0, -2,'S','u','p','_','A','r','r','o','w','s','_','C',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','A','r','r','o','w', -'s','_','C',0,2,'T','i','r','h','u','t','a',0,'T','i','r','h','u','t','a',0, -2,'W','a','r','a','n','g','_','C','i','t','i',0,'W','a','r','a','n','g','_','C','i','t','i',0, -2,'A','h','o','m',0,'A','h','o','m',0,2,'A','n','a','t','o','l','i','a','n','_','H','i','e','r','o','g','l','y','p','h', -'s',0,'A','n','a','t','o','l','i','a','n','_','H','i','e','r','o','g','l','y','p','h','s',0, -2,'C','h','e','r','o','k','e','e','_','S','u','p',0,'C','h','e','r','o','k','e','e','_','S','u','p','p','l','e','m','e','n', -'t',0,2,'C','J','K','_','E','x','t','_','E',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a', -'p','h','s','_','E','x','t','e','n','s','i','o','n','_','E',0, 
-2,'E','a','r','l','y','_','D','y','n','a','s','t','i','c','_','C','u','n','e','i','f','o','r','m',0, -'E','a','r','l','y','_','D','y','n','a','s','t','i','c','_','C','u','n','e','i','f','o','r','m',0, -2,'H','a','t','r','a','n',0,'H','a','t','r','a','n',0,2,'M','u','l','t','a','n','i',0, -'M','u','l','t','a','n','i',0,2,'O','l','d','_','H','u','n','g','a','r','i','a','n',0,'O','l','d','_','H','u','n','g','a', -'r','i','a','n',0,2,'S','u','p','_','S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h', -'s',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o', -'g','r','a','p','h','s',0,2,'S','u','t','t','o','n','_','S','i','g','n','W','r','i','t','i','n','g',0, -'S','u','t','t','o','n','_','S','i','g','n','W','r','i','t','i','n','g',0, -2,'A','d','l','a','m',0,'A','d','l','a','m',0,2,'B','h','a','i','k','s','u','k','i',0, -'B','h','a','i','k','s','u','k','i',0,2,'C','y','r','i','l','l','i','c','_','E','x','t','_','C',0, -'C','y','r','i','l','l','i','c','_','E','x','t','e','n','d','e','d','_','C',0, -2,'G','l','a','g','o','l','i','t','i','c','_','S','u','p',0,'G','l','a','g','o','l','i','t','i','c','_','S','u','p','p','l', -'e','m','e','n','t',0,2,'I','d','e','o','g','r','a','p','h','i','c','_','S','y','m','b','o','l','s',0, -'I','d','e','o','g','r','a','p','h','i','c','_','S','y','m','b','o','l','s','_','A','n','d','_','P','u','n','c','t','u','a','t', -'i','o','n',0,2,'M','a','r','c','h','e','n',0,'M','a','r','c','h','e','n',0, -2,'M','o','n','g','o','l','i','a','n','_','S','u','p',0,'M','o','n','g','o','l','i','a','n','_','S','u','p','p','l','e','m', -'e','n','t',0,2,'N','e','w','a',0,'N','e','w','a',0,2,'O','s','a','g','e',0,'O','s','a','g','e',0, -2,'T','a','n','g','u','t',0,'T','a','n','g','u','t',0,2,'T','a','n','g','u','t','_','C','o','m','p','o','n','e','n','t', -'s',0,'T','a','n','g','u','t','_','C','o','m','p','o','n','e','n','t','s',0, 
-2,'C','J','K','_','E','x','t','_','F',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h', -'s','_','E','x','t','e','n','s','i','o','n','_','F',0,2,'K','a','n','a','_','E','x','t','_','A',0, -'K','a','n','a','_','E','x','t','e','n','d','e','d','_','A',0, -2,'M','a','s','a','r','a','m','_','G','o','n','d','i',0,'M','a','s','a','r','a','m','_','G','o','n','d','i',0, -2,'N','u','s','h','u',0,'N','u','s','h','u',0,2,'S','o','y','o','m','b','o',0,'S','o','y','o','m','b','o',0, -2,'S','y','r','i','a','c','_','S','u','p',0,'S','y','r','i','a','c','_','S','u','p','p','l','e','m','e','n','t',0, -2,'Z','a','n','a','b','a','z','a','r','_','S','q','u','a','r','e',0,'Z','a','n','a','b','a','z','a','r','_','S','q','u','a', -'r','e',0,2,'C','h','e','s','s','_','S','y','m','b','o','l','s',0,'C','h','e','s','s','_','S','y','m','b','o','l','s',0, -2,'D','o','g','r','a',0,'D','o','g','r','a',0,2,'G','e','o','r','g','i','a','n','_','E','x','t',0, -'G','e','o','r','g','i','a','n','_','E','x','t','e','n','d','e','d',0, -2,'G','u','n','j','a','l','a','_','G','o','n','d','i',0,'G','u','n','j','a','l','a','_','G','o','n','d','i',0, -2,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a',0,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a', -0,2,'I','n','d','i','c','_','S','i','y','a','q','_','N','u','m','b','e','r','s',0,'I','n','d','i','c','_','S','i','y','a', -'q','_','N','u','m','b','e','r','s',0,2,'M','a','k','a','s','a','r',0,'M','a','k','a','s','a','r',0, -2,'M','a','y','a','n','_','N','u','m','e','r','a','l','s',0,'M','a','y','a','n','_','N','u','m','e','r','a','l','s',0, -2,'M','e','d','e','f','a','i','d','r','i','n',0,'M','e','d','e','f','a','i','d','r','i','n',0, -2,'O','l','d','_','S','o','g','d','i','a','n',0,'O','l','d','_','S','o','g','d','i','a','n',0, -2,'S','o','g','d','i','a','n',0,'S','o','g','d','i','a','n',0, 
-2,'c','c','c',0,'C','a','n','o','n','i','c','a','l','_','C','o','m','b','i','n','i','n','g','_','C','l','a','s','s',0, -2,'d','t',0,'D','e','c','o','m','p','o','s','i','t','i','o','n','_','T','y','p','e',0, -3,'N','o','n','e',0,'N','o','n','e',0,'n','o','n','e',0, -3,'C','a','n',0,'C','a','n','o','n','i','c','a','l',0,'c','a','n',0, -3,'C','o','m',0,'C','o','m','p','a','t',0,'c','o','m',0, -3,'E','n','c',0,'C','i','r','c','l','e',0,'e','n','c',0, -3,'F','i','n',0,'F','i','n','a','l',0,'f','i','n',0,3,'F','o','n','t',0,'F','o','n','t',0, -'f','o','n','t',0,3,'F','r','a',0,'F','r','a','c','t','i','o','n',0,'f','r','a',0, -3,'I','n','i','t',0,'I','n','i','t','i','a','l',0,'i','n','i','t',0, -3,'I','s','o',0,'I','s','o','l','a','t','e','d',0,'i','s','o',0, -3,'M','e','d',0,'M','e','d','i','a','l',0,'m','e','d',0, -3,'N','a','r',0,'N','a','r','r','o','w',0,'n','a','r',0, -3,'N','b',0,'N','o','b','r','e','a','k',0,'n','b',0,3,'S','m','l',0,'S','m','a','l','l',0, -'s','m','l',0,3,'S','q','r',0,'S','q','u','a','r','e',0,'s','q','r',0, -3,'S','u','b',0,'S','u','b',0,'s','u','b',0,3,'S','u','p',0,'S','u','p','e','r',0, -'s','u','p',0,3,'V','e','r','t',0,'V','e','r','t','i','c','a','l',0,'v','e','r','t',0, -3,'W','i','d','e',0,'W','i','d','e',0,'w','i','d','e',0, -2,'e','a',0,'E','a','s','t','_','A','s','i','a','n','_','W','i','d','t','h',0, -2,'N',0,'N','e','u','t','r','a','l',0,2,'A',0,'A','m','b','i','g','u','o','u','s',0, -2,'H',0,'H','a','l','f','w','i','d','t','h',0,2,'F',0,'F','u','l','l','w','i','d','t','h',0, -2,'N','a',0,'N','a','r','r','o','w',0,2,'W',0,'W','i','d','e',0, -2,'g','c',0,'G','e','n','e','r','a','l','_','C','a','t','e','g','o','r','y',0, -2,'C','n',0,'U','n','a','s','s','i','g','n','e','d',0,2,'L','u',0,'U','p','p','e','r','c','a','s','e','_','L','e','t', -'t','e','r',0,2,'L','l',0,'L','o','w','e','r','c','a','s','e','_','L','e','t','t','e','r',0, -2,'L','t',0,'T','i','t','l','e','c','a','s','e','_','L','e','t','t','e','r',0, 
-2,'L','m',0,'M','o','d','i','f','i','e','r','_','L','e','t','t','e','r',0, -2,'L','o',0,'O','t','h','e','r','_','L','e','t','t','e','r',0, -2,'M','n',0,'N','o','n','s','p','a','c','i','n','g','_','M','a','r','k',0, -2,'M','e',0,'E','n','c','l','o','s','i','n','g','_','M','a','r','k',0, -2,'M','c',0,'S','p','a','c','i','n','g','_','M','a','r','k',0, -3,'N','d',0,'D','e','c','i','m','a','l','_','N','u','m','b','e','r',0,'d','i','g','i','t',0, -2,'N','l',0,'L','e','t','t','e','r','_','N','u','m','b','e','r',0, -2,'N','o',0,'O','t','h','e','r','_','N','u','m','b','e','r',0, -2,'Z','s',0,'S','p','a','c','e','_','S','e','p','a','r','a','t','o','r',0, -2,'Z','l',0,'L','i','n','e','_','S','e','p','a','r','a','t','o','r',0, -2,'Z','p',0,'P','a','r','a','g','r','a','p','h','_','S','e','p','a','r','a','t','o','r',0, -3,'C','c',0,'C','o','n','t','r','o','l',0,'c','n','t','r','l',0, -2,'C','f',0,'F','o','r','m','a','t',0,2,'C','o',0,'P','r','i','v','a','t','e','_','U','s','e',0, -2,'C','s',0,'S','u','r','r','o','g','a','t','e',0,2,'P','d',0,'D','a','s','h','_','P','u','n','c','t','u','a','t','i', -'o','n',0,2,'P','s',0,'O','p','e','n','_','P','u','n','c','t','u','a','t','i','o','n',0, -2,'P','e',0,'C','l','o','s','e','_','P','u','n','c','t','u','a','t','i','o','n',0, -2,'P','c',0,'C','o','n','n','e','c','t','o','r','_','P','u','n','c','t','u','a','t','i','o','n',0, -2,'P','o',0,'O','t','h','e','r','_','P','u','n','c','t','u','a','t','i','o','n',0, -2,'S','m',0,'M','a','t','h','_','S','y','m','b','o','l',0, -2,'S','c',0,'C','u','r','r','e','n','c','y','_','S','y','m','b','o','l',0, -2,'S','k',0,'M','o','d','i','f','i','e','r','_','S','y','m','b','o','l',0, -2,'S','o',0,'O','t','h','e','r','_','S','y','m','b','o','l',0, -2,'P','i',0,'I','n','i','t','i','a','l','_','P','u','n','c','t','u','a','t','i','o','n',0, -2,'P','f',0,'F','i','n','a','l','_','P','u','n','c','t','u','a','t','i','o','n',0, -2,'j','g',0,'J','o','i','n','i','n','g','_','G','r','o','u','p',0, 
-2,'N','o','_','J','o','i','n','i','n','g','_','G','r','o','u','p',0,'N','o','_','J','o','i','n','i','n','g','_','G','r','o', -'u','p',0,2,'A','i','n',0,'A','i','n',0,2,'A','l','a','p','h',0,'A','l','a','p','h',0, -2,'A','l','e','f',0,'A','l','e','f',0,2,'B','e','h',0,'B','e','h',0, -2,'B','e','t','h',0,'B','e','t','h',0,2,'D','a','l',0,'D','a','l',0, -2,'D','a','l','a','t','h','_','R','i','s','h',0,'D','a','l','a','t','h','_','R','i','s','h',0, -2,'E',0,'E',0,2,'F','e','h',0,'F','e','h',0,2,'F','i','n','a','l','_','S','e','m','k','a','t','h',0, -'F','i','n','a','l','_','S','e','m','k','a','t','h',0,2,'G','a','f',0,'G','a','f',0, -2,'G','a','m','a','l',0,'G','a','m','a','l',0,2,'H','a','h',0,'H','a','h',0, -2,'T','e','h','_','M','a','r','b','u','t','a','_','G','o','a','l',0,'H','a','m','z','a','_','O','n','_','H','e','h','_','G', -'o','a','l',0,2,'H','e',0,'H','e',0,2,'H','e','h',0,'H','e','h',0, -2,'H','e','h','_','G','o','a','l',0,'H','e','h','_','G','o','a','l',0, -2,'H','e','t','h',0,'H','e','t','h',0,2,'K','a','f',0,'K','a','f',0, -2,'K','a','p','h',0,'K','a','p','h',0,2,'K','n','o','t','t','e','d','_','H','e','h',0, -'K','n','o','t','t','e','d','_','H','e','h',0,2,'L','a','m',0,'L','a','m',0, -2,'L','a','m','a','d','h',0,'L','a','m','a','d','h',0,2,'M','e','e','m',0,'M','e','e','m',0, -2,'M','i','m',0,'M','i','m',0,2,'N','o','o','n',0,'N','o','o','n',0, -2,'N','u','n',0,'N','u','n',0,2,'P','e',0,'P','e',0, -2,'Q','a','f',0,'Q','a','f',0,2,'Q','a','p','h',0,'Q','a','p','h',0, -2,'R','e','h',0,'R','e','h',0,2,'R','e','v','e','r','s','e','d','_','P','e',0,'R','e','v','e','r','s','e','d','_','P', -'e',0,2,'S','a','d',0,'S','a','d',0,2,'S','a','d','h','e',0,'S','a','d','h','e',0, -2,'S','e','e','n',0,'S','e','e','n',0,2,'S','e','m','k','a','t','h',0,'S','e','m','k','a','t','h',0, -2,'S','h','i','n',0,'S','h','i','n',0,2,'S','w','a','s','h','_','K','a','f',0,'S','w','a','s','h','_','K','a','f',0, 
-2,'S','y','r','i','a','c','_','W','a','w',0,'S','y','r','i','a','c','_','W','a','w',0, -2,'T','a','h',0,'T','a','h',0,2,'T','a','w',0,'T','a','w',0, -2,'T','e','h','_','M','a','r','b','u','t','a',0,'T','e','h','_','M','a','r','b','u','t','a',0, -2,'T','e','t','h',0,'T','e','t','h',0,2,'W','a','w',0,'W','a','w',0, -2,'Y','e','h',0,'Y','e','h',0,2,'Y','e','h','_','B','a','r','r','e','e',0,'Y','e','h','_','B','a','r','r','e','e',0, -2,'Y','e','h','_','W','i','t','h','_','T','a','i','l',0,'Y','e','h','_','W','i','t','h','_','T','a','i','l',0, -2,'Y','u','d','h',0,'Y','u','d','h',0,2,'Y','u','d','h','_','H','e',0,'Y','u','d','h','_','H','e',0, -2,'Z','a','i','n',0,'Z','a','i','n',0,2,'F','e',0,'F','e',0, -2,'K','h','a','p','h',0,'K','h','a','p','h',0,2,'Z','h','a','i','n',0,'Z','h','a','i','n',0, -2,'B','u','r','u','s','h','a','s','k','i','_','Y','e','h','_','B','a','r','r','e','e',0,'B','u','r','u','s','h','a','s','k', -'i','_','Y','e','h','_','B','a','r','r','e','e',0,2,'F','a','r','s','i','_','Y','e','h',0, -'F','a','r','s','i','_','Y','e','h',0,2,'N','y','a',0,'N','y','a',0, -2,'R','o','h','i','n','g','y','a','_','Y','e','h',0,'R','o','h','i','n','g','y','a','_','Y','e','h',0, -2,'M','a','n','i','c','h','a','e','a','n','_','A','l','e','p','h',0,'M','a','n','i','c','h','a','e','a','n','_','A','l','e', -'p','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','A','y','i','n',0,'M','a','n','i','c','h','a','e','a','n','_','A', -'y','i','n',0,2,'M','a','n','i','c','h','a','e','a','n','_','B','e','t','h',0,'M','a','n','i','c','h','a','e','a','n','_', -'B','e','t','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','D','a','l','e','t','h',0, -'M','a','n','i','c','h','a','e','a','n','_','D','a','l','e','t','h',0, -2,'M','a','n','i','c','h','a','e','a','n','_','D','h','a','m','e','d','h',0,'M','a','n','i','c','h','a','e','a','n','_','D', -'h','a','m','e','d','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','F','i','v','e',0, 
-'M','a','n','i','c','h','a','e','a','n','_','F','i','v','e',0, -2,'M','a','n','i','c','h','a','e','a','n','_','G','i','m','e','l',0,'M','a','n','i','c','h','a','e','a','n','_','G','i','m', -'e','l',0,2,'M','a','n','i','c','h','a','e','a','n','_','H','e','t','h',0,'M','a','n','i','c','h','a','e','a','n','_','H', -'e','t','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','H','u','n','d','r','e','d',0, -'M','a','n','i','c','h','a','e','a','n','_','H','u','n','d','r','e','d',0, -2,'M','a','n','i','c','h','a','e','a','n','_','K','a','p','h',0,'M','a','n','i','c','h','a','e','a','n','_','K','a','p','h', -0,2,'M','a','n','i','c','h','a','e','a','n','_','L','a','m','e','d','h',0,'M','a','n','i','c','h','a','e','a','n','_','L', -'a','m','e','d','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','M','e','m',0,'M','a','n','i','c','h','a','e','a','n', -'_','M','e','m',0,2,'M','a','n','i','c','h','a','e','a','n','_','N','u','n',0,'M','a','n','i','c','h','a','e','a','n','_', -'N','u','n',0,2,'M','a','n','i','c','h','a','e','a','n','_','O','n','e',0,'M','a','n','i','c','h','a','e','a','n','_','O', -'n','e',0,2,'M','a','n','i','c','h','a','e','a','n','_','P','e',0,'M','a','n','i','c','h','a','e','a','n','_','P','e',0, -2,'M','a','n','i','c','h','a','e','a','n','_','Q','o','p','h',0,'M','a','n','i','c','h','a','e','a','n','_','Q','o','p','h', -0,2,'M','a','n','i','c','h','a','e','a','n','_','R','e','s','h',0,'M','a','n','i','c','h','a','e','a','n','_','R','e','s', -'h',0,2,'M','a','n','i','c','h','a','e','a','n','_','S','a','d','h','e',0,'M','a','n','i','c','h','a','e','a','n','_','S', -'a','d','h','e',0,2,'M','a','n','i','c','h','a','e','a','n','_','S','a','m','e','k','h',0, -'M','a','n','i','c','h','a','e','a','n','_','S','a','m','e','k','h',0, -2,'M','a','n','i','c','h','a','e','a','n','_','T','a','w',0,'M','a','n','i','c','h','a','e','a','n','_','T','a','w',0, 
-2,'M','a','n','i','c','h','a','e','a','n','_','T','e','n',0,'M','a','n','i','c','h','a','e','a','n','_','T','e','n',0, -2,'M','a','n','i','c','h','a','e','a','n','_','T','e','t','h',0,'M','a','n','i','c','h','a','e','a','n','_','T','e','t','h', -0,2,'M','a','n','i','c','h','a','e','a','n','_','T','h','a','m','e','d','h',0,'M','a','n','i','c','h','a','e','a','n','_', -'T','h','a','m','e','d','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','T','w','e','n','t','y',0, -'M','a','n','i','c','h','a','e','a','n','_','T','w','e','n','t','y',0, -2,'M','a','n','i','c','h','a','e','a','n','_','W','a','w',0,'M','a','n','i','c','h','a','e','a','n','_','W','a','w',0, -2,'M','a','n','i','c','h','a','e','a','n','_','Y','o','d','h',0,'M','a','n','i','c','h','a','e','a','n','_','Y','o','d','h', -0,2,'M','a','n','i','c','h','a','e','a','n','_','Z','a','y','i','n',0,'M','a','n','i','c','h','a','e','a','n','_','Z','a', -'y','i','n',0,2,'S','t','r','a','i','g','h','t','_','W','a','w',0,'S','t','r','a','i','g','h','t','_','W','a','w',0, -2,'A','f','r','i','c','a','n','_','F','e','h',0,'A','f','r','i','c','a','n','_','F','e','h',0, -2,'A','f','r','i','c','a','n','_','N','o','o','n',0,'A','f','r','i','c','a','n','_','N','o','o','n',0, -2,'A','f','r','i','c','a','n','_','Q','a','f',0,'A','f','r','i','c','a','n','_','Q','a','f',0, -2,'M','a','l','a','y','a','l','a','m','_','B','h','a',0,'M','a','l','a','y','a','l','a','m','_','B','h','a',0, -2,'M','a','l','a','y','a','l','a','m','_','J','a',0,'M','a','l','a','y','a','l','a','m','_','J','a',0, -2,'M','a','l','a','y','a','l','a','m','_','L','l','a',0,'M','a','l','a','y','a','l','a','m','_','L','l','a',0, -2,'M','a','l','a','y','a','l','a','m','_','L','l','l','a',0,'M','a','l','a','y','a','l','a','m','_','L','l','l','a',0, -2,'M','a','l','a','y','a','l','a','m','_','N','g','a',0,'M','a','l','a','y','a','l','a','m','_','N','g','a',0, 
-2,'M','a','l','a','y','a','l','a','m','_','N','n','a',0,'M','a','l','a','y','a','l','a','m','_','N','n','a',0, -2,'M','a','l','a','y','a','l','a','m','_','N','n','n','a',0,'M','a','l','a','y','a','l','a','m','_','N','n','n','a',0, -2,'M','a','l','a','y','a','l','a','m','_','N','y','a',0,'M','a','l','a','y','a','l','a','m','_','N','y','a',0, -2,'M','a','l','a','y','a','l','a','m','_','R','a',0,'M','a','l','a','y','a','l','a','m','_','R','a',0, -2,'M','a','l','a','y','a','l','a','m','_','S','s','a',0,'M','a','l','a','y','a','l','a','m','_','S','s','a',0, -2,'M','a','l','a','y','a','l','a','m','_','T','t','a',0,'M','a','l','a','y','a','l','a','m','_','T','t','a',0, -2,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a','_','K','i','n','n','a','_','Y','a',0, -'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a','_','K','i','n','n','a','_','Y','a',0, -2,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a','_','P','a',0,'H','a','n','i','f','i','_','R','o','h','i','n', -'g','y','a','_','P','a',0,2,'j','t',0,'J','o','i','n','i','n','g','_','T','y','p','e',0, -2,'U',0,'N','o','n','_','J','o','i','n','i','n','g',0,2,'C',0,'J','o','i','n','_','C','a','u','s','i','n','g',0, -2,'D',0,'D','u','a','l','_','J','o','i','n','i','n','g',0, -2,'L',0,'L','e','f','t','_','J','o','i','n','i','n','g',0, -2,'R',0,'R','i','g','h','t','_','J','o','i','n','i','n','g',0, -2,'T',0,'T','r','a','n','s','p','a','r','e','n','t',0,2,'l','b',0,'L','i','n','e','_','B','r','e','a','k',0, -2,'X','X',0,'U','n','k','n','o','w','n',0,2,'A','I',0,'A','m','b','i','g','u','o','u','s',0, -2,'A','L',0,'A','l','p','h','a','b','e','t','i','c',0,2,'B','2',0,'B','r','e','a','k','_','B','o','t','h',0, -2,'B','A',0,'B','r','e','a','k','_','A','f','t','e','r',0, -2,'B','B',0,'B','r','e','a','k','_','B','e','f','o','r','e',0, -2,'B','K',0,'M','a','n','d','a','t','o','r','y','_','B','r','e','a','k',0, -2,'C','B',0,'C','o','n','t','i','n','g','e','n','t','_','B','r','e','a','k',0, 
-2,'C','L',0,'C','l','o','s','e','_','P','u','n','c','t','u','a','t','i','o','n',0, -2,'C','M',0,'C','o','m','b','i','n','i','n','g','_','M','a','r','k',0, -2,'C','R',0,'C','a','r','r','i','a','g','e','_','R','e','t','u','r','n',0, -2,'E','X',0,'E','x','c','l','a','m','a','t','i','o','n',0, -2,'G','L',0,'G','l','u','e',0,2,'H','Y',0,'H','y','p','h','e','n',0, -2,'I','D',0,'I','d','e','o','g','r','a','p','h','i','c',0, -3,'I','N',0,'I','n','s','e','p','a','r','a','b','l','e',0,'I','n','s','e','p','e','r','a','b','l','e',0, -2,'I','S',0,'I','n','f','i','x','_','N','u','m','e','r','i','c',0, -2,'L','F',0,'L','i','n','e','_','F','e','e','d',0,2,'N','S',0,'N','o','n','s','t','a','r','t','e','r',0, -2,'N','U',0,'N','u','m','e','r','i','c',0,2,'O','P',0,'O','p','e','n','_','P','u','n','c','t','u','a','t','i','o','n', -0,2,'P','O',0,'P','o','s','t','f','i','x','_','N','u','m','e','r','i','c',0, -2,'P','R',0,'P','r','e','f','i','x','_','N','u','m','e','r','i','c',0, -2,'Q','U',0,'Q','u','o','t','a','t','i','o','n',0,2,'S','A',0,'C','o','m','p','l','e','x','_','C','o','n','t','e','x', -'t',0,2,'S','G',0,'S','u','r','r','o','g','a','t','e',0, -2,'S','P',0,'S','p','a','c','e',0,2,'S','Y',0,'B','r','e','a','k','_','S','y','m','b','o','l','s',0, -2,'Z','W',0,'Z','W','S','p','a','c','e',0,2,'N','L',0,'N','e','x','t','_','L','i','n','e',0, -2,'W','J',0,'W','o','r','d','_','J','o','i','n','e','r',0, -2,'H','2',0,'H','2',0,2,'H','3',0,'H','3',0,2,'J','L',0,'J','L',0, -2,'J','T',0,'J','T',0,2,'J','V',0,'J','V',0,2,'C','P',0,'C','l','o','s','e','_','P','a','r','e','n','t','h','e', -'s','i','s',0,2,'C','J',0,'C','o','n','d','i','t','i','o','n','a','l','_','J','a','p','a','n','e','s','e','_','S','t','a', -'r','t','e','r',0,2,'H','L',0,'H','e','b','r','e','w','_','L','e','t','t','e','r',0, -2,'E','B',0,'E','_','B','a','s','e',0,2,'E','M',0,'E','_','M','o','d','i','f','i','e','r',0, -2,'Z','W','J',0,'Z','W','J',0,2,'n','t',0,'N','u','m','e','r','i','c','_','T','y','p','e',0, 
-2,'N','o','n','e',0,'N','o','n','e',0,2,'D','e',0,'D','e','c','i','m','a','l',0, -2,'D','i',0,'D','i','g','i','t',0,2,'N','u',0,'N','u','m','e','r','i','c',0, -2,'s','c',0,'S','c','r','i','p','t',0,2,'Z','y','y','y',0,'C','o','m','m','o','n',0, -3,'Z','i','n','h',0,'I','n','h','e','r','i','t','e','d',0,'Q','a','a','i',0, -2,'A','r','a','b',0,'A','r','a','b','i','c',0,2,'A','r','m','n',0,'A','r','m','e','n','i','a','n',0, -2,'B','e','n','g',0,'B','e','n','g','a','l','i',0,2,'B','o','p','o',0,'B','o','p','o','m','o','f','o',0, -2,'C','h','e','r',0,'C','h','e','r','o','k','e','e',0,3,'C','o','p','t',0,'C','o','p','t','i','c',0, -'Q','a','a','c',0,2,'C','y','r','l',0,'C','y','r','i','l','l','i','c',0, -2,'D','s','r','t',0,'D','e','s','e','r','e','t',0,2,'D','e','v','a',0,'D','e','v','a','n','a','g','a','r','i',0, -2,'E','t','h','i',0,'E','t','h','i','o','p','i','c',0,2,'G','e','o','r',0,'G','e','o','r','g','i','a','n',0, -2,'G','o','t','h',0,'G','o','t','h','i','c',0,2,'G','r','e','k',0,'G','r','e','e','k',0, -2,'G','u','j','r',0,'G','u','j','a','r','a','t','i',0,2,'G','u','r','u',0,'G','u','r','m','u','k','h','i',0, -2,'H','a','n','i',0,'H','a','n',0,2,'H','a','n','g',0,'H','a','n','g','u','l',0, -2,'H','e','b','r',0,'H','e','b','r','e','w',0,2,'H','i','r','a',0,'H','i','r','a','g','a','n','a',0, -2,'K','n','d','a',0,'K','a','n','n','a','d','a',0,2,'K','a','n','a',0,'K','a','t','a','k','a','n','a',0, -2,'K','h','m','r',0,'K','h','m','e','r',0,2,'L','a','o','o',0,'L','a','o',0, -2,'L','a','t','n',0,'L','a','t','i','n',0,2,'M','l','y','m',0,'M','a','l','a','y','a','l','a','m',0, -2,'M','o','n','g',0,'M','o','n','g','o','l','i','a','n',0, -2,'M','y','m','r',0,'M','y','a','n','m','a','r',0,2,'O','g','a','m',0,'O','g','h','a','m',0, -2,'I','t','a','l',0,'O','l','d','_','I','t','a','l','i','c',0, -2,'O','r','y','a',0,'O','r','i','y','a',0,2,'R','u','n','r',0,'R','u','n','i','c',0, 
-2,'S','i','n','h',0,'S','i','n','h','a','l','a',0,2,'S','y','r','c',0,'S','y','r','i','a','c',0, -2,'T','a','m','l',0,'T','a','m','i','l',0,2,'T','e','l','u',0,'T','e','l','u','g','u',0, -2,'T','h','a','a',0,'T','h','a','a','n','a',0,2,'T','i','b','t',0,'T','i','b','e','t','a','n',0, -2,'C','a','n','s',0,'C','a','n','a','d','i','a','n','_','A','b','o','r','i','g','i','n','a','l',0, -2,'Y','i','i','i',0,'Y','i',0,2,'T','g','l','g',0,'T','a','g','a','l','o','g',0, -2,'H','a','n','o',0,'H','a','n','u','n','o','o',0,2,'B','u','h','d',0,'B','u','h','i','d',0, -2,'T','a','g','b',0,'T','a','g','b','a','n','w','a',0,2,'B','r','a','i',0,'B','r','a','i','l','l','e',0, -2,'C','p','r','t',0,'C','y','p','r','i','o','t',0,2,'L','i','m','b',0,'L','i','m','b','u',0, -2,'L','i','n','b',0,'L','i','n','e','a','r','_','B',0,2,'O','s','m','a',0,'O','s','m','a','n','y','a',0, -2,'S','h','a','w',0,'S','h','a','v','i','a','n',0,2,'T','a','l','e',0,'T','a','i','_','L','e',0, -2,'U','g','a','r',0,'U','g','a','r','i','t','i','c',0,2,'H','r','k','t',0,'K','a','t','a','k','a','n','a','_','O','r', -'_','H','i','r','a','g','a','n','a',0,2,'B','u','g','i',0,'B','u','g','i','n','e','s','e',0, -2,'G','l','a','g',0,'G','l','a','g','o','l','i','t','i','c',0, -2,'K','h','a','r',0,'K','h','a','r','o','s','h','t','h','i',0, -2,'S','y','l','o',0,'S','y','l','o','t','i','_','N','a','g','r','i',0, -2,'T','a','l','u',0,'N','e','w','_','T','a','i','_','L','u','e',0, -2,'T','f','n','g',0,'T','i','f','i','n','a','g','h',0,2,'X','p','e','o',0,'O','l','d','_','P','e','r','s','i','a','n', -0,2,'B','a','l','i',0,'B','a','l','i','n','e','s','e',0, -2,'B','a','t','k',0,'B','a','t','a','k',0,2,'B','l','i','s',0,'B','l','i','s',0, -2,'B','r','a','h',0,'B','r','a','h','m','i',0,2,'C','i','r','t',0,'C','i','r','t',0, -2,'C','y','r','s',0,'C','y','r','s',0,2,'E','g','y','d',0,'E','g','y','d',0, -2,'E','g','y','h',0,'E','g','y','h',0,2,'E','g','y','p',0,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g', 
-'l','y','p','h','s',0,2,'G','e','o','k',0,'G','e','o','k',0, -2,'H','a','n','s',0,'H','a','n','s',0,2,'H','a','n','t',0,'H','a','n','t',0, -2,'H','m','n','g',0,'P','a','h','a','w','h','_','H','m','o','n','g',0, -2,'H','u','n','g',0,'O','l','d','_','H','u','n','g','a','r','i','a','n',0, -2,'I','n','d','s',0,'I','n','d','s',0,2,'J','a','v','a',0,'J','a','v','a','n','e','s','e',0, -2,'K','a','l','i',0,'K','a','y','a','h','_','L','i',0,2,'L','a','t','f',0,'L','a','t','f',0, -2,'L','a','t','g',0,'L','a','t','g',0,2,'L','e','p','c',0,'L','e','p','c','h','a',0, -2,'L','i','n','a',0,'L','i','n','e','a','r','_','A',0,2,'M','a','n','d',0,'M','a','n','d','a','i','c',0, -2,'M','a','y','a',0,'M','a','y','a',0,2,'M','e','r','o',0,'M','e','r','o','i','t','i','c','_','H','i','e','r','o','g', -'l','y','p','h','s',0,2,'N','k','o','o',0,'N','k','o',0, -2,'O','r','k','h',0,'O','l','d','_','T','u','r','k','i','c',0, -2,'P','e','r','m',0,'O','l','d','_','P','e','r','m','i','c',0, -2,'P','h','a','g',0,'P','h','a','g','s','_','P','a',0,2,'P','h','n','x',0,'P','h','o','e','n','i','c','i','a','n',0, -2,'P','l','r','d',0,'M','i','a','o',0,2,'R','o','r','o',0,'R','o','r','o',0, -2,'S','a','r','a',0,'S','a','r','a',0,2,'S','y','r','e',0,'S','y','r','e',0, -2,'S','y','r','j',0,'S','y','r','j',0,2,'S','y','r','n',0,'S','y','r','n',0, -2,'T','e','n','g',0,'T','e','n','g',0,2,'V','a','i','i',0,'V','a','i',0, -2,'V','i','s','p',0,'V','i','s','p',0,2,'X','s','u','x',0,'C','u','n','e','i','f','o','r','m',0, -2,'Z','x','x','x',0,'Z','x','x','x',0,2,'Z','z','z','z',0,'U','n','k','n','o','w','n',0, -2,'C','a','r','i',0,'C','a','r','i','a','n',0,2,'J','p','a','n',0,'J','p','a','n',0, -2,'L','a','n','a',0,'T','a','i','_','T','h','a','m',0,2,'L','y','c','i',0,'L','y','c','i','a','n',0, -2,'L','y','d','i',0,'L','y','d','i','a','n',0,2,'O','l','c','k',0,'O','l','_','C','h','i','k','i',0, -2,'R','j','n','g',0,'R','e','j','a','n','g',0,2,'S','a','u','r',0,'S','a','u','r','a','s','h','t','r','a',0, 
-2,'S','g','n','w',0,'S','i','g','n','W','r','i','t','i','n','g',0, -2,'S','u','n','d',0,'S','u','n','d','a','n','e','s','e',0, -2,'M','o','o','n',0,'M','o','o','n',0,2,'M','t','e','i',0,'M','e','e','t','e','i','_','M','a','y','e','k',0, -2,'A','r','m','i',0,'I','m','p','e','r','i','a','l','_','A','r','a','m','a','i','c',0, -2,'A','v','s','t',0,'A','v','e','s','t','a','n',0,2,'C','a','k','m',0,'C','h','a','k','m','a',0, -2,'K','o','r','e',0,'K','o','r','e',0,2,'K','t','h','i',0,'K','a','i','t','h','i',0, -2,'M','a','n','i',0,'M','a','n','i','c','h','a','e','a','n',0, -2,'P','h','l','i',0,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','h','l','a','v','i',0, -2,'P','h','l','p',0,'P','s','a','l','t','e','r','_','P','a','h','l','a','v','i',0, -2,'P','h','l','v',0,'P','h','l','v',0,2,'P','r','t','i',0,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P', -'a','r','t','h','i','a','n',0,2,'S','a','m','r',0,'S','a','m','a','r','i','t','a','n',0, -2,'T','a','v','t',0,'T','a','i','_','V','i','e','t',0,2,'Z','m','t','h',0,'Z','m','t','h',0, -2,'Z','s','y','m',0,'Z','s','y','m',0,2,'B','a','m','u',0,'B','a','m','u','m',0, -2,'N','k','g','b',0,'N','k','g','b',0,2,'S','a','r','b',0,'O','l','d','_','S','o','u','t','h','_','A','r','a','b','i', -'a','n',0,2,'B','a','s','s',0,'B','a','s','s','a','_','V','a','h',0, -2,'D','u','p','l',0,'D','u','p','l','o','y','a','n',0,2,'E','l','b','a',0,'E','l','b','a','s','a','n',0, -2,'G','r','a','n',0,'G','r','a','n','t','h','a',0,2,'K','p','e','l',0,'K','p','e','l',0, -2,'L','o','m','a',0,'L','o','m','a',0,2,'M','e','n','d',0,'M','e','n','d','e','_','K','i','k','a','k','u','i',0, -2,'M','e','r','c',0,'M','e','r','o','i','t','i','c','_','C','u','r','s','i','v','e',0, -2,'N','a','r','b',0,'O','l','d','_','N','o','r','t','h','_','A','r','a','b','i','a','n',0, -2,'N','b','a','t',0,'N','a','b','a','t','a','e','a','n',0, -2,'P','a','l','m',0,'P','a','l','m','y','r','e','n','e',0, 
-2,'S','i','n','d',0,'K','h','u','d','a','w','a','d','i',0, -2,'W','a','r','a',0,'W','a','r','a','n','g','_','C','i','t','i',0, -2,'A','f','a','k',0,'A','f','a','k',0,2,'J','u','r','c',0,'J','u','r','c',0, -2,'M','r','o','o',0,'M','r','o',0,2,'N','s','h','u',0,'N','u','s','h','u',0, -2,'S','h','r','d',0,'S','h','a','r','a','d','a',0,2,'S','o','r','a',0,'S','o','r','a','_','S','o','m','p','e','n','g', -0,2,'T','a','k','r',0,'T','a','k','r','i',0,2,'T','a','n','g',0,'T','a','n','g','u','t',0, -2,'W','o','l','e',0,'W','o','l','e',0,2,'H','l','u','w',0,'A','n','a','t','o','l','i','a','n','_','H','i','e','r','o', -'g','l','y','p','h','s',0,2,'K','h','o','j',0,'K','h','o','j','k','i',0, -2,'T','i','r','h',0,'T','i','r','h','u','t','a',0,2,'A','g','h','b',0,'C','a','u','c','a','s','i','a','n','_','A','l', -'b','a','n','i','a','n',0,2,'M','a','h','j',0,'M','a','h','a','j','a','n','i',0, -2,'H','a','t','r',0,'H','a','t','r','a','n',0,2,'M','u','l','t',0,'M','u','l','t','a','n','i',0, -2,'P','a','u','c',0,'P','a','u','_','C','i','n','_','H','a','u',0, -2,'S','i','d','d',0,'S','i','d','d','h','a','m',0,2,'A','d','l','m',0,'A','d','l','a','m',0, -2,'B','h','k','s',0,'B','h','a','i','k','s','u','k','i',0, -2,'M','a','r','c',0,'M','a','r','c','h','e','n',0,2,'O','s','g','e',0,'O','s','a','g','e',0, -2,'H','a','n','b',0,'H','a','n','b',0,2,'J','a','m','o',0,'J','a','m','o',0, -2,'Z','s','y','e',0,'Z','s','y','e',0,2,'G','o','n','m',0,'M','a','s','a','r','a','m','_','G','o','n','d','i',0, -2,'S','o','y','o',0,'S','o','y','o','m','b','o',0,2,'Z','a','n','b',0,'Z','a','n','a','b','a','z','a','r','_','S','q', -'u','a','r','e',0,2,'D','o','g','r',0,'D','o','g','r','a',0, -2,'G','o','n','g',0,'G','u','n','j','a','l','a','_','G','o','n','d','i',0, -2,'M','a','k','a',0,'M','a','k','a','s','a','r',0,2,'M','e','d','f',0,'M','e','d','e','f','a','i','d','r','i','n',0, -2,'R','o','h','g',0,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a',0, 
-2,'S','o','g','d',0,'S','o','g','d','i','a','n',0,2,'S','o','g','o',0,'O','l','d','_','S','o','g','d','i','a','n',0, -2,'h','s','t',0,'H','a','n','g','u','l','_','S','y','l','l','a','b','l','e','_','T','y','p','e',0, -2,'N','A',0,'N','o','t','_','A','p','p','l','i','c','a','b','l','e',0, -2,'L',0,'L','e','a','d','i','n','g','_','J','a','m','o',0, -2,'V',0,'V','o','w','e','l','_','J','a','m','o',0,2,'T',0,'T','r','a','i','l','i','n','g','_','J','a','m','o',0, -2,'L','V',0,'L','V','_','S','y','l','l','a','b','l','e',0, -2,'L','V','T',0,'L','V','T','_','S','y','l','l','a','b','l','e',0, -2,'N','F','D','_','Q','C',0,'N','F','D','_','Q','u','i','c','k','_','C','h','e','c','k',0, -2,'N',0,'N','o',0,2,'Y',0,'Y','e','s',0,2,'N','F','K','D','_','Q','C',0,'N','F','K','D','_','Q','u','i','c','k', -'_','C','h','e','c','k',0,2,'N','F','C','_','Q','C',0,'N','F','C','_','Q','u','i','c','k','_','C','h','e','c','k',0, -2,'M',0,'M','a','y','b','e',0,2,'N','F','K','C','_','Q','C',0,'N','F','K','C','_','Q','u','i','c','k','_','C','h','e', -'c','k',0,2,'l','c','c','c',0,'L','e','a','d','_','C','a','n','o','n','i','c','a','l','_','C','o','m','b','i','n','i','n', -'g','_','C','l','a','s','s',0,2,'t','c','c','c',0,'T','r','a','i','l','_','C','a','n','o','n','i','c','a','l','_','C','o', -'m','b','i','n','i','n','g','_','C','l','a','s','s',0,2,'G','C','B',0,'G','r','a','p','h','e','m','e','_','C','l','u','s', -'t','e','r','_','B','r','e','a','k',0,2,'X','X',0,'O','t','h','e','r',0, -2,'C','N',0,'C','o','n','t','r','o','l',0,2,'C','R',0,'C','R',0, -2,'E','X',0,'E','x','t','e','n','d',0,2,'L',0,'L',0, -2,'L','F',0,'L','F',0,2,'L','V',0,'L','V',0,2,'L','V','T',0,'L','V','T',0, -2,'T',0,'T',0,2,'V',0,'V',0,2,'S','M',0,'S','p','a','c','i','n','g','M','a','r','k',0, -2,'P','P',0,'P','r','e','p','e','n','d',0,2,'E','B','G',0,'E','_','B','a','s','e','_','G','A','Z',0, -2,'G','A','Z',0,'G','l','u','e','_','A','f','t','e','r','_','Z','w','j',0, 
-2,'S','B',0,'S','e','n','t','e','n','c','e','_','B','r','e','a','k',0, -2,'A','T',0,'A','T','e','r','m',0,2,'C','L',0,'C','l','o','s','e',0, -2,'F','O',0,'F','o','r','m','a','t',0,2,'L','O',0,'L','o','w','e','r',0, -2,'L','E',0,'O','L','e','t','t','e','r',0,2,'S','E',0,'S','e','p',0, -2,'S','P',0,'S','p',0,2,'S','T',0,'S','T','e','r','m',0, -2,'U','P',0,'U','p','p','e','r',0,2,'S','C',0,'S','C','o','n','t','i','n','u','e',0, -2,'W','B',0,'W','o','r','d','_','B','r','e','a','k',0,2,'L','E',0,'A','L','e','t','t','e','r',0, -2,'K','A',0,'K','a','t','a','k','a','n','a',0,2,'M','L',0,'M','i','d','L','e','t','t','e','r',0, -2,'M','N',0,'M','i','d','N','u','m',0,2,'E','X',0,'E','x','t','e','n','d','N','u','m','L','e','t',0, -2,'E','x','t','e','n','d',0,'E','x','t','e','n','d',0,2,'M','B',0,'M','i','d','N','u','m','L','e','t',0, -2,'N','L',0,'N','e','w','l','i','n','e',0,2,'S','Q',0,'S','i','n','g','l','e','_','Q','u','o','t','e',0, -2,'D','Q',0,'D','o','u','b','l','e','_','Q','u','o','t','e',0, -2,'W','S','e','g','S','p','a','c','e',0,'W','S','e','g','S','p','a','c','e',0, -2,'b','p','t',0,'B','i','d','i','_','P','a','i','r','e','d','_','B','r','a','c','k','e','t','_','T','y','p','e',0, -2,'n',0,'N','o','n','e',0,2,'o',0,'O','p','e','n',0, -2,'c',0,'C','l','o','s','e',0,2,'I','n','P','C',0,'I','n','d','i','c','_','P','o','s','i','t','i','o','n','a','l','_', -'C','a','t','e','g','o','r','y',0,2,'N','A',0,'N','A',0, -2,'B','o','t','t','o','m',0,'B','o','t','t','o','m',0,2,'B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0, -'B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0, -2,'B','o','t','t','o','m','_','A','n','d','_','R','i','g','h','t',0,'B','o','t','t','o','m','_','A','n','d','_','R','i','g', -'h','t',0,2,'L','e','f','t',0,'L','e','f','t',0,2,'L','e','f','t','_','A','n','d','_','R','i','g','h','t',0, -'L','e','f','t','_','A','n','d','_','R','i','g','h','t',0,2,'O','v','e','r','s','t','r','u','c','k',0, 
-'O','v','e','r','s','t','r','u','c','k',0,2,'R','i','g','h','t',0,'R','i','g','h','t',0, -2,'T','o','p',0,'T','o','p',0,2,'T','o','p','_','A','n','d','_','B','o','t','t','o','m',0, -'T','o','p','_','A','n','d','_','B','o','t','t','o','m',0,2,'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A', -'n','d','_','R','i','g','h','t',0,'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A','n','d','_','R','i','g','h', -'t',0,2,'T','o','p','_','A','n','d','_','L','e','f','t',0,'T','o','p','_','A','n','d','_','L','e','f','t',0, -2,'T','o','p','_','A','n','d','_','L','e','f','t','_','A','n','d','_','R','i','g','h','t',0, -'T','o','p','_','A','n','d','_','L','e','f','t','_','A','n','d','_','R','i','g','h','t',0, -2,'T','o','p','_','A','n','d','_','R','i','g','h','t',0,'T','o','p','_','A','n','d','_','R','i','g','h','t',0, -2,'V','i','s','u','a','l','_','O','r','d','e','r','_','L','e','f','t',0,'V','i','s','u','a','l','_','O','r','d','e','r','_', -'L','e','f','t',0,2,'I','n','S','C',0,'I','n','d','i','c','_','S','y','l','l','a','b','i','c','_','C','a','t','e','g','o', -'r','y',0,2,'O','t','h','e','r',0,'O','t','h','e','r',0, -2,'A','v','a','g','r','a','h','a',0,'A','v','a','g','r','a','h','a',0, -2,'B','i','n','d','u',0,'B','i','n','d','u',0,2,'B','r','a','h','m','i','_','J','o','i','n','i','n','g','_','N','u','m', -'b','e','r',0,'B','r','a','h','m','i','_','J','o','i','n','i','n','g','_','N','u','m','b','e','r',0, -2,'C','a','n','t','i','l','l','a','t','i','o','n','_','M','a','r','k',0,'C','a','n','t','i','l','l','a','t','i','o','n','_', -'M','a','r','k',0,2,'C','o','n','s','o','n','a','n','t',0,'C','o','n','s','o','n','a','n','t',0, -2,'C','o','n','s','o','n','a','n','t','_','D','e','a','d',0,'C','o','n','s','o','n','a','n','t','_','D','e','a','d',0, -2,'C','o','n','s','o','n','a','n','t','_','F','i','n','a','l',0,'C','o','n','s','o','n','a','n','t','_','F','i','n','a','l', 
-0,2,'C','o','n','s','o','n','a','n','t','_','H','e','a','d','_','L','e','t','t','e','r',0, -'C','o','n','s','o','n','a','n','t','_','H','e','a','d','_','L','e','t','t','e','r',0, -2,'C','o','n','s','o','n','a','n','t','_','I','n','i','t','i','a','l','_','P','o','s','t','f','i','x','e','d',0, -'C','o','n','s','o','n','a','n','t','_','I','n','i','t','i','a','l','_','P','o','s','t','f','i','x','e','d',0, -2,'C','o','n','s','o','n','a','n','t','_','K','i','l','l','e','r',0,'C','o','n','s','o','n','a','n','t','_','K','i','l','l', -'e','r',0,2,'C','o','n','s','o','n','a','n','t','_','M','e','d','i','a','l',0,'C','o','n','s','o','n','a','n','t','_','M', -'e','d','i','a','l',0,2,'C','o','n','s','o','n','a','n','t','_','P','l','a','c','e','h','o','l','d','e','r',0, -'C','o','n','s','o','n','a','n','t','_','P','l','a','c','e','h','o','l','d','e','r',0, -2,'C','o','n','s','o','n','a','n','t','_','P','r','e','c','e','d','i','n','g','_','R','e','p','h','a',0, -'C','o','n','s','o','n','a','n','t','_','P','r','e','c','e','d','i','n','g','_','R','e','p','h','a',0, -2,'C','o','n','s','o','n','a','n','t','_','P','r','e','f','i','x','e','d',0,'C','o','n','s','o','n','a','n','t','_','P','r', -'e','f','i','x','e','d',0,2,'C','o','n','s','o','n','a','n','t','_','S','u','b','j','o','i','n','e','d',0, -'C','o','n','s','o','n','a','n','t','_','S','u','b','j','o','i','n','e','d',0, -2,'C','o','n','s','o','n','a','n','t','_','S','u','c','c','e','e','d','i','n','g','_','R','e','p','h','a',0, -'C','o','n','s','o','n','a','n','t','_','S','u','c','c','e','e','d','i','n','g','_','R','e','p','h','a',0, -2,'C','o','n','s','o','n','a','n','t','_','W','i','t','h','_','S','t','a','c','k','e','r',0, -'C','o','n','s','o','n','a','n','t','_','W','i','t','h','_','S','t','a','c','k','e','r',0, -2,'G','e','m','i','n','a','t','i','o','n','_','M','a','r','k',0,'G','e','m','i','n','a','t','i','o','n','_','M','a','r','k', 
-0,2,'I','n','v','i','s','i','b','l','e','_','S','t','a','c','k','e','r',0,'I','n','v','i','s','i','b','l','e','_','S','t', -'a','c','k','e','r',0,2,'J','o','i','n','e','r',0,'J','o','i','n','e','r',0, -2,'M','o','d','i','f','y','i','n','g','_','L','e','t','t','e','r',0,'M','o','d','i','f','y','i','n','g','_','L','e','t','t', -'e','r',0,2,'N','o','n','_','J','o','i','n','e','r',0,'N','o','n','_','J','o','i','n','e','r',0, -2,'N','u','k','t','a',0,'N','u','k','t','a',0,2,'N','u','m','b','e','r',0,'N','u','m','b','e','r',0, -2,'N','u','m','b','e','r','_','J','o','i','n','e','r',0,'N','u','m','b','e','r','_','J','o','i','n','e','r',0, -2,'P','u','r','e','_','K','i','l','l','e','r',0,'P','u','r','e','_','K','i','l','l','e','r',0, -2,'R','e','g','i','s','t','e','r','_','S','h','i','f','t','e','r',0,'R','e','g','i','s','t','e','r','_','S','h','i','f','t', -'e','r',0,2,'S','y','l','l','a','b','l','e','_','M','o','d','i','f','i','e','r',0,'S','y','l','l','a','b','l','e','_','M', -'o','d','i','f','i','e','r',0,2,'T','o','n','e','_','L','e','t','t','e','r',0,'T','o','n','e','_','L','e','t','t','e','r', -0,2,'T','o','n','e','_','M','a','r','k',0,'T','o','n','e','_','M','a','r','k',0, -2,'V','i','r','a','m','a',0,'V','i','r','a','m','a',0,2,'V','i','s','a','r','g','a',0, -'V','i','s','a','r','g','a',0,2,'V','o','w','e','l',0,'V','o','w','e','l',0, -2,'V','o','w','e','l','_','D','e','p','e','n','d','e','n','t',0,'V','o','w','e','l','_','D','e','p','e','n','d','e','n','t', -0,2,'V','o','w','e','l','_','I','n','d','e','p','e','n','d','e','n','t',0,'V','o','w','e','l','_','I','n','d','e','p','e', -'n','d','e','n','t',0,2,'v','o',0,'V','e','r','t','i','c','a','l','_','O','r','i','e','n','t','a','t','i','o','n',0, -2,'R',0,'R','o','t','a','t','e','d',0,2,'T','r',0,'T','r','a','n','s','f','o','r','m','e','d','_','R','o','t','a','t', -'e','d',0,2,'T','u',0,'T','r','a','n','s','f','o','r','m','e','d','_','U','p','r','i','g','h','t',0, 
-2,'U',0,'U','p','r','i','g','h','t',0,2,'g','c','m',0,'G','e','n','e','r','a','l','_','C','a','t','e','g','o','r','y', -'_','M','a','s','k',0,2,'C',0,'O','t','h','e','r',0,2,'L',0,'L','e','t','t','e','r',0, -2,'L','C',0,'C','a','s','e','d','_','L','e','t','t','e','r',0, -3,'M',0,'M','a','r','k',0,'C','o','m','b','i','n','i','n','g','_','M','a','r','k',0, -2,'N',0,'N','u','m','b','e','r',0,3,'P',0,'P','u','n','c','t','u','a','t','i','o','n',0, -'p','u','n','c','t',0,2,'S',0,'S','y','m','b','o','l',0, -2,'Z',0,'S','e','p','a','r','a','t','o','r',0,2,'n','v',0,'N','u','m','e','r','i','c','_','V','a','l','u','e',0, -2,'a','g','e',0,'A','g','e',0,2,'b','m','g',0,'B','i','d','i','_','M','i','r','r','o','r','i','n','g','_','G','l','y', -'p','h',0,2,'c','f',0,'C','a','s','e','_','F','o','l','d','i','n','g',0, -2,'i','s','c',0,'I','S','O','_','C','o','m','m','e','n','t',0, -2,'l','c',0,'L','o','w','e','r','c','a','s','e','_','M','a','p','p','i','n','g',0, -2,'n','a',0,'N','a','m','e',0,3,'s','c','f',0,'S','i','m','p','l','e','_','C','a','s','e','_','F','o','l','d','i','n', -'g',0,'s','f','c',0,2,'s','l','c',0,'S','i','m','p','l','e','_','L','o','w','e','r','c','a','s','e','_','M','a','p','p', -'i','n','g',0,2,'s','t','c',0,'S','i','m','p','l','e','_','T','i','t','l','e','c','a','s','e','_','M','a','p','p','i','n', -'g',0,2,'s','u','c',0,'S','i','m','p','l','e','_','U','p','p','e','r','c','a','s','e','_','M','a','p','p','i','n','g',0, -2,'t','c',0,'T','i','t','l','e','c','a','s','e','_','M','a','p','p','i','n','g',0, -2,'n','a','1',0,'U','n','i','c','o','d','e','_','1','_','N','a','m','e',0, -2,'u','c',0,'U','p','p','e','r','c','a','s','e','_','M','a','p','p','i','n','g',0, -2,'b','p','b',0,'B','i','d','i','_','P','a','i','r','e','d','_','B','r','a','c','k','e','t',0, -2,'s','c','x',0,'S','c','r','i','p','t','_','E','x','t','e','n','s','i','o','n','s',0 -}; - -U_NAMESPACE_END - -#endif // INCLUDED_FROM_PROPNAME_CPP diff --git 
a/deps/node/deps/icu-small/source/common/propsvec.cpp b/deps/node/deps/icu-small/source/common/propsvec.cpp deleted file mode 100644 index 056fcda9..00000000 --- a/deps/node/deps/icu-small/source/common/propsvec.cpp +++ /dev/null @@ -1,529 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: propsvec.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002feb22 -* created by: Markus W. Scherer -* -* Store bits (Unicode character properties) in bit set vectors. -*/ - -#include -#include "unicode/utypes.h" -#include "cmemory.h" -#include "utrie.h" -#include "utrie2.h" -#include "uarrsort.h" -#include "propsvec.h" -#include "uassert.h" - -struct UPropsVectors { - uint32_t *v; - int32_t columns; /* number of columns, plus two for start & limit values */ - int32_t maxRows; - int32_t rows; - int32_t prevRow; /* search optimization: remember last row seen */ - UBool isCompacted; -}; - -#define UPVEC_INITIAL_ROWS (1<<12) -#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16) -#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) - -U_CAPI UPropsVectors * U_EXPORT2 -upvec_open(int32_t columns, UErrorCode *pErrorCode) { - UPropsVectors *pv; - uint32_t *v, *row; - uint32_t cp; - - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - if(columns<1) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - columns+=2; /* count range start and limit columns */ - - pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors)); - v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4); - if(pv==NULL || v==NULL) { - uprv_free(pv); - uprv_free(v); - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memset(pv, 0, 
sizeof(UPropsVectors)); - pv->v=v; - pv->columns=columns; - pv->maxRows=UPVEC_INITIAL_ROWS; - pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); - - /* set the all-Unicode row and the special-value rows */ - row=pv->v; - uprv_memset(row, 0, pv->rows*columns*4); - row[0]=0; - row[1]=0x110000; - row+=columns; - for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { - row[0]=cp; - row[1]=cp+1; - row+=columns; - } - return pv; -} - -U_CAPI void U_EXPORT2 -upvec_close(UPropsVectors *pv) { - if(pv!=NULL) { - uprv_free(pv->v); - uprv_free(pv); - } -} - -static uint32_t * -_findRow(UPropsVectors *pv, UChar32 rangeStart) { - uint32_t *row; - int32_t columns, i, start, limit, prevRow; - - columns=pv->columns; - limit=pv->rows; - prevRow=pv->prevRow; - - /* check the vicinity of the last-seen row (start searching with an unrolled loop) */ - row=pv->v+prevRow*columns; - if(rangeStart>=(UChar32)row[0]) { - if(rangeStart<(UChar32)row[1]) { - /* same row as last seen */ - return row; - } else if(rangeStart<(UChar32)(row+=columns)[1]) { - /* next row after the last one */ - pv->prevRow=prevRow+1; - return row; - } else if(rangeStart<(UChar32)(row+=columns)[1]) { - /* second row after the last one */ - pv->prevRow=prevRow+2; - return row; - } else if((rangeStart-(UChar32)row[1])<10) { - /* we are close, continue looping */ - prevRow+=2; - do { - ++prevRow; - row+=columns; - } while(rangeStart>=(UChar32)row[1]); - pv->prevRow=prevRow; - return row; - } - } else if(rangeStart<(UChar32)pv->v[1]) { - /* the very first row */ - pv->prevRow=0; - return pv->v; - } - - /* do a binary search for the start of the range */ - start=0; - while(startv+i*columns; - if(rangeStart<(UChar32)row[0]) { - limit=i; - } else if(rangeStart<(UChar32)row[1]) { - pv->prevRow=i; - return row; - } else { - start=i; - } - } - - /* must be found because all ranges together always cover all of Unicode */ - pv->prevRow=start; - return pv->v+start*columns; -} - -U_CAPI void U_EXPORT2 -upvec_setValue(UPropsVectors 
*pv, - UChar32 start, UChar32 end, - int32_t column, - uint32_t value, uint32_t mask, - UErrorCode *pErrorCode) { - uint32_t *firstRow, *lastRow; - int32_t columns; - UChar32 limit; - UBool splitFirstRow, splitLastRow; - - /* argument checking */ - if(U_FAILURE(*pErrorCode)) { - return; - } - if( pv==NULL || - start<0 || start>end || end>UPVEC_MAX_CP || - column<0 || column>=(pv->columns-2) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if(pv->isCompacted) { - *pErrorCode=U_NO_WRITE_PERMISSION; - return; - } - limit=end+1; - - /* initialize */ - columns=pv->columns; - column+=2; /* skip range start and limit columns */ - value&=mask; - - /* find the rows whose ranges overlap with the input range */ - - /* find the first and last rows, always successful */ - firstRow=_findRow(pv, start); - lastRow=_findRow(pv, end); - - /* - * Rows need to be split if they partially overlap with the - * input range (only possible for the first and last rows) - * and if their value differs from the input value. 
- */ - splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask)); - splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask)); - - /* split first/last rows if necessary */ - if(splitFirstRow || splitLastRow) { - int32_t count, rows; - - rows=pv->rows; - if((rows+splitFirstRow+splitLastRow)>pv->maxRows) { - uint32_t *newVectors; - int32_t newMaxRows; - - if(pv->maxRowsmaxRowsv, (size_t)rows*columns*4); - firstRow=newVectors+(firstRow-pv->v); - lastRow=newVectors+(lastRow-pv->v); - uprv_free(pv->v); - pv->v=newVectors; - pv->maxRows=newMaxRows; - } - - /* count the number of row cells to move after the last row, and move them */ - count = (int32_t)((pv->v+rows*columns)-(lastRow+columns)); - if(count>0) { - uprv_memmove( - lastRow+(1+splitFirstRow+splitLastRow)*columns, - lastRow+columns, - count*4); - } - pv->rows=rows+splitFirstRow+splitLastRow; - - /* split the first row, and move the firstRow pointer to the second part */ - if(splitFirstRow) { - /* copy all affected rows up one and move the lastRow pointer */ - count = (int32_t)((lastRow-firstRow)+columns); - uprv_memmove(firstRow+columns, firstRow, (size_t)count*4); - lastRow+=columns; - - /* split the range and move the firstRow pointer */ - firstRow[1]=firstRow[columns]=(uint32_t)start; - firstRow+=columns; - } - - /* split the last row */ - if(splitLastRow) { - /* copy the last row data */ - uprv_memcpy(lastRow+columns, lastRow, (size_t)columns*4); - - /* split the range and move the firstRow pointer */ - lastRow[1]=lastRow[columns]=(uint32_t)limit; - } - } - - /* set the "row last seen" to the last row for the range */ - pv->prevRow=(int32_t)((lastRow-(pv->v))/columns); - - /* set the input value in all remaining rows */ - firstRow+=column; - lastRow+=column; - mask=~mask; - for(;;) { - *firstRow=(*firstRow&mask)|value; - if(firstRow==lastRow) { - break; - } - firstRow+=columns; - } -} - -U_CAPI uint32_t U_EXPORT2 -upvec_getValue(const UPropsVectors *pv, 
UChar32 c, int32_t column) { - uint32_t *row; - UPropsVectors *ncpv; - - if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) { - return 0; - } - ncpv=(UPropsVectors *)pv; - row=_findRow(ncpv, c); - return row[2+column]; -} - -U_CAPI uint32_t * U_EXPORT2 -upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, - UChar32 *pRangeStart, UChar32 *pRangeEnd) { - uint32_t *row; - int32_t columns; - - if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) { - return NULL; - } - - columns=pv->columns; - row=pv->v+rowIndex*columns; - if(pRangeStart!=NULL) { - *pRangeStart=(UChar32)row[0]; - } - if(pRangeEnd!=NULL) { - *pRangeEnd=(UChar32)row[1]-1; - } - return row+2; -} - -static int32_t U_CALLCONV -upvec_compareRows(const void *context, const void *l, const void *r) { - const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r; - const UPropsVectors *pv=(const UPropsVectors *)context; - int32_t i, count, columns; - - count=columns=pv->columns; /* includes start/limit columns */ - - /* start comparing after start/limit but wrap around to them */ - i=2; - do { - if(left[i]!=right[i]) { - return left[i]0); - - return 0; -} - -U_CAPI void U_EXPORT2 -upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) { - uint32_t *row; - int32_t i, columns, valueColumns, rows, count; - UChar32 start, limit; - - /* argument checking */ - if(U_FAILURE(*pErrorCode)) { - return; - } - if(handler==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if(pv->isCompacted) { - return; - } - - /* Set the flag now: Sorting and compacting destroys the builder data structure. 
*/ - pv->isCompacted=TRUE; - - rows=pv->rows; - columns=pv->columns; - U_ASSERT(columns>=3); /* upvec_open asserts this */ - valueColumns=columns-2; /* not counting start & limit */ - - /* sort the properties vectors to find unique vector values */ - uprv_sortArray(pv->v, rows, columns*4, - upvec_compareRows, pv, FALSE, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* - * Find and set the special values. - * This has to do almost the same work as the compaction below, - * to find the indexes where the special-value rows will move. - */ - row=pv->v; - count=-valueColumns; - for(i=0; i=UPVEC_FIRST_SPECIAL_CP) { - handler(context, start, start, count, row+2, valueColumns, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } - - row+=columns; - } - - /* count is at the beginning of the last vector, add valueColumns to include that last vector */ - count+=valueColumns; - - /* Call the handler once more to signal the start of delivering real values. */ - handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP, - count, row-valueColumns, valueColumns, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* - * Move vector contents up to a contiguous array with only unique - * vector values, and call the handler function for each vector. - * - * This destroys the Properties Vector structure and replaces it - * with an array of just vector values. 
- */ - row=pv->v; - count=-valueColumns; - for(i=0; iv+count, valueColumns*4)) { - count+=valueColumns; - uprv_memmove(pv->v+count, row+2, (size_t)valueColumns*4); - } - - if(startv+count, valueColumns, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } - - row+=columns; - } - - /* count is at the beginning of the last vector, add one to include that last vector */ - pv->rows=count/valueColumns+1; -} - -U_CAPI const uint32_t * U_EXPORT2 -upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) { - if(!pv->isCompacted) { - return NULL; - } - if(pRows!=NULL) { - *pRows=pv->rows; - } - if(pColumns!=NULL) { - *pColumns=pv->columns-2; - } - return pv->v; -} - -U_CAPI uint32_t * U_EXPORT2 -upvec_cloneArray(const UPropsVectors *pv, - int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) { - uint32_t *clonedArray; - int32_t byteLength; - - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - if(!pv->isCompacted) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - byteLength=pv->rows*(pv->columns-2)*4; - clonedArray=(uint32_t *)uprv_malloc(byteLength); - if(clonedArray==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(clonedArray, pv->v, byteLength); - if(pRows!=NULL) { - *pRows=pv->rows; - } - if(pColumns!=NULL) { - *pColumns=pv->columns-2; - } - return clonedArray; -} - -U_CAPI UTrie2 * U_EXPORT2 -upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) { - UPVecToUTrie2Context toUTrie2={ NULL, 0, 0, 0 }; - upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode); - utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - utrie2_close(toUTrie2.trie); - toUTrie2.trie=NULL; - } - return toUTrie2.trie; -} - -/* - * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts - * some 16-bit field and builds and returns a UTrie2. 
- */ - -U_CAPI void U_CALLCONV -upvec_compactToUTrie2Handler(void *context, - UChar32 start, UChar32 end, - int32_t rowIndex, uint32_t *row, int32_t columns, - UErrorCode *pErrorCode) { - (void)row; - (void)columns; - UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context; - if(starttrie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode); - } else { - switch(start) { - case UPVEC_INITIAL_VALUE_CP: - toUTrie2->initialValue=rowIndex; - break; - case UPVEC_ERROR_VALUE_CP: - toUTrie2->errorValue=rowIndex; - break; - case UPVEC_START_REAL_VALUES_CP: - toUTrie2->maxValue=rowIndex; - if(rowIndex>0xffff) { - /* too many rows for a 16-bit trie */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - } else { - toUTrie2->trie=utrie2_open(toUTrie2->initialValue, - toUTrie2->errorValue, pErrorCode); - } - break; - default: - break; - } - } -} diff --git a/deps/node/deps/icu-small/source/common/propsvec.h b/deps/node/deps/icu-small/source/common/propsvec.h deleted file mode 100644 index 39080615..00000000 --- a/deps/node/deps/icu-small/source/common/propsvec.h +++ /dev/null @@ -1,178 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2010, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: propsvec.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002feb22 -* created by: Markus W. Scherer -* -* Store bits (Unicode character properties) in bit set vectors. -*/ - -#ifndef __UPROPSVEC_H__ -#define __UPROPSVEC_H__ - -#include "unicode/utypes.h" -#include "utrie.h" -#include "utrie2.h" - -U_CDECL_BEGIN - -/** - * Unicode Properties Vectors associated with code point ranges. 
- * - * Rows of uint32_t integers in a contiguous array store - * the range limits and the properties vectors. - * - * Logically, each row has a certain number of uint32_t values, - * which is set via the upvec_open() "columns" parameter. - * - * Internally, two additional columns are stored. - * In each internal row, - * row[0] contains the start code point and - * row[1] contains the limit code point, - * which is the start of the next range. - * - * Initially, there is only one "normal" row for - * range [0..0x110000[ with values 0. - * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP. - * - * It would be possible to store only one range boundary per row, - * but self-contained rows allow to later sort them by contents. - */ -struct UPropsVectors; -typedef struct UPropsVectors UPropsVectors; - -/* - * Special pseudo code points for storing the initialValue and the errorValue, - * which are used to initialize a UTrie2 or similar. - */ -#define UPVEC_FIRST_SPECIAL_CP 0x110000 -#define UPVEC_INITIAL_VALUE_CP 0x110000 -#define UPVEC_ERROR_VALUE_CP 0x110001 -#define UPVEC_MAX_CP 0x110001 - -/* - * Special pseudo code point used in upvec_compact() signalling the end of - * delivering special values and the beginning of delivering real ones. - * Stable value, unlike UPVEC_MAX_CP which might grow over time. - */ -#define UPVEC_START_REAL_VALUES_CP 0x200000 - -/* - * Open a UPropsVectors object. - * @param columns Number of value integers (uint32_t) per row. - */ -U_CAPI UPropsVectors * U_EXPORT2 -upvec_open(int32_t columns, UErrorCode *pErrorCode); - -U_CAPI void U_EXPORT2 -upvec_close(UPropsVectors *pv); - -/* - * In rows for code points [start..end], select the column, - * reset the mask bits and set the value bits (ANDed with the mask). - * - * Will set U_NO_WRITE_PERMISSION if called after upvec_compact(). 
- */ -U_CAPI void U_EXPORT2 -upvec_setValue(UPropsVectors *pv, - UChar32 start, UChar32 end, - int32_t column, - uint32_t value, uint32_t mask, - UErrorCode *pErrorCode); - -/* - * Logically const but must not be used on the same pv concurrently! - * Always returns 0 if called after upvec_compact(). - */ -U_CAPI uint32_t U_EXPORT2 -upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column); - -/* - * pRangeStart and pRangeEnd can be NULL. - * @return NULL if rowIndex out of range and for illegal arguments, - * or if called after upvec_compact() - */ -U_CAPI uint32_t * U_EXPORT2 -upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, - UChar32 *pRangeStart, UChar32 *pRangeEnd); - -/* - * Compact the vectors: - * - modify the memory - * - keep only unique vectors - * - store them contiguously from the beginning of the memory - * - for each (non-unique) row, call the handler function - * - * The handler's rowIndex is the index of the row in the compacted - * memory block. - * (Therefore, it starts at 0 increases in increments of the columns value.) - * - * In a first phase, only special values are delivered (each exactly once), - * with start==end both equalling a special pseudo code point. - * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP - * where rowIndex is the length of the compacted array, - * and the row is arbitrary (but not NULL). - * Then, in the second phase, the handler is called for each row of real values. - */ -typedef void U_CALLCONV -UPVecCompactHandler(void *context, - UChar32 start, UChar32 end, - int32_t rowIndex, uint32_t *row, int32_t columns, - UErrorCode *pErrorCode); - -U_CAPI void U_EXPORT2 -upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); - -/* - * Get the vectors array after calling upvec_compact(). - * The caller must not modify nor release the returned array. - * Returns NULL if called before upvec_compact(). 
- */ -U_CAPI const uint32_t * U_EXPORT2 -upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns); - -/* - * Get a clone of the vectors array after calling upvec_compact(). - * The caller owns the returned array and must uprv_free() it. - * Returns NULL if called before upvec_compact(). - */ -U_CAPI uint32_t * U_EXPORT2 -upvec_cloneArray(const UPropsVectors *pv, - int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode); - -/* - * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted - * vectors array, and freeze the trie. - */ -U_CAPI UTrie2 * U_EXPORT2 -upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode); - -struct UPVecToUTrie2Context { - UTrie2 *trie; - int32_t initialValue; - int32_t errorValue; - int32_t maxValue; -}; -typedef struct UPVecToUTrie2Context UPVecToUTrie2Context; - -/* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */ -U_CAPI void U_CALLCONV -upvec_compactToUTrie2Handler(void *context, - UChar32 start, UChar32 end, - int32_t rowIndex, uint32_t *row, int32_t columns, - UErrorCode *pErrorCode); - -U_CDECL_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/punycode.cpp b/deps/node/deps/icu-small/source/common/punycode.cpp deleted file mode 100644 index 4f0b9ea9..00000000 --- a/deps/node/deps/icu-small/source/common/punycode.cpp +++ /dev/null @@ -1,589 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: punycode.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jan31 -* created by: Markus W. 
Scherer -*/ - - -/* This ICU code derived from: */ -/* -punycode.c 0.4.0 (2001-Nov-17-Sat) -http://www.cs.berkeley.edu/~amc/idn/ -Adam M. Costello -http://www.nicemice.net/amc/ - -Disclaimer and license - - Regarding this entire document or any portion of it (including - the pseudocode and C code), the author makes no guarantees and - is not responsible for any damage resulting from its use. The - author grants irrevocable permission to anyone to use, modify, - and distribute it in any way that does not diminish the rights - of anyone else to use, modify, and distribute it, provided that - redistributed derivative works do not contain misleading author or - version information. Derivative works need not be licensed under - similar terms. -*/ -/* - * ICU modifications: - * - ICU data types and coding conventions - * - ICU string buffer handling with implicit source lengths - * and destination preflighting - * - UTF-16 handling - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_IDNA - -#include "unicode/ustring.h" -#include "unicode/utf.h" -#include "unicode/utf16.h" -#include "ustr_imp.h" -#include "cstring.h" -#include "cmemory.h" -#include "punycode.h" -#include "uassert.h" - - -/* Punycode ----------------------------------------------------------------- */ - -/* Punycode parameters for Bootstring */ -#define BASE 36 -#define TMIN 1 -#define TMAX 26 -#define SKEW 38 -#define DAMP 700 -#define INITIAL_BIAS 72 -#define INITIAL_N 0x80 - -/* "Basic" Unicode/ASCII code points */ -#define _HYPHEN 0X2d -#define DELIMITER _HYPHEN - -#define _ZERO_ 0X30 -#define _NINE 0x39 - -#define _SMALL_A 0X61 -#define _SMALL_Z 0X7a - -#define _CAPITAL_A 0X41 -#define _CAPITAL_Z 0X5a - -#define IS_BASIC(c) ((c)<0x80) -#define IS_BASIC_UPPERCASE(c) (_CAPITAL_A<=(c) && (c)<=_CAPITAL_Z) - -/** - * digitToBasic() returns the basic code point whose value - * (when used for representing integers) is d, which must be in the - * range 0 to BASE-1. 
The lowercase form is used unless the uppercase flag is - * nonzero, in which case the uppercase form is used. - */ -static inline char -digitToBasic(int32_t digit, UBool uppercase) { - /* 0..25 map to ASCII a..z or A..Z */ - /* 26..35 map to ASCII 0..9 */ - if(digit<26) { - if(uppercase) { - return (char)(_CAPITAL_A+digit); - } else { - return (char)(_SMALL_A+digit); - } - } else { - return (char)((_ZERO_-26)+digit); - } -} - -/** - * basicToDigit[] contains the numeric value of a basic code - * point (for use in representing integers) in the range 0 to - * BASE-1, or -1 if b is does not represent a value. - */ -static const int8_t -basicToDigit[256]={ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, - - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, - - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 -}; - -static inline char -asciiCaseMap(char b, UBool uppercase) { - if(uppercase) { - if(_SMALL_A<=b && b<=_SMALL_Z) { - b-=(_SMALL_A-_CAPITAL_A); - } - } else { - if(_CAPITAL_A<=b && b<=_CAPITAL_Z) { - b+=(_SMALL_A-_CAPITAL_A); - } - } - return b; -} - -/* Punycode-specific Bootstring 
code ---------------------------------------- */ - -/* - * The following code omits the {parts} of the pseudo-algorithm in the spec - * that are not used with the Punycode parameter set. - */ - -/* Bias adaptation function. */ -static int32_t -adaptBias(int32_t delta, int32_t length, UBool firstTime) { - int32_t count; - - if(firstTime) { - delta/=DAMP; - } else { - delta/=2; - } - - delta+=delta/length; - for(count=0; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) { - delta/=(BASE-TMIN); - } - - return count+(((BASE-TMIN+1)*delta)/(delta+SKEW)); -} - -#define MAX_CP_COUNT 200 - -U_CFUNC int32_t -u_strToPunycode(const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, - const UBool *caseFlags, - UErrorCode *pErrorCode) { - - int32_t cpBuffer[MAX_CP_COUNT]; - int32_t n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount; - UChar c, c2; - - /* argument checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* - * Handle the basic code points and - * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit): - */ - srcCPCount=destLength=0; - if(srcLength==-1) { - /* NUL-terminated input */ - for(j=0; /* no condition */; ++j) { - if((c=src[j])==0) { - break; - } - if(srcCPCount==MAX_CP_COUNT) { - /* too many input code points */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - if(IS_BASIC(c)) { - cpBuffer[srcCPCount++]=0; - if(destLength0) { - if(destLength state to , but guard against overflow: - */ - if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) { - *pErrorCode=U_INTERNAL_PROGRAM_ERROR; - return 0; - } - delta+=(m-n)*(handledCPCount+1); - n=m; - - /* Encode a sequence of same code points n */ - for(j=0; jTMAX) { - t=TMAX; - } - */ - - t=k-bias; - if(t=(bias+TMAX)) { - t=TMAX; - } - - if(q0;) { - if(src[--j]==DELIMITER) { - break; - } - } - 
destLength=basicLength=destCPCount=j; - U_ASSERT(destLength>=0); - - while(j>0) { - b=src[--j]; - if(!IS_BASIC(b)) { - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - - if(j0 ? basicLength+1 : 0; in=srcLength) { - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return 0; - } - - digit=basicToDigit[(uint8_t)src[in++]]; - if(digit<0) { - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - if(digit>(0x7fffffff-i)/w) { - /* integer overflow */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return 0; - } - - i+=digit*w; - /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt - t=k-bias; - if(tTMAX) { - t=TMAX; - } - */ - t=k-bias; - if(t=(bias+TMAX)) { - t=TMAX; - } - if(digit0x7fffffff/(BASE-t)) { - /* integer overflow */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return 0; - } - w*=BASE-t; - } - - /* - * Modification from sample code: - * Increments destCPCount here, - * where needed instead of in for() loop tail. - */ - ++destCPCount; - bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0)); - - /* - * i was supposed to wrap around from (incremented) destCPCount to 0, - * incrementing n each time, so we'll fix that now: - */ - if(i/destCPCount>(0x7fffffff-n)) { - /* integer overflow */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return 0; - } - - n+=i/destCPCount; - i%=destCPCount; - /* not needed for Punycode: */ - /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */ - - if(n>0x10ffff || U_IS_SURROGATE(n)) { - /* Unicode code point overflow */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return 0; - } - - /* Insert n at position i of the output: */ - cpLength=U16_LENGTH(n); - if(dest!=NULL && ((destLength+cpLength)<=destCapacity)) { - int32_t codeUnitIndex; - - /* - * Handle indexes when supplementary code points are present. - * - * In almost all cases, there will be only BMP code points before i - * and even in the entire string. - * This is handled with the same efficiency as with UTF-32. 
- * - * Only the rare cases with supplementary code points are handled - * more slowly - but not too bad since this is an insertion anyway. - */ - if(i<=firstSupplementaryIndex) { - codeUnitIndex=i; - if(cpLength>1) { - firstSupplementaryIndex=codeUnitIndex; - } else { - ++firstSupplementaryIndex; - } - } else { - codeUnitIndex=firstSupplementaryIndex; - U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex); - } - - /* use the UChar index codeUnitIndex instead of the code point index i */ - if(codeUnitIndex=0); - ++i; - } - - return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); -} - -/* ### check notes on overflow handling - only necessary if not IDNA? are these Punycode functions to be public? */ - -#endif /* #if !UCONFIG_NO_IDNA */ diff --git a/deps/node/deps/icu-small/source/common/punycode.h b/deps/node/deps/icu-small/source/common/punycode.h deleted file mode 100644 index 5d8a2431..00000000 --- a/deps/node/deps/icu-small/source/common/punycode.h +++ /dev/null @@ -1,120 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2003, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: punycode.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jan31 -* created by: Markus W. Scherer -*/ - -/* This ICU code derived from: */ -/* -punycode.c 0.4.0 (2001-Nov-17-Sat) -http://www.cs.berkeley.edu/~amc/idn/ -Adam M. Costello -http://www.nicemice.net/amc/ -*/ - -#ifndef __PUNYCODE_H__ -#define __PUNYCODE_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_IDNA - -/** - * u_strToPunycode() converts Unicode to Punycode. - * - * The input string must not contain single, unpaired surrogates. 
- * The output will be represented as an array of ASCII code points. - * - * The output string is NUL-terminated according to normal ICU - * string output rules. - * - * @param src Input Unicode string. - * This function handles a limited amount of code points - * (the limit is >=64). - * U_INDEX_OUTOFBOUNDS_ERROR is set if the limit is exceeded. - * @param srcLength Number of UChars in src, or -1 if NUL-terminated. - * @param dest Output Punycode array. - * @param destCapacity Size of dest. - * @param caseFlags Vector of boolean values, one per input UChar, - * indicating that the corresponding character is to be - * marked for the decoder optionally - * uppercasing (TRUE) or lowercasing (FALSE) - * the character. - * ASCII characters are output directly in the case as marked. - * Flags corresponding to trail surrogates are ignored. - * If caseFlags==NULL then input characters are not - * case-mapped. - * @param pErrorCode ICU in/out error code parameter. - * U_INVALID_CHAR_FOUND if src contains - * unmatched single surrogates. - * U_INDEX_OUTOFBOUNDS_ERROR if src contains - * too many code points. - * @return Number of ASCII characters in puny. - * - * @see u_strFromPunycode - */ -U_CFUNC int32_t -u_strToPunycode(const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, - const UBool *caseFlags, - UErrorCode *pErrorCode); - -/** - * u_strFromPunycode() converts Punycode to Unicode. - * The Unicode string will be at most as long (in UChars) - * than the Punycode string (in chars). - * - * @param src Input Punycode string. - * @param srcLength Length of puny, or -1 if NUL-terminated - * @param dest Output Unicode string buffer. - * @param destCapacity Size of dest in number of UChars, - * and of caseFlags in numbers of UBools. - * @param caseFlags Output array for case flags as - * defined by the Punycode string. - * The caller should uppercase (TRUE) or lowercase (FASLE) - * the corresponding character in dest. 
- * For supplementary characters, only the lead surrogate - * is marked, and FALSE is stored for the trail surrogate. - * This is redundant and not necessary for ASCII characters - * because they are already in the case indicated. - * Can be NULL if the case flags are not needed. - * @param pErrorCode ICU in/out error code parameter. - * U_INVALID_CHAR_FOUND if a non-ASCII character - * precedes the last delimiter ('-'), - * or if an invalid character (not a-zA-Z0-9) is found - * after the last delimiter. - * U_ILLEGAL_CHAR_FOUND if the delta sequence is ill-formed. - * @return Number of UChars written to dest. - * - * @see u_strToPunycode - */ -U_CFUNC int32_t -u_strFromPunycode(const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, - UBool *caseFlags, - UErrorCode *pErrorCode); - -#endif /* #if !UCONFIG_NO_IDNA */ - -#endif - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/node/deps/icu-small/source/common/putil.cpp b/deps/node/deps/icu-small/source/common/putil.cpp deleted file mode 100644 index b1193d7c..00000000 --- a/deps/node/deps/icu-small/source/common/putil.cpp +++ /dev/null @@ -1,2444 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) -* -* Date Name Description -* 04/14/97 aliu Creation. -* 04/24/97 aliu Added getDefaultDataDirectory() and -* getDefaultLocaleID(). -* 04/28/97 aliu Rewritten to assume Unix and apply general methods -* for assumed case. Non-UNIX platforms must be -* special-cased. 
Rewrote numeric methods dealing -* with NaN and Infinity to be platform independent -* over all IEEE 754 platforms. -* 05/13/97 aliu Restored sign of timezone -* (semantics are hours West of GMT) -* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, -* nextDouble.. -* 07/22/98 stephen Added remainder, max, min, trunc -* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity -* 08/24/98 stephen Added longBitsFromDouble -* 09/08/98 stephen Minor changes for Mac Port -* 03/02/99 stephen Removed openFile(). Added AS400 support. -* Fixed EBCDIC tables -* 04/15/99 stephen Converted to C. -* 06/28/99 stephen Removed mutex locking in u_isBigEndian(). -* 08/04/99 jeffrey R. Added OS/2 changes -* 11/15/99 helena Integrated S/390 IEEE support. -* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID -* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage -* 01/03/08 Steven L. Fake Time Support -****************************************************************************** -*/ - -// Defines _XOPEN_SOURCE for access to POSIX functions. -// Must be before any other #includes. -#include "uposixdefs.h" - -// First, the platform type. Need this for U_PLATFORM. -#include "unicode/platform.h" - -#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__ -/* tzset isn't defined in strict ANSI on MinGW. */ -#undef __STRICT_ANSI__ -#endif - -/* - * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. - */ -#include - -#if !U_PLATFORM_USES_ONLY_WIN32_API -#include -#endif - -/* include the rest of the ICU headers */ -#include "unicode/putil.h" -#include "unicode/ustring.h" -#include "putilimp.h" -#include "uassert.h" -#include "umutex.h" -#include "cmemory.h" -#include "cstring.h" -#include "locmap.h" -#include "ucln_cmn.h" -#include "charstr.h" - -/* Include standard headers. 
*/ -#include -#include -#include -#include -#include -#include - -#ifndef U_COMMON_IMPLEMENTATION -#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu -#endif - - -/* include system headers */ -#if U_PLATFORM_USES_ONLY_WIN32_API - /* - * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW. - * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API) - * to use native APIs as much as possible? - */ -#ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -#endif -# define VC_EXTRALEAN -# define NOUSER -# define NOSERVICE -# define NOIME -# define NOMCX -# include -# include "unicode/uloc.h" -# include "wintz.h" -#if U_PLATFORM_HAS_WINUWP_API -typedef PVOID LPMSG; // TODO: figure out how to get rid of this typedef -#include -#include -#include -#include - -using namespace ABI::Windows::Foundation; -using namespace Microsoft::WRL; -using namespace Microsoft::WRL::Wrappers; -#endif -#elif U_PLATFORM == U_PF_OS400 -# include -# include /* error code structure */ -# include -# include /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ -# include /* For uprv_maximumPtr */ -#elif U_PLATFORM == U_PF_OS390 -# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ -#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS -# include -# include -# if U_PLATFORM == U_PF_SOLARIS -# ifndef _XPG4_2 -# define _XPG4_2 -# endif -# endif -#elif U_PLATFORM == U_PF_QNX -# include -#endif - -/* - * Only include langinfo.h if we have a way to get the codeset. If we later - * depend on more feature, we can test on U_HAVE_NL_LANGINFO. - * - */ - -#if U_HAVE_NL_LANGINFO_CODESET -#include -#endif - -/** - * Simple things (presence of functions, etc) should just go in configure.in and be added to - * icucfg.h via autoheader. 
- */ -#if U_PLATFORM_IMPLEMENTS_POSIX -# if U_PLATFORM == U_PF_OS400 -# define HAVE_DLFCN_H 0 -# define HAVE_DLOPEN 0 -# else -# ifndef HAVE_DLFCN_H -# define HAVE_DLFCN_H 1 -# endif -# ifndef HAVE_DLOPEN -# define HAVE_DLOPEN 1 -# endif -# endif -# ifndef HAVE_GETTIMEOFDAY -# define HAVE_GETTIMEOFDAY 1 -# endif -#else -# define HAVE_DLFCN_H 0 -# define HAVE_DLOPEN 0 -# define HAVE_GETTIMEOFDAY 0 -#endif - -U_NAMESPACE_USE - -/* Define the extension for data files, again... */ -#define DATA_TYPE "dat" - -/* Leave this copyright notice here! */ -static const char copyright[] = U_COPYRIGHT_STRING; - -/* floating point implementations ------------------------------------------- */ - -/* We return QNAN rather than SNAN*/ -#define SIGN 0x80000000U - -/* Make it easy to define certain types of constants */ -typedef union { - int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */ - double d64; -} BitPatternConversion; -static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; -static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; - -/*--------------------------------------------------------------------------- - Platform utilities - Our general strategy is to assume we're on a POSIX platform. Platforms which - are non-POSIX must declare themselves so. The default POSIX implementation - will sometimes work for non-POSIX platforms as well (e.g., the NaN-related - functions). - ---------------------------------------------------------------------------*/ - -#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400 -# undef U_POSIX_LOCALE -#else -# define U_POSIX_LOCALE 1 -#endif - -/* - WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble - can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). 
-*/ -#if !IEEE_754 -static char* -u_topNBytesOfDouble(double* d, int n) -{ -#if U_IS_BIG_ENDIAN - return (char*)d; -#else - return (char*)(d + 1) - n; -#endif -} - -static char* -u_bottomNBytesOfDouble(double* d, int n) -{ -#if U_IS_BIG_ENDIAN - return (char*)(d + 1) - n; -#else - return (char*)d; -#endif -} -#endif /* !IEEE_754 */ - -#if IEEE_754 -static UBool -u_signBit(double d) { - uint8_t hiByte; -#if U_IS_BIG_ENDIAN - hiByte = *(uint8_t *)&d; -#else - hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); -#endif - return (hiByte & 0x80) != 0; -} -#endif - - - -#if defined (U_DEBUG_FAKETIME) -/* Override the clock to test things without having to move the system clock. - * Assumes POSIX gettimeofday() will function - */ -UDate fakeClock_t0 = 0; /** Time to start the clock from **/ -UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ -UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ -static UMutex fakeClockMutex = U_MUTEX_INTIALIZER; - -static UDate getUTCtime_real() { - struct timeval posixTime; - gettimeofday(&posixTime, NULL); - return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); -} - -static UDate getUTCtime_fake() { - umtx_lock(&fakeClockMutex); - if(!fakeClock_set) { - UDate real = getUTCtime_real(); - const char *fake_start = getenv("U_FAKETIME_START"); - if((fake_start!=NULL) && (fake_start[0]!=0)) { - sscanf(fake_start,"%lf",&fakeClock_t0); - fakeClock_dt = fakeClock_t0 - real; - fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" - "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", - fakeClock_t0, fake_start, fakeClock_dt, real); - } else { - fakeClock_dt = 0; - fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" - "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n"); - } - fakeClock_set = TRUE; - } - 
umtx_unlock(&fakeClockMutex); - - return getUTCtime_real() + fakeClock_dt; -} -#endif - -#if U_PLATFORM_USES_ONLY_WIN32_API -typedef union { - int64_t int64; - FILETIME fileTime; -} FileTimeConversion; /* This is like a ULARGE_INTEGER */ - -/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ -#define EPOCH_BIAS INT64_C(116444736000000000) -#define HECTONANOSECOND_PER_MILLISECOND 10000 - -#endif - -/*--------------------------------------------------------------------------- - Universal Implementations - These are designed to work on all platforms. Try these, and if they - don't work on your platform, then special case your platform with new - implementations. ----------------------------------------------------------------------------*/ - -U_CAPI UDate U_EXPORT2 -uprv_getUTCtime() -{ -#if defined(U_DEBUG_FAKETIME) - return getUTCtime_fake(); /* Hook for overriding the clock */ -#else - return uprv_getRawUTCtime(); -#endif -} - -/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ -U_CAPI UDate U_EXPORT2 -uprv_getRawUTCtime() -{ -#if U_PLATFORM_USES_ONLY_WIN32_API - - FileTimeConversion winTime; - GetSystemTimeAsFileTime(&winTime.fileTime); - return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); -#else - -#if HAVE_GETTIMEOFDAY - struct timeval posixTime; - gettimeofday(&posixTime, NULL); - return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); -#else - time_t epochtime; - time(&epochtime); - return (UDate)epochtime * U_MILLIS_PER_SECOND; -#endif - -#endif -} - -/*----------------------------------------------------------------------------- - IEEE 754 - These methods detect and return NaN and infinity values for doubles - conforming to IEEE 754. Platforms which support this standard include X86, - Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. - If this doesn't work on your platform, you have non-IEEE floating-point, and - will need to code your own versions. 
A naive implementation is to return 0.0 - for getNaN and getInfinity, and false for isNaN and isInfinite. - ---------------------------------------------------------------------------*/ - -U_CAPI UBool U_EXPORT2 -uprv_isNaN(double number) -{ -#if IEEE_754 - BitPatternConversion convertedNumber; - convertedNumber.d64 = number; - /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ - return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); - -#elif U_PLATFORM == U_PF_OS390 - uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, - sizeof(uint32_t)); - uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, - sizeof(uint32_t)); - - return ((highBits & 0x7F080000L) == 0x7F080000L) && - (lowBits == 0x00000000L); - -#else - /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ - /* you'll need to replace this default implementation with what's correct*/ - /* for your platform.*/ - return number != number; -#endif -} - -U_CAPI UBool U_EXPORT2 -uprv_isInfinite(double number) -{ -#if IEEE_754 - BitPatternConversion convertedNumber; - convertedNumber.d64 = number; - /* Infinity is exactly 0x7FF0000000000000U. 
*/ - return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); -#elif U_PLATFORM == U_PF_OS390 - uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, - sizeof(uint32_t)); - uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, - sizeof(uint32_t)); - - return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); - -#else - /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ - /* value, you'll need to replace this default implementation with what's*/ - /* correct for your platform.*/ - return number == (2.0 * number); -#endif -} - -U_CAPI UBool U_EXPORT2 -uprv_isPositiveInfinity(double number) -{ -#if IEEE_754 || U_PLATFORM == U_PF_OS390 - return (UBool)(number > 0 && uprv_isInfinite(number)); -#else - return uprv_isInfinite(number); -#endif -} - -U_CAPI UBool U_EXPORT2 -uprv_isNegativeInfinity(double number) -{ -#if IEEE_754 || U_PLATFORM == U_PF_OS390 - return (UBool)(number < 0 && uprv_isInfinite(number)); - -#else - uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, - sizeof(uint32_t)); - return((highBits & SIGN) && uprv_isInfinite(number)); - -#endif -} - -U_CAPI double U_EXPORT2 -uprv_getNaN() -{ -#if IEEE_754 || U_PLATFORM == U_PF_OS390 - return gNan.d64; -#else - /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ - /* you'll need to replace this default implementation with what's correct*/ - /* for your platform.*/ - return 0.0; -#endif -} - -U_CAPI double U_EXPORT2 -uprv_getInfinity() -{ -#if IEEE_754 || U_PLATFORM == U_PF_OS390 - return gInf.d64; -#else - /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ - /* value, you'll need to replace this default implementation with what's*/ - /* correct for your platform.*/ - return 0.0; -#endif -} - -U_CAPI double U_EXPORT2 -uprv_floor(double x) -{ - return floor(x); -} - -U_CAPI double U_EXPORT2 -uprv_ceil(double x) -{ - return ceil(x); -} - -U_CAPI double U_EXPORT2 -uprv_round(double x) -{ - return 
uprv_floor(x + 0.5); -} - -U_CAPI double U_EXPORT2 -uprv_fabs(double x) -{ - return fabs(x); -} - -U_CAPI double U_EXPORT2 -uprv_modf(double x, double* y) -{ - return modf(x, y); -} - -U_CAPI double U_EXPORT2 -uprv_fmod(double x, double y) -{ - return fmod(x, y); -} - -U_CAPI double U_EXPORT2 -uprv_pow(double x, double y) -{ - /* This is declared as "double pow(double x, double y)" */ - return pow(x, y); -} - -U_CAPI double U_EXPORT2 -uprv_pow10(int32_t x) -{ - return pow(10.0, (double)x); -} - -U_CAPI double U_EXPORT2 -uprv_fmax(double x, double y) -{ -#if IEEE_754 - /* first handle NaN*/ - if(uprv_isNaN(x) || uprv_isNaN(y)) - return uprv_getNaN(); - - /* check for -0 and 0*/ - if(x == 0.0 && y == 0.0 && u_signBit(x)) - return y; - -#endif - - /* this should work for all flt point w/o NaN and Inf special cases */ - return (x > y ? x : y); -} - -U_CAPI double U_EXPORT2 -uprv_fmin(double x, double y) -{ -#if IEEE_754 - /* first handle NaN*/ - if(uprv_isNaN(x) || uprv_isNaN(y)) - return uprv_getNaN(); - - /* check for -0 and 0*/ - if(x == 0.0 && y == 0.0 && u_signBit(y)) - return y; - -#endif - - /* this should work for all flt point w/o NaN and Inf special cases */ - return (x > y ? y : x); -} - -U_CAPI UBool U_EXPORT2 -uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) { - // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow. - // This function could be optimized by calling one of those primitives. - auto a64 = static_cast(a); - auto b64 = static_cast(b); - int64_t res64 = a64 + b64; - *res = static_cast(res64); - return res64 != *res; -} - -U_CAPI UBool U_EXPORT2 -uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) { - // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow. - // This function could be optimized by calling one of those primitives. 
- auto a64 = static_cast(a); - auto b64 = static_cast(b); - int64_t res64 = a64 * b64; - *res = static_cast(res64); - return res64 != *res; -} - -/** - * Truncates the given double. - * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 - * This is different than calling floor() or ceil(): - * floor(3.3) = 3, floor(-3.3) = -4 - * ceil(3.3) = 4, ceil(-3.3) = -3 - */ -U_CAPI double U_EXPORT2 -uprv_trunc(double d) -{ -#if IEEE_754 - /* handle error cases*/ - if(uprv_isNaN(d)) - return uprv_getNaN(); - if(uprv_isInfinite(d)) - return uprv_getInfinity(); - - if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */ - return ceil(d); - else - return floor(d); - -#else - return d >= 0 ? floor(d) : ceil(d); - -#endif -} - -/** - * Return the largest positive number that can be represented by an integer - * type of arbitrary bit length. - */ -U_CAPI double U_EXPORT2 -uprv_maxMantissa(void) -{ - return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; -} - -U_CAPI double U_EXPORT2 -uprv_log(double d) -{ - return log(d); -} - -U_CAPI void * U_EXPORT2 -uprv_maximumPtr(void * base) -{ -#if U_PLATFORM == U_PF_OS400 - /* - * With the provided function we should never be out of range of a given segment - * (a traditional/typical segment that is). Our segments have 5 bytes for the - * id and 3 bytes for the offset. The key is that the casting takes care of - * only retrieving the offset portion minus x1000. Hence, the smallest offset - * seen in a program is x001000 and when casted to an int would be 0. - * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. - * - * Currently, 16MB is the current addressing limitation on i5/OS if the activation is - * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). 
- * This function determines the activation based on the pointer that is passed in and - * calculates the appropriate maximum available size for - * each pointer type (TERASPACE and non-TERASPACE) - * - * Unlike other operating systems, the pointer model isn't determined at - * compile time on i5/OS. - */ - if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { - /* if it is a TERASPACE pointer the max is 2GB - 4k */ - return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); - } - /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ - return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); - -#else - return U_MAX_PTR(base); -#endif -} - -/*--------------------------------------------------------------------------- - Platform-specific Implementations - Try these, and if they don't work on your platform, then special case your - platform with new implementations. - ---------------------------------------------------------------------------*/ - -/* Generic time zone layer -------------------------------------------------- */ - -/* Time zone utilities */ -U_CAPI void U_EXPORT2 -uprv_tzset() -{ -#if defined(U_TZSET) - U_TZSET(); -#else - /* no initialization*/ -#endif -} - -U_CAPI int32_t U_EXPORT2 -uprv_timezone() -{ -#ifdef U_TIMEZONE - return U_TIMEZONE; -#else - time_t t, t1, t2; - struct tm tmrec; - int32_t tdiff = 0; - - time(&t); - uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); -#if U_PLATFORM != U_PF_IPHONE - UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ -#endif - t1 = mktime(&tmrec); /* local time in seconds*/ - uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); - t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ - tdiff = t2 - t1; - -#if U_PLATFORM != U_PF_IPHONE - /* imitate NT behaviour, which returns same timezone offset to GMT for - winter and summer. - This does not work on all platforms. 
For instance, on glibc on Linux - and on Mac OS 10.5, tdiff calculated above remains the same - regardless of whether DST is in effect or not. iOS is another - platform where this does not work. Linux + glibc and Mac OS 10.5 - have U_TIMEZONE defined so that this code is not reached. - */ - if (dst_checked) - tdiff += 3600; -#endif - return tdiff; -#endif -} - -/* Note that U_TZNAME does *not* have to be tzname, but if it is, - some platforms need to have it declared here. */ - -#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED) -/* RS6000 and others reject char **tzname. */ -extern U_IMPORT char *U_TZNAME[]; -#endif - -#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS) -/* These platforms are likely to use Olson timezone IDs. */ -/* common targets of the symbolic link at TZDEFAULT are: - * "/usr/share/zoneinfo/" default, older Linux distros, macOS to 10.12 - * "../usr/share/zoneinfo/" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12 - * "/usr/share/lib/zoneinfo/" Solaris - * "../usr/share/lib/zoneinfo/" Solaris - * "/var/db/timezone/zoneinfo/" macOS 10.13 - * To avoid checking lots of paths, just check that the target path - * before the ends with "/zoneinfo/", and the is valid. - */ - -#define CHECK_LOCALTIME_LINK 1 -#if U_PLATFORM_IS_DARWIN_BASED -#include -#define TZZONEINFO (TZDIR "/") -#elif U_PLATFORM == U_PF_SOLARIS -#define TZDEFAULT "/etc/localtime" -#define TZZONEINFO "/usr/share/lib/zoneinfo/" -#define TZ_ENV_CHECK "localtime" -#else -#define TZDEFAULT "/etc/localtime" -#define TZZONEINFO "/usr/share/zoneinfo/" -#endif -#define TZZONEINFOTAIL "/zoneinfo/" -#if U_HAVE_DIRENT_H -#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. 
*/ -/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo - symlinked to /etc/localtime, which makes searchForTZFile return - 'localtime' when it's the first match. */ -#define TZFILE_SKIP2 "localtime" -#define SEARCH_TZFILE -#include /* Needed to search through system timezone files */ -#endif -static char gTimeZoneBuffer[PATH_MAX]; -static char *gTimeZoneBufferPtr = NULL; -#endif - -#if !U_PLATFORM_USES_ONLY_WIN32_API -#define isNonDigit(ch) (ch < '0' || '9' < ch) -static UBool isValidOlsonID(const char *id) { - int32_t idx = 0; - - /* Determine if this is something like Iceland (Olson ID) - or AST4ADT (non-Olson ID) */ - while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { - idx++; - } - - /* If we went through the whole string, then it might be okay. - The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", - "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. - The rest of the time it could be an Olson ID. George */ - return (UBool)(id[idx] == 0 - || uprv_strcmp(id, "PST8PDT") == 0 - || uprv_strcmp(id, "MST7MDT") == 0 - || uprv_strcmp(id, "CST6CDT") == 0 - || uprv_strcmp(id, "EST5EDT") == 0); -} - -/* On some Unix-like OS, 'posix' subdirectory in - /usr/share/zoneinfo replicates the top-level contents. 'right' - subdirectory has the same set of files, but individual files - are different from those in the top-level directory or 'posix' - because 'right' has files for TAI (Int'l Atomic Time) while 'posix' - has files for UTC. - When the first match for /etc/localtime is in either of them - (usually in posix because 'right' has different file contents), - or TZ environment variable points to one of them, createTimeZone - fails because, say, 'posix/America/New_York' is not an Olson - timezone id ('America/New_York' is). So, we have to skip - 'posix/' and 'right/' at the beginning. 
*/ -static void skipZoneIDPrefix(const char** id) { - if (uprv_strncmp(*id, "posix/", 6) == 0 - || uprv_strncmp(*id, "right/", 6) == 0) - { - *id += 6; - } -} -#endif - -#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API - -#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) -typedef struct OffsetZoneMapping { - int32_t offsetSeconds; - int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/ - const char *stdID; - const char *dstID; - const char *olsonID; -} OffsetZoneMapping; - -enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; - -/* -This list tries to disambiguate a set of abbreviated timezone IDs and offsets -and maps it to an Olson ID. -Before adding anything to this list, take a look at -icu/source/tools/tzcode/tz.alias -Sometimes no daylight savings (0) is important to define due to aliases. -This list can be tested with icu/source/test/compat/tzone.pl -More values could be added to daylightType to increase precision. 
-*/ -static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { - {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, - {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, - {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, - {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, - {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, - {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, - {-36000, 2, "EST", "EST", "Australia/Sydney"}, - {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, - {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, - {-34200, 2, "CST", "CST", "Australia/South"}, - {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, - {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, - {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, - {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, - {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, - {-28800, 2, "WST", "WST", "Australia/West"}, - {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, - {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, - {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, - {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, - {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, - {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, - {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, - {-14400, 1, "AZT", "AZST", "Asia/Baku"}, - {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, - {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, - {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, - {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, - {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ - {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, - {-3600, 0, "CET", "WEST", "Africa/Algiers"}, - {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, - {0, 1, "GMT", "IST", "Europe/Dublin"}, - {0, 1, "GMT", "BST", "Europe/London"}, - {0, 0, "WET", "WEST", "Africa/Casablanca"}, - {0, 0, "WET", "WET", "Africa/El_Aaiun"}, - {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, - {3600, 1, "EGT", "EGST", "America/Scoresbysund"}, - {10800, 1, "PMST", "PMDT", "America/Miquelon"}, - {10800, 2, 
"UYT", "UYST", "America/Montevideo"}, - {10800, 1, "WGT", "WGST", "America/Godthab"}, - {10800, 2, "BRT", "BRST", "Brazil/East"}, - {12600, 1, "NST", "NDT", "America/St_Johns"}, - {14400, 1, "AST", "ADT", "Canada/Atlantic"}, - {14400, 2, "AMT", "AMST", "America/Cuiaba"}, - {14400, 2, "CLT", "CLST", "Chile/Continental"}, - {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, - {14400, 2, "PYT", "PYST", "America/Asuncion"}, - {18000, 1, "CST", "CDT", "America/Havana"}, - {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ - {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, - {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, - {21600, 0, "CST", "CDT", "America/Guatemala"}, - {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ - {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ - {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, - {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ - {32400, 1, "AKST", "AKDT", "US/Alaska"}, - {36000, 1, "HAST", "HADT", "US/Aleutian"} -}; - -/*#define DEBUG_TZNAME*/ - -static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) -{ - int32_t idx; -#ifdef DEBUG_TZNAME - fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); -#endif - for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++) - { - if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds - && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType - && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 - && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) - { - return OFFSET_ZONE_MAPPINGS[idx].olsonID; - } - } - return NULL; -} -#endif - -#ifdef SEARCH_TZFILE -#define MAX_READ_SIZE 512 - -typedef struct DefaultTZInfo { - char* defaultTZBuffer; - int64_t defaultTZFileSize; - FILE* defaultTZFilePtr; - UBool defaultTZstatus; - int32_t defaultTZPosition; -} 
DefaultTZInfo; - -/* - * This method compares the two files given to see if they are a match. - * It is currently use to compare two TZ files. - */ -static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { - FILE* file; - int64_t sizeFile; - int64_t sizeFileLeft; - int32_t sizeFileRead; - int32_t sizeFileToRead; - char bufferFile[MAX_READ_SIZE]; - UBool result = TRUE; - - if (tzInfo->defaultTZFilePtr == NULL) { - tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); - } - file = fopen(TZFileName, "r"); - - tzInfo->defaultTZPosition = 0; /* reset position to begin search */ - - if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { - /* First check that the file size are equal. */ - if (tzInfo->defaultTZFileSize == 0) { - fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); - tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); - } - fseek(file, 0, SEEK_END); - sizeFile = ftell(file); - sizeFileLeft = sizeFile; - - if (sizeFile != tzInfo->defaultTZFileSize) { - result = FALSE; - } else { - /* Store the data from the files in seperate buffers and - * compare each byte to determine equality. - */ - if (tzInfo->defaultTZBuffer == NULL) { - rewind(tzInfo->defaultTZFilePtr); - tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); - sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); - } - rewind(file); - while(sizeFileLeft > 0) { - uprv_memset(bufferFile, 0, MAX_READ_SIZE); - sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? 
sizeFileLeft : MAX_READ_SIZE; - - sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); - if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { - result = FALSE; - break; - } - sizeFileLeft -= sizeFileRead; - tzInfo->defaultTZPosition += sizeFileRead; - } - } - } else { - result = FALSE; - } - - if (file != NULL) { - fclose(file); - } - - return result; -} - - -/* dirent also lists two entries: "." and ".." that we can safely ignore. */ -#define SKIP1 "." -#define SKIP2 ".." -static UBool U_CALLCONV putil_cleanup(void); -static CharString *gSearchTZFileResult = NULL; - -/* - * This method recursively traverses the directory given for a matching TZ file and returns the first match. - * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results. - */ -static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { - DIR* dirp = NULL; - struct dirent* dirEntry = NULL; - char* result = NULL; - UErrorCode status = U_ZERO_ERROR; - - /* Save the current path */ - CharString curpath(path, -1, status); - if (U_FAILURE(status)) { - goto cleanupAndReturn; - } - - dirp = opendir(path); - if (dirp == NULL) { - goto cleanupAndReturn; - } - - if (gSearchTZFileResult == NULL) { - gSearchTZFileResult = new CharString; - if (gSearchTZFileResult == NULL) { - goto cleanupAndReturn; - } - ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); - } - - /* Check each entry in the directory. */ - while((dirEntry = readdir(dirp)) != NULL) { - const char* dirName = dirEntry->d_name; - if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) { - /* Create a newpath with the new entry to test each entry in the directory. 
*/ - CharString newpath(curpath, status); - newpath.append(dirName, -1, status); - if (U_FAILURE(status)) { - break; - } - - DIR* subDirp = NULL; - if ((subDirp = opendir(newpath.data())) != NULL) { - /* If this new path is a directory, make a recursive call with the newpath. */ - closedir(subDirp); - newpath.append('/', status); - if (U_FAILURE(status)) { - break; - } - result = searchForTZFile(newpath.data(), tzInfo); - /* - Have to get out here. Otherwise, we'd keep looking - and return the first match in the top-level directory - if there's a match in the top-level. If not, this function - would return NULL and set gTimeZoneBufferPtr to NULL in initDefault(). - It worked without this in most cases because we have a fallback of calling - localtime_r to figure out the default timezone. - */ - if (result != NULL) - break; - } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { - if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) { - int32_t amountToSkip = sizeof(TZZONEINFO) - 1; - if (amountToSkip > newpath.length()) { - amountToSkip = newpath.length(); - } - const char* zoneid = newpath.data() + amountToSkip; - skipZoneIDPrefix(&zoneid); - gSearchTZFileResult->clear(); - gSearchTZFileResult->append(zoneid, -1, status); - if (U_FAILURE(status)) { - break; - } - result = gSearchTZFileResult->data(); - /* Get out after the first one found. */ - break; - } - } - } - } - - cleanupAndReturn: - if (dirp) { - closedir(dirp); - } - return result; -} -#endif - -U_CAPI void U_EXPORT2 -uprv_tzname_clear_cache() -{ -#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) - gTimeZoneBufferPtr = NULL; -#endif -} - -U_CAPI const char* U_EXPORT2 -uprv_tzname(int n) -{ - (void)n; // Avoid unreferenced parameter warning. 
- const char *tzid = NULL; -#if U_PLATFORM_USES_ONLY_WIN32_API - tzid = uprv_detectWindowsTimeZone(); - - if (tzid != NULL) { - return tzid; - } - -#ifndef U_TZNAME - // The return value is free'd in timezone.cpp on Windows because - // the other code path returns a pointer to a heap location. - // If we don't have a name already, then tzname wouldn't be any - // better, so just fall back. - return uprv_strdup("Etc/UTC"); -#endif // !U_TZNAME - -#else - -/*#if U_PLATFORM_IS_DARWIN_BASED - int ret; - - tzid = getenv("TZFILE"); - if (tzid != NULL) { - return tzid; - } -#endif*/ - -/* This code can be temporarily disabled to test tzname resolution later on. */ -#ifndef DEBUG_TZNAME - tzid = getenv("TZ"); - if (tzid != NULL && isValidOlsonID(tzid) -#if U_PLATFORM == U_PF_SOLARIS - /* When TZ equals localtime on Solaris, check the /etc/localtime file. */ - && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0 -#endif - ) { - /* The colon forces tzset() to treat the remainder as zoneinfo path */ - if (tzid[0] == ':') { - tzid++; - } - /* This might be a good Olson ID. */ - skipZoneIDPrefix(&tzid); - return tzid; - } - /* else U_TZNAME will give a better result. */ -#endif - -#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) - /* Caller must handle threading issues */ - if (gTimeZoneBufferPtr == NULL) { - /* - This is a trick to look at the name of the link to get the Olson ID - because the tzfile contents is underspecified. - This isn't guaranteed to work because it may not be a symlink. 
- */ - int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1); - if (0 < ret) { - int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL); - gTimeZoneBuffer[ret] = 0; - char * tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL); - - if (tzZoneInfoTailPtr != NULL - && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen)) - { - return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen); - } - } else { -#if defined(SEARCH_TZFILE) - DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); - if (tzInfo != NULL) { - tzInfo->defaultTZBuffer = NULL; - tzInfo->defaultTZFileSize = 0; - tzInfo->defaultTZFilePtr = NULL; - tzInfo->defaultTZstatus = FALSE; - tzInfo->defaultTZPosition = 0; - - gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); - - /* Free previously allocated memory */ - if (tzInfo->defaultTZBuffer != NULL) { - uprv_free(tzInfo->defaultTZBuffer); - } - if (tzInfo->defaultTZFilePtr != NULL) { - fclose(tzInfo->defaultTZFilePtr); - } - uprv_free(tzInfo); - } - - if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { - return gTimeZoneBufferPtr; - } -#endif - } - } - else { - return gTimeZoneBufferPtr; - } -#endif -#endif - -#ifdef U_TZNAME -#if U_PLATFORM_USES_ONLY_WIN32_API - /* The return value is free'd in timezone.cpp on Windows because - * the other code path returns a pointer to a heap location. */ - return uprv_strdup(U_TZNAME[n]); -#else - /* - U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. - So we remap the abbreviation to an olson ID. - - Since Windows exposes a little more timezone information, - we normally don't use this code on Windows because - uprv_detectWindowsTimeZone should have already given the correct answer. 
- */ - { - struct tm juneSol, decemberSol; - int daylightType; - static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ - static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ - - /* This probing will tell us when daylight savings occurs. */ - localtime_r(&juneSolstice, &juneSol); - localtime_r(&decemberSolstice, &decemberSol); - if(decemberSol.tm_isdst > 0) { - daylightType = U_DAYLIGHT_DECEMBER; - } else if(juneSol.tm_isdst > 0) { - daylightType = U_DAYLIGHT_JUNE; - } else { - daylightType = U_DAYLIGHT_NONE; - } - tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); - if (tzid != NULL) { - return tzid; - } - } - return U_TZNAME[n]; -#endif -#else - return ""; -#endif -} - -/* Get and set the ICU data directory --------------------------------------- */ - -static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER; -static char *gDataDirectory = NULL; - -UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER; -static CharString *gTimeZoneFilesDirectory = NULL; - -#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API - static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */ - static bool gCorrectedPOSIXLocaleHeapAllocated = false; -#endif - -static UBool U_CALLCONV putil_cleanup(void) -{ - if (gDataDirectory && *gDataDirectory) { - uprv_free(gDataDirectory); - } - gDataDirectory = NULL; - gDataDirInitOnce.reset(); - - delete gTimeZoneFilesDirectory; - gTimeZoneFilesDirectory = NULL; - gTimeZoneFilesInitOnce.reset(); - -#ifdef SEARCH_TZFILE - delete gSearchTZFileResult; - gSearchTZFileResult = NULL; -#endif - -#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API - if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) { - uprv_free(const_cast(gCorrectedPOSIXLocale)); - gCorrectedPOSIXLocale = NULL; - gCorrectedPOSIXLocaleHeapAllocated = false; - } -#endif - return TRUE; -} - -/* - * Set the data directory. 
- * Make a copy of the passed string, and set the global data dir to point to it. - */ -U_CAPI void U_EXPORT2 -u_setDataDirectory(const char *directory) { - char *newDataDir; - int32_t length; - - if(directory==NULL || *directory==0) { - /* A small optimization to prevent the malloc and copy when the - shared library is used, and this is a way to make sure that NULL - is never returned. - */ - newDataDir = (char *)""; - } - else { - length=(int32_t)uprv_strlen(directory); - newDataDir = (char *)uprv_malloc(length + 2); - /* Exit out if newDataDir could not be created. */ - if (newDataDir == NULL) { - return; - } - uprv_strcpy(newDataDir, directory); - -#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) - { - char *p; - while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) { - *p = U_FILE_SEP_CHAR; - } - } -#endif - } - - if (gDataDirectory && *gDataDirectory) { - uprv_free(gDataDirectory); - } - gDataDirectory = newDataDir; - ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); -} - -U_CAPI UBool U_EXPORT2 -uprv_pathIsAbsolute(const char *path) -{ - if(!path || !*path) { - return FALSE; - } - - if(*path == U_FILE_SEP_CHAR) { - return TRUE; - } - -#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) - if(*path == U_FILE_ALT_SEP_CHAR) { - return TRUE; - } -#endif - -#if U_PLATFORM_USES_ONLY_WIN32_API - if( (((path[0] >= 'A') && (path[0] <= 'Z')) || - ((path[0] >= 'a') && (path[0] <= 'z'))) && - path[1] == ':' ) { - return TRUE; - } -#endif - - return FALSE; -} - -/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR - until some client wrapper makefiles are updated */ -#if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR -# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) -# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" -# endif -#endif - -#if U_PLATFORM_HAS_WINUWP_API != 0 -// Helper function to get the ICU Data Directory under the Windows directory location. 
-static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength) -{ -#if defined(ICU_DATA_DIR_WINDOWS) - wchar_t windowsPath[MAX_PATH]; - char windowsPathUtf8[MAX_PATH]; - - UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath)); - if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) { - // Convert UTF-16 to a UTF-8 string. - UErrorCode status = U_ZERO_ERROR; - int32_t windowsPathUtf8Len = 0; - u_strToUTF8(windowsPathUtf8, static_cast(UPRV_LENGTHOF(windowsPathUtf8)), - &windowsPathUtf8Len, reinterpret_cast(windowsPath), -1, &status); - - if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) && - (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) { - // Ensure it always has a separator, so we can append the ICU data path. - if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) { - windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR; - windowsPathUtf8[windowsPathUtf8Len] = '\0'; - } - // Check if the concatenated string will fit. - if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) { - uprv_strcpy(directoryBuffer, windowsPathUtf8); - uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS); - return TRUE; - } - } - } -#endif - - return FALSE; -} -#endif - -static void U_CALLCONV dataDirectoryInitFn() { - /* If we already have the directory, then return immediately. Will happen if user called - * u_setDataDirectory(). - */ - if (gDataDirectory) { - return; - } - - const char *path = NULL; -#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) - char datadir_path_buffer[PATH_MAX]; -#endif - - /* - When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to - override ICU's data with the ICU_DATA environment variable. This prevents - problems where multiple custom copies of ICU's specific version of data - are installed on a system. 
Either the application must define the data - directory with u_setDataDirectory, define ICU_DATA_DIR when compiling - ICU, set the data with udata_setCommonData or trust that all of the - required data is contained in ICU's data library that contains - the entry point defined by U_ICUDATA_ENTRY_POINT. - - There may also be some platforms where environment variables - are not allowed. - */ -# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO - /* First try to get the environment variable */ -# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv - path=getenv("ICU_DATA"); -# endif -# endif - - /* ICU_DATA_DIR may be set as a compile option. - * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time - * and is used only when data is built in archive mode eliminating the need - * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation - * directory of the data dat file. Users should use ICU_DATA_DIR if they want to - * set their own path. - */ -#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR) - if(path==NULL || *path==0) { -# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) - const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR); -# endif -# ifdef ICU_DATA_DIR - path=ICU_DATA_DIR; -# else - path=U_ICU_DATA_DEFAULT_DIR; -# endif -# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) - if (prefix != NULL) { - snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); - path=datadir_path_buffer; - } -# endif - } -#endif - -#if U_PLATFORM_HAS_WINUWP_API != 0 && defined(ICU_DATA_DIR_WINDOWS) - char datadir_path_buffer[MAX_PATH]; - if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) { - path = datadir_path_buffer; - } -#endif - - if(path==NULL) { - /* It looks really bad, set it to something. 
*/ -#if U_PLATFORM_HAS_WIN32_API - // Windows UWP will require icudtl.dat file in same directory as icuuc.dll - path = ".\\"; -#else - path = ""; -#endif - } - - u_setDataDirectory(path); - return; -} - -U_CAPI const char * U_EXPORT2 -u_getDataDirectory(void) { - umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn); - return gDataDirectory; -} - -static void setTimeZoneFilesDir(const char *path, UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - gTimeZoneFilesDirectory->clear(); - gTimeZoneFilesDirectory->append(path, status); -#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) - char *p = gTimeZoneFilesDirectory->data(); - while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) { - *p = U_FILE_SEP_CHAR; - } -#endif -} - -#define TO_STRING(x) TO_STRING_2(x) -#define TO_STRING_2(x) #x - -static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) { - U_ASSERT(gTimeZoneFilesDirectory == NULL); - ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); - gTimeZoneFilesDirectory = new CharString(); - if (gTimeZoneFilesDirectory == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - - const char *dir = ""; - -#if U_PLATFORM_HAS_WINUWP_API != 0 - // The UWP version does not support the environment variable setting, but can possibly pick them up from the Windows directory. - char datadir_path_buffer[MAX_PATH]; - if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) { - dir = datadir_path_buffer; - } -#else - dir = getenv("ICU_TIMEZONE_FILES_DIR"); -#endif // U_PLATFORM_HAS_WINUWP_API - -#if defined(U_TIMEZONE_FILES_DIR) - if (dir == NULL) { - // Build time configuration setting. 
- dir = TO_STRING(U_TIMEZONE_FILES_DIR); - } -#endif - - if (dir == NULL) { - dir = ""; - } - - setTimeZoneFilesDir(dir, status); -} - - -U_CAPI const char * U_EXPORT2 -u_getTimeZoneFilesDirectory(UErrorCode *status) { - umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); - return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : ""; -} - -U_CAPI void U_EXPORT2 -u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) { - umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); - setTimeZoneFilesDir(path, *status); - - // Note: this function does some extra churn, first setting based on the - // environment, then immediately replacing with the value passed in. - // The logic is simpler that way, and performance shouldn't be an issue. -} - - -#if U_POSIX_LOCALE -/* A helper function used by uprv_getPOSIXIDForDefaultLocale and - * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for - * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. - */ -static const char *uprv_getPOSIXIDForCategory(int category) -{ - const char* posixID = NULL; - if (category == LC_MESSAGES || category == LC_CTYPE) { - /* - * On Solaris two different calls to setlocale can result in - * different values. Only get this value once. - * - * We must check this first because an application can set this. - * - * LC_ALL can't be used because it's platform dependent. The LANG - * environment variable seems to affect LC_CTYPE variable by default. - * Here is what setlocale(LC_ALL, NULL) can return. - * HPUX can return 'C C C C C C C' - * Solaris can return /en_US/C/C/C/C/C on the second try. - * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... - * - * The default codepage detection also needs to use LC_CTYPE. - * - * Do not call setlocale(LC_*, "")! Using an empty string instead - * of NULL, will modify the libc behavior. 
- */ - posixID = setlocale(category, NULL); - if ((posixID == 0) - || (uprv_strcmp("C", posixID) == 0) - || (uprv_strcmp("POSIX", posixID) == 0)) - { - /* Maybe we got some garbage. Try something more reasonable */ - posixID = getenv("LC_ALL"); - /* Solaris speaks POSIX - See IEEE Std 1003.1-2008 - * This is needed to properly handle empty env. variables - */ -#if U_PLATFORM == U_PF_SOLARIS - if ((posixID == 0) || (posixID[0] == '\0')) { - posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); - if ((posixID == 0) || (posixID[0] == '\0')) { -#else - if (posixID == 0) { - posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); - if (posixID == 0) { -#endif - posixID = getenv("LANG"); - } - } - } - } - if ((posixID==0) - || (uprv_strcmp("C", posixID) == 0) - || (uprv_strcmp("POSIX", posixID) == 0)) - { - /* Nothing worked. Give it a nice POSIX default value. */ - posixID = "en_US_POSIX"; - } - return posixID; -} - -/* Return just the POSIX id for the default locale, whatever happens to be in - * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. - */ -static const char *uprv_getPOSIXIDForDefaultLocale(void) -{ - static const char* posixID = NULL; - if (posixID == 0) { - posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); - } - return posixID; -} - -#if !U_CHARSET_IS_UTF8 -/* Return just the POSIX id for the default codepage, whatever happens to be in - * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. - */ -static const char *uprv_getPOSIXIDForDefaultCodepage(void) -{ - static const char* posixID = NULL; - if (posixID == 0) { - posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); - } - return posixID; -} -#endif -#endif - -/* NOTE: The caller should handle thread safety */ -U_CAPI const char* U_EXPORT2 -uprv_getDefaultLocaleID() -{ -#if U_POSIX_LOCALE -/* - Note that: (a '!' 
means the ID is improper somehow) - LC_ALL ----> default_loc codepage --------------------------------------------------------- - ab.CD ab CD - ab@CD ab__CD - - ab@CD.EF ab__CD EF - - ab_CD.EF@GH ab_CD_GH EF - -Some 'improper' ways to do the same as above: - ! ab_CD@GH.EF ab_CD_GH EF - ! ab_CD.EF@GH.IJ ab_CD_GH EF - ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF - - _CD@GH _CD_GH - - _CD.EF@GH _CD_GH EF - -The variant cannot have dots in it. -The 'rightmost' variant (@xxx) wins. -The leftmost codepage (.xxx) wins. -*/ - char *correctedPOSIXLocale = 0; - const char* posixID = uprv_getPOSIXIDForDefaultLocale(); - const char *p; - const char *q; - int32_t len; - - /* Format: (no spaces) - ll [ _CC ] [ . MM ] [ @ VV] - - l = lang, C = ctry, M = charmap, V = variant - */ - - if (gCorrectedPOSIXLocale != NULL) { - return gCorrectedPOSIXLocale; - } - - if ((p = uprv_strchr(posixID, '.')) != NULL) { - /* assume new locale can't be larger than old one? */ - correctedPOSIXLocale = static_cast(uprv_malloc(uprv_strlen(posixID)+1)); - /* Exit on memory allocation error. */ - if (correctedPOSIXLocale == NULL) { - return NULL; - } - uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); - correctedPOSIXLocale[p-posixID] = 0; - - /* do not copy after the @ */ - if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) { - correctedPOSIXLocale[p-correctedPOSIXLocale] = 0; - } - } - - /* Note that we scan the *uncorrected* ID. */ - if ((p = uprv_strrchr(posixID, '@')) != NULL) { - if (correctedPOSIXLocale == NULL) { - /* new locale can be 1 char longer than old one if @ -> __ */ - correctedPOSIXLocale = static_cast(uprv_malloc(uprv_strlen(posixID)+2)); - /* Exit on memory allocation error. */ - if (correctedPOSIXLocale == NULL) { - return NULL; - } - uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); - correctedPOSIXLocale[p-posixID] = 0; - } - p++; - - /* Take care of any special cases here.. */ - if (!uprv_strcmp(p, "nynorsk")) { - p = "NY"; - /* Don't worry about no__NY. 
In practice, it won't appear. */ - } - - if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) { - uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */ - } - else { - uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ - } - - if ((q = uprv_strchr(p, '.')) != NULL) { - /* How big will the resulting string be? */ - len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); - uprv_strncat(correctedPOSIXLocale, p, q-p); - correctedPOSIXLocale[len] = 0; - } - else { - /* Anything following the @ sign */ - uprv_strcat(correctedPOSIXLocale, p); - } - - /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? - * How about 'russian' -> 'ru'? - * Many of the other locales using ISO codes will be handled by the - * canonicalization functions in uloc_getDefault. - */ - } - - /* Was a correction made? */ - if (correctedPOSIXLocale != NULL) { - posixID = correctedPOSIXLocale; - } - else { - /* copy it, just in case the original pointer goes away. See j2395 */ - correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1); - /* Exit on memory allocation error. */ - if (correctedPOSIXLocale == NULL) { - return NULL; - } - posixID = uprv_strcpy(correctedPOSIXLocale, posixID); - } - - if (gCorrectedPOSIXLocale == NULL) { - gCorrectedPOSIXLocale = correctedPOSIXLocale; - gCorrectedPOSIXLocaleHeapAllocated = true; - ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); - correctedPOSIXLocale = NULL; - } - - if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. 
*/ - uprv_free(correctedPOSIXLocale); - } - - return posixID; - -#elif U_PLATFORM_USES_ONLY_WIN32_API -#define POSIX_LOCALE_CAPACITY 64 - UErrorCode status = U_ZERO_ERROR; - char *correctedPOSIXLocale = nullptr; - - // If we have already figured this out just use the cached value - if (gCorrectedPOSIXLocale != nullptr) { - return gCorrectedPOSIXLocale; - } - - // No cached value, need to determine the current value - static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; - int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH); - - // Now we should have a Windows locale name that needs converted to the POSIX style. - if (length > 0) // If length is 0, then the GetLocaleInfoEx failed. - { - // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.) - char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; - - int32_t i; - for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++) - { - if (windowsLocale[i] == '_') - { - modifiedWindowsLocale[i] = '-'; - } - else - { - modifiedWindowsLocale[i] = static_cast(windowsLocale[i]); - } - - if (modifiedWindowsLocale[i] == '\0') - { - break; - } - } - - if (i >= UPRV_LENGTHOF(modifiedWindowsLocale)) - { - // Ran out of room, can't really happen, maybe we'll be lucky about a matching - // locale when tags are dropped - modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0'; - } - - // Now normalize the resulting name - correctedPOSIXLocale = static_cast(uprv_malloc(POSIX_LOCALE_CAPACITY + 1)); - /* TODO: Should we just exit on memory allocation failure? 
*/ - if (correctedPOSIXLocale) - { - int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); - if (U_SUCCESS(status)) - { - *(correctedPOSIXLocale + posixLen) = 0; - gCorrectedPOSIXLocale = correctedPOSIXLocale; - gCorrectedPOSIXLocaleHeapAllocated = true; - ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); - } - else - { - uprv_free(correctedPOSIXLocale); - } - } - } - - // If unable to find a locale we can agree upon, use en-US by default - if (gCorrectedPOSIXLocale == nullptr) { - gCorrectedPOSIXLocale = "en_US"; - } - return gCorrectedPOSIXLocale; - -#elif U_PLATFORM == U_PF_OS400 - /* locales are process scoped and are by definition thread safe */ - static char correctedLocale[64]; - const char *localeID = getenv("LC_ALL"); - char *p; - - if (localeID == NULL) - localeID = getenv("LANG"); - if (localeID == NULL) - localeID = setlocale(LC_ALL, NULL); - /* Make sure we have something... */ - if (localeID == NULL) - return "en_US_POSIX"; - - /* Extract the locale name from the path. */ - if((p = uprv_strrchr(localeID, '/')) != NULL) - { - /* Increment p to start of locale name. */ - p++; - localeID = p; - } - - /* Copy to work location. */ - uprv_strcpy(correctedLocale, localeID); - - /* Strip off the '.locale' extension. */ - if((p = uprv_strchr(correctedLocale, '.')) != NULL) { - *p = 0; - } - - /* Upper case the locale name. */ - T_CString_toUpperCase(correctedLocale); - - /* See if we are using the POSIX locale. Any of the - * following are equivalent and use the same QLGPGCMA - * (POSIX) locale. - * QLGPGCMA2 means UCS2 - * QLGPGCMA_4 means UTF-32 - * QLGPGCMA_8 means UTF-8 - */ - if ((uprv_strcmp("C", correctedLocale) == 0) || - (uprv_strcmp("POSIX", correctedLocale) == 0) || - (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) - { - uprv_strcpy(correctedLocale, "en_US_POSIX"); - } - else - { - int16_t LocaleLen; - - /* Lower case the lang portion. 
*/ - for(p = correctedLocale; *p != 0 && *p != '_'; p++) - { - *p = uprv_tolower(*p); - } - - /* Adjust for Euro. After '_E' add 'URO'. */ - LocaleLen = uprv_strlen(correctedLocale); - if (correctedLocale[LocaleLen - 2] == '_' && - correctedLocale[LocaleLen - 1] == 'E') - { - uprv_strcat(correctedLocale, "URO"); - } - - /* If using Lotus-based locale then convert to - * equivalent non Lotus. - */ - else if (correctedLocale[LocaleLen - 2] == '_' && - correctedLocale[LocaleLen - 1] == 'L') - { - correctedLocale[LocaleLen - 2] = 0; - } - - /* There are separate simplified and traditional - * locales called zh_HK_S and zh_HK_T. - */ - else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) - { - uprv_strcpy(correctedLocale, "zh_HK"); - } - - /* A special zh_CN_GBK locale... - */ - else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) - { - uprv_strcpy(correctedLocale, "zh_CN"); - } - - } - - return correctedLocale; -#endif - -} - -#if !U_CHARSET_IS_UTF8 -#if U_POSIX_LOCALE -/* -Due to various platform differences, one platform may specify a charset, -when they really mean a different charset. Remap the names so that they are -compatible with ICU. Only conflicting/ambiguous aliases should be resolved -here. Before adding anything to this function, please consider adding unique -names to the ICU alias table in the data directory. -*/ -static const char* -remapPlatformDependentCodepage(const char *locale, const char *name) { - if (locale != NULL && *locale == 0) { - /* Make sure that an empty locale is handled the same way. 
*/ - locale = NULL; - } - if (name == NULL) { - return NULL; - } -#if U_PLATFORM == U_PF_AIX - if (uprv_strcmp(name, "IBM-943") == 0) { - /* Use the ASCII compatible ibm-943 */ - name = "Shift-JIS"; - } - else if (uprv_strcmp(name, "IBM-1252") == 0) { - /* Use the windows-1252 that contains the Euro */ - name = "IBM-5348"; - } -#elif U_PLATFORM == U_PF_SOLARIS - if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { - /* Solaris underspecifies the "EUC" name. */ - if (uprv_strcmp(locale, "zh_CN") == 0) { - name = "EUC-CN"; - } - else if (uprv_strcmp(locale, "zh_TW") == 0) { - name = "EUC-TW"; - } - else if (uprv_strcmp(locale, "ko_KR") == 0) { - name = "EUC-KR"; - } - } - else if (uprv_strcmp(name, "eucJP") == 0) { - /* - ibm-954 is the best match. - ibm-33722 is the default for eucJP (similar to Windows). - */ - name = "eucjis"; - } - else if (uprv_strcmp(name, "646") == 0) { - /* - * The default codepage given by Solaris is 646 but the C library routines treat it as if it was - * ISO-8859-1 instead of US-ASCII(646). - */ - name = "ISO-8859-1"; - } -#elif U_PLATFORM_IS_DARWIN_BASED - if (locale == NULL && *name == 0) { - /* - No locale was specified, and an empty name was passed in. - This usually indicates that nl_langinfo didn't return valid information. - Mac OS X uses UTF-8 by default (especially the locale data and console). - */ - name = "UTF-8"; - } - else if (uprv_strcmp(name, "CP949") == 0) { - /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ - name = "EUC-KR"; - } - else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) { - /* - * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. - */ - name = "UTF-8"; - } -#elif U_PLATFORM == U_PF_BSD - if (uprv_strcmp(name, "CP949") == 0) { - /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. 
*/ - name = "EUC-KR"; - } -#elif U_PLATFORM == U_PF_HPUX - if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { - /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ - /* zh_TW.big5 is not the same charset as zh_HK.big5! */ - name = "hkbig5"; - } - else if (uprv_strcmp(name, "eucJP") == 0) { - /* - ibm-1350 is the best match, but unavailable. - ibm-954 is mostly a superset of ibm-1350. - ibm-33722 is the default for eucJP (similar to Windows). - */ - name = "eucjis"; - } -#elif U_PLATFORM == U_PF_LINUX - if (locale != NULL && uprv_strcmp(name, "euc") == 0) { - /* Linux underspecifies the "EUC" name. */ - if (uprv_strcmp(locale, "korean") == 0) { - name = "EUC-KR"; - } - else if (uprv_strcmp(locale, "japanese") == 0) { - /* See comment below about eucJP */ - name = "eucjis"; - } - } - else if (uprv_strcmp(name, "eucjp") == 0) { - /* - ibm-1350 is the best match, but unavailable. - ibm-954 is mostly a superset of ibm-1350. - ibm-33722 is the default for eucJP (similar to Windows). - */ - name = "eucjis"; - } - else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && - (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) { - /* - * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. - */ - name = "UTF-8"; - } - /* - * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of - * it by falling back to 'US-ASCII' when NULL is returned from this - * function. So, we don't have to worry about it here. 
- */ -#endif - /* return NULL when "" is passed in */ - if (*name == 0) { - name = NULL; - } - return name; -} - -static const char* -getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) -{ - char localeBuf[100]; - const char *name = NULL; - char *variant = NULL; - - if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { - size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); - uprv_strncpy(localeBuf, localeName, localeCapacity); - localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ - name = uprv_strncpy(buffer, name+1, buffCapacity); - buffer[buffCapacity-1] = 0; /* ensure NULL termination */ - if ((variant = const_cast(uprv_strchr(name, '@'))) != NULL) { - *variant = 0; - } - name = remapPlatformDependentCodepage(localeBuf, name); - } - return name; -} -#endif - -static const char* -int_getDefaultCodepage() -{ -#if U_PLATFORM == U_PF_OS400 - uint32_t ccsid = 37; /* Default to ibm-37 */ - static char codepage[64]; - Qwc_JOBI0400_t jobinfo; - Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ - - EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", - "* ", " ", &error); - - if (error.Bytes_Available == 0) { - if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { - ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; - } - else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { - ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; - } - /* else use the default */ - } - sprintf(codepage,"ibm-%d", ccsid); - return codepage; - -#elif U_PLATFORM == U_PF_OS390 - static char codepage[64]; - - strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); - strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); - codepage[63] = 0; /* NULL terminate */ - - return codepage; - -#elif U_PLATFORM_USES_ONLY_WIN32_API - static char codepage[64]; - DWORD codepageNumber = 0; - -#if U_PLATFORM_HAS_WINUWP_API > 0 - // UWP doesn't have a direct API to get the default ACP as Microsoft would rather 
- // have folks use Unicode than a "system" code page, however this is the same - // codepage as the system default locale codepage. (FWIW, the system locale is - // ONLY used for codepage, it should never be used for anything else) - GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, - (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR)); -#else - // Win32 apps can call GetACP - codepageNumber = GetACP(); -#endif - // Special case for UTF-8 - if (codepageNumber == 65001) - { - return "UTF-8"; - } - // Windows codepages can look like windows-1252, so format the found number - // the numbers are eclectic, however all valid system code pages, besides UTF-8 - // are between 3 and 19999 - if (codepageNumber > 0 && codepageNumber < 20000) - { - sprintf(codepage, "windows-%ld", codepageNumber); - return codepage; - } - // If the codepage number call failed then return UTF-8 - return "UTF-8"; - -#elif U_POSIX_LOCALE - static char codesetName[100]; - const char *localeName = NULL; - const char *name = NULL; - - localeName = uprv_getPOSIXIDForDefaultCodepage(); - uprv_memset(codesetName, 0, sizeof(codesetName)); - /* On Solaris nl_langinfo returns C locale values unless setlocale - * was called earlier. - */ -#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS) - /* When available, check nl_langinfo first because it usually gives more - useful names. It depends on LC_CTYPE. - nl_langinfo may use the same buffer as setlocale. */ - { - const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); -#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED - /* - * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 - * instead of ASCII. 
- */ - if (uprv_strcmp(localeName, "en_US_POSIX") != 0) { - codeset = remapPlatformDependentCodepage(localeName, codeset); - } else -#endif - { - codeset = remapPlatformDependentCodepage(NULL, codeset); - } - - if (codeset != NULL) { - uprv_strncpy(codesetName, codeset, sizeof(codesetName)); - codesetName[sizeof(codesetName)-1] = 0; - return codesetName; - } - } -#endif - - /* Use setlocale in a nice way, and then check some environment variables. - Maybe the application used setlocale already. - */ - uprv_memset(codesetName, 0, sizeof(codesetName)); - name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); - if (name) { - /* if we can find the codeset name from setlocale, return that. */ - return name; - } - - if (*codesetName == 0) - { - /* Everything failed. Return US ASCII (ISO 646). */ - (void)uprv_strcpy(codesetName, "US-ASCII"); - } - return codesetName; -#else - return "US-ASCII"; -#endif -} - - -U_CAPI const char* U_EXPORT2 -uprv_getDefaultCodepage() -{ - static char const *name = NULL; - umtx_lock(NULL); - if (name == NULL) { - name = int_getDefaultCodepage(); - } - umtx_unlock(NULL); - return name; -} -#endif /* !U_CHARSET_IS_UTF8 */ - - -/* end of platform-specific implementation -------------- */ - -/* version handling --------------------------------------------------------- */ - -U_CAPI void U_EXPORT2 -u_versionFromString(UVersionInfo versionArray, const char *versionString) { - char *end; - uint16_t part=0; - - if(versionArray==NULL) { - return; - } - - if(versionString!=NULL) { - for(;;) { - versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); - if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { - break; - } - versionString=end+1; - } - } - - while(partU_MAX_VERSION_STRING_LENGTH) { - len = U_MAX_VERSION_STRING_LENGTH; - } - u_UCharsToChars(versionString, versionChars, len); - versionChars[len]=0; - u_versionFromString(versionArray, versionChars); - } -} - -U_CAPI void U_EXPORT2 
-u_versionToString(const UVersionInfo versionArray, char *versionString) { - uint16_t count, part; - uint8_t field; - - if(versionString==NULL) { - return; - } - - if(versionArray==NULL) { - versionString[0]=0; - return; - } - - /* count how many fields need to be written */ - for(count=4; count>0 && versionArray[count-1]==0; --count) { - } - - if(count <= 1) { - count = 2; - } - - /* write the first part */ - /* write the decimal field value */ - field=versionArray[0]; - if(field>=100) { - *versionString++=(char)('0'+field/100); - field%=100; - } - if(field>=10) { - *versionString++=(char)('0'+field/10); - field%=10; - } - *versionString++=(char)('0'+field); - - /* write the following parts */ - for(part=1; part=100) { - *versionString++=(char)('0'+field/100); - field%=100; - } - if(field>=10) { - *versionString++=(char)('0'+field/10); - field%=10; - } - *versionString++=(char)('0'+field); - } - - /* NUL-terminate */ - *versionString=0; -} - -U_CAPI void U_EXPORT2 -u_getVersion(UVersionInfo versionArray) { - (void)copyright; // Suppress unused variable warning from clang. 
- u_versionFromString(versionArray, U_ICU_VERSION); -} - -/** - * icucfg.h dependent code - */ - -#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API - -#if HAVE_DLFCN_H -#ifdef __MVS__ -#ifndef __SUSV3 -#define __SUSV3 1 -#endif -#endif -#include -#endif /* HAVE_DLFCN_H */ - -U_INTERNAL void * U_EXPORT2 -uprv_dl_open(const char *libName, UErrorCode *status) { - void *ret = NULL; - if(U_FAILURE(*status)) return ret; - ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); - if(ret==NULL) { -#ifdef U_TRACE_DYLOAD - printf("dlerror on dlopen(%s): %s\n", libName, dlerror()); -#endif - *status = U_MISSING_RESOURCE_ERROR; - } - return ret; -} - -U_INTERNAL void U_EXPORT2 -uprv_dl_close(void *lib, UErrorCode *status) { - if(U_FAILURE(*status)) return; - dlclose(lib); -} - -U_INTERNAL UVoidFunction* U_EXPORT2 -uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { - union { - UVoidFunction *fp; - void *vp; - } uret; - uret.fp = NULL; - if(U_FAILURE(*status)) return uret.fp; - uret.vp = dlsym(lib, sym); - if(uret.vp == NULL) { -#ifdef U_TRACE_DYLOAD - printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror()); -#endif - *status = U_MISSING_RESOURCE_ERROR; - } - return uret.fp; -} - -#elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API - -/* Windows API implementation. */ -// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation. 
*/ - -U_INTERNAL void * U_EXPORT2 -uprv_dl_open(const char *libName, UErrorCode *status) { - HMODULE lib = NULL; - - if(U_FAILURE(*status)) return NULL; - - lib = LoadLibraryA(libName); - - if(lib==NULL) { - *status = U_MISSING_RESOURCE_ERROR; - } - - return (void*)lib; -} - -U_INTERNAL void U_EXPORT2 -uprv_dl_close(void *lib, UErrorCode *status) { - HMODULE handle = (HMODULE)lib; - if(U_FAILURE(*status)) return; - - FreeLibrary(handle); - - return; -} - -U_INTERNAL UVoidFunction* U_EXPORT2 -uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { - HMODULE handle = (HMODULE)lib; - UVoidFunction* addr = NULL; - - if(U_FAILURE(*status) || lib==NULL) return NULL; - - addr = (UVoidFunction*)GetProcAddress(handle, sym); - - if(addr==NULL) { - DWORD lastError = GetLastError(); - if(lastError == ERROR_PROC_NOT_FOUND) { - *status = U_MISSING_RESOURCE_ERROR; - } else { - *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ - } - } - - return addr; -} - -#else - -/* No dynamic loading, null (nonexistent) implementation. 
*/ - -U_INTERNAL void * U_EXPORT2 -uprv_dl_open(const char *libName, UErrorCode *status) { - (void)libName; - if(U_FAILURE(*status)) return NULL; - *status = U_UNSUPPORTED_ERROR; - return NULL; -} - -U_INTERNAL void U_EXPORT2 -uprv_dl_close(void *lib, UErrorCode *status) { - (void)lib; - if(U_FAILURE(*status)) return; - *status = U_UNSUPPORTED_ERROR; - return; -} - -U_INTERNAL UVoidFunction* U_EXPORT2 -uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { - (void)lib; - (void)sym; - if(U_SUCCESS(*status)) { - *status = U_UNSUPPORTED_ERROR; - } - return (UVoidFunction*)NULL; -} - -#endif - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/node/deps/icu-small/source/common/putilimp.h b/deps/node/deps/icu-small/source/common/putilimp.h deleted file mode 100644 index f744746b..00000000 --- a/deps/node/deps/icu-small/source/common/putilimp.h +++ /dev/null @@ -1,669 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* FILE NAME : putilimp.h -* -* Date Name Description -* 10/17/04 grhoten Move internal functions from putil.h to this file. -****************************************************************************** -*/ - -#ifndef PUTILIMP_H -#define PUTILIMP_H - -#include "unicode/utypes.h" -#include "unicode/putil.h" - -/** - * \def U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC - * Nearly all CPUs and compilers implement a right-shift of a signed integer - * as an Arithmetic Shift Right which copies the sign bit (the Most Significant Bit (MSB)) - * into the vacated bits (sign extension). 
- * For example, (int32_t)0xfff5fff3>>4 becomes 0xffff5fff and -1>>1=-1. - * - * This can be useful for storing a signed value in the upper bits - * and another bit field in the lower bits. - * The signed value can be retrieved by simple right-shifting. - * - * This is consistent with the Java language. - * - * However, the C standard allows compilers to implement a right-shift of a signed integer - * as a Logical Shift Right which copies a 0 into the vacated bits. - * For example, (int32_t)0xfff5fff3>>4 becomes 0x0fff5fff and -1>>1=0x7fffffff. - * - * Code that depends on the natural behavior should be guarded with this macro, - * with an alternate path for unusual platforms. - * @internal - */ -#ifdef U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC - /* Use the predefined value. */ -#else - /* - * Nearly all CPUs & compilers implement a right-shift of a signed integer - * as an Arithmetic Shift Right (with sign extension). - */ -# define U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 1 -#endif - -/** Define this to 1 if your platform supports IEEE 754 floating point, - to 0 if it does not. */ -#ifndef IEEE_754 -# define IEEE_754 1 -#endif - -/** - * uintptr_t is an optional part of the standard definitions in stdint.h. - * The opengroup.org documentation for stdint.h says - * "On XSI-conformant systems, the intptr_t and uintptr_t types are required; - * otherwise, they are optional." - * We assume that when uintptr_t is defined, UINTPTR_MAX is defined as well. - * - * Do not use ptrdiff_t since it is signed. size_t is unsigned. - */ -/* TODO: This check fails on some z environments. Filed a ticket #9357 for this. 
*/ -#if !defined(__intptr_t_defined) && !defined(UINTPTR_MAX) && (U_PLATFORM != U_PF_OS390) -typedef size_t uintptr_t; -#endif - -/*===========================================================================*/ -/** @{ Information about POSIX support */ -/*===========================================================================*/ - -#ifdef U_HAVE_NL_LANGINFO_CODESET - /* Use the predefined value. */ -#elif U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_QNX -# define U_HAVE_NL_LANGINFO_CODESET 0 -#else -# define U_HAVE_NL_LANGINFO_CODESET 1 -#endif - -#ifdef U_NL_LANGINFO_CODESET - /* Use the predefined value. */ -#elif !U_HAVE_NL_LANGINFO_CODESET -# define U_NL_LANGINFO_CODESET -1 -#elif U_PLATFORM == U_PF_OS400 - /* not defined */ -#else -# define U_NL_LANGINFO_CODESET CODESET -#endif - -#if defined(U_TZSET) || defined(U_HAVE_TZSET) - /* Use the predefined value. */ -#elif U_PLATFORM_USES_ONLY_WIN32_API - // UWP doesn't support tzset or environment variables for tz -#if U_PLATFORM_HAS_WINUWP_API == 0 -# define U_TZSET _tzset -#endif -#elif U_PLATFORM == U_PF_OS400 - /* not defined */ -#else -# define U_TZSET tzset -#endif - -#if defined(U_TIMEZONE) || defined(U_HAVE_TIMEZONE) - /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_ANDROID -# define U_TIMEZONE timezone -#elif defined(__UCLIBC__) - // uClibc does not have __timezone or _timezone. -#elif defined(_NEWLIB_VERSION) -# define U_TIMEZONE _timezone -#elif defined(__GLIBC__) - // glibc -# define U_TIMEZONE __timezone -#elif U_PLATFORM_IS_LINUX_BASED - // not defined -#elif U_PLATFORM_USES_ONLY_WIN32_API -# define U_TIMEZONE _timezone -#elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__) - /* not defined */ -#elif U_PLATFORM == U_PF_OS400 - /* not defined */ -#elif U_PLATFORM == U_PF_IPHONE - /* not defined */ -#else -# define U_TIMEZONE timezone -#endif - -#if defined(U_TZNAME) || defined(U_HAVE_TZNAME) - /* Use the predefined value. 
*/ -#elif U_PLATFORM_USES_ONLY_WIN32_API - /* not usable on all windows platforms */ -#if U_PLATFORM_HAS_WINUWP_API == 0 -# define U_TZNAME _tzname -#endif -#elif U_PLATFORM == U_PF_OS400 - /* not defined */ -#else -# define U_TZNAME tzname -#endif - -#ifdef U_HAVE_MMAP - /* Use the predefined value. */ -#elif U_PLATFORM_USES_ONLY_WIN32_API -# define U_HAVE_MMAP 0 -#else -# define U_HAVE_MMAP 1 -#endif - -#ifdef U_HAVE_POPEN - /* Use the predefined value. */ -#elif U_PLATFORM_USES_ONLY_WIN32_API -# define U_HAVE_POPEN 0 -#elif U_PLATFORM == U_PF_OS400 -# define U_HAVE_POPEN 0 -#else -# define U_HAVE_POPEN 1 -#endif - -/** - * \def U_HAVE_DIRENT_H - * Defines whether dirent.h is available. - * @internal - */ -#ifdef U_HAVE_DIRENT_H - /* Use the predefined value. */ -#elif U_PLATFORM_USES_ONLY_WIN32_API -# define U_HAVE_DIRENT_H 0 -#else -# define U_HAVE_DIRENT_H 1 -#endif - -/** @} */ - -/*===========================================================================*/ -/** @{ GCC built in functions for atomic memory operations */ -/*===========================================================================*/ - -/** - * \def U_HAVE_GCC_ATOMICS - * @internal - */ -#ifdef U_HAVE_GCC_ATOMICS - /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_MINGW - #define U_HAVE_GCC_ATOMICS 0 -#elif U_GCC_MAJOR_MINOR >= 404 || defined(__clang__) - /* TODO: Intel icc and IBM xlc on AIX also support gcc atomics. (Intel originated them.) - * Add them for these compilers. - * Note: Clang sets __GNUC__ defines for version 4.2, so misses the 4.4 test here. - */ -# define U_HAVE_GCC_ATOMICS 1 -#else -# define U_HAVE_GCC_ATOMICS 0 -#endif - -/** @} */ - -/** - * \def U_HAVE_STD_ATOMICS - * Defines whether to use the standard C++11 functions - * If false, ICU will fall back to compiler or platform specific alternatives. - * Note: support for these fall back options for atomics will be removed in a future version - * of ICU, and the use of C++ 11 atomics will be required. 
- * @internal - */ -#ifdef U_HAVE_STD_ATOMICS - /* Use the predefined value. */ -#else -# define U_HAVE_STD_ATOMICS 1 -#endif - -/** - * \def U_HAVE_CLANG_ATOMICS - * Defines whether Clang c11 style built-in atomics are available. - * These are used in preference to gcc atomics when both are available. - */ -#ifdef U_HAVE_CLANG_ATOMICS - /* Use the predefined value. */ -#elif __has_builtin(__c11_atomic_load) && \ - __has_builtin(__c11_atomic_store) && \ - __has_builtin(__c11_atomic_fetch_add) && \ - __has_builtin(__c11_atomic_fetch_sub) -# define U_HAVE_CLANG_ATOMICS 1 -#else -# define U_HAVE_CLANG_ATOMICS 0 -#endif - -/*===========================================================================*/ -/** @{ Programs used by ICU code */ -/*===========================================================================*/ - -/** - * \def U_MAKE_IS_NMAKE - * Defines whether the "make" program is Windows nmake. - */ -#ifdef U_MAKE_IS_NMAKE - /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_WINDOWS -# define U_MAKE_IS_NMAKE 1 -#else -# define U_MAKE_IS_NMAKE 0 -#endif - -/** @} */ - -/*==========================================================================*/ -/* Platform utilities */ -/*==========================================================================*/ - -/** - * Platform utilities isolates the platform dependencies of the - * library. For each platform which this code is ported to, these - * functions may have to be re-implemented. - */ - -/** - * Floating point utility to determine if a double is Not a Number (NaN). - * @internal - */ -U_INTERNAL UBool U_EXPORT2 uprv_isNaN(double d); -/** - * Floating point utility to determine if a double has an infinite value. - * @internal - */ -U_INTERNAL UBool U_EXPORT2 uprv_isInfinite(double d); -/** - * Floating point utility to determine if a double has a positive infinite value. 
- * @internal - */ -U_INTERNAL UBool U_EXPORT2 uprv_isPositiveInfinity(double d); -/** - * Floating point utility to determine if a double has a negative infinite value. - * @internal - */ -U_INTERNAL UBool U_EXPORT2 uprv_isNegativeInfinity(double d); -/** - * Floating point utility that returns a Not a Number (NaN) value. - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_getNaN(void); -/** - * Floating point utility that returns an infinite value. - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_getInfinity(void); - -/** - * Floating point utility to truncate a double. - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_trunc(double d); -/** - * Floating point utility to calculate the floor of a double. - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_floor(double d); -/** - * Floating point utility to calculate the ceiling of a double. - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_ceil(double d); -/** - * Floating point utility to calculate the absolute value of a double. - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_fabs(double d); -/** - * Floating point utility to calculate the fractional and integer parts of a double. - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_modf(double d, double* pinteger); -/** - * Floating point utility to calculate the remainder of a double divided by another double. - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_fmod(double d, double y); -/** - * Floating point utility to calculate d to the power of exponent (d^exponent). - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_pow(double d, double exponent); -/** - * Floating point utility to calculate 10 to the power of exponent (10^exponent). - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_pow10(int32_t exponent); -/** - * Floating point utility to calculate the maximum value of two doubles. 
- * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_fmax(double d, double y); -/** - * Floating point utility to calculate the minimum value of two doubles. - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_fmin(double d, double y); -/** - * Private utility to calculate the maximum value of two integers. - * @internal - */ -U_INTERNAL int32_t U_EXPORT2 uprv_max(int32_t d, int32_t y); -/** - * Private utility to calculate the minimum value of two integers. - * @internal - */ -U_INTERNAL int32_t U_EXPORT2 uprv_min(int32_t d, int32_t y); - -#if U_IS_BIG_ENDIAN -# define uprv_isNegative(number) (*((signed char *)&(number))<0) -#else -# define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0) -#endif - -/** - * Return the largest positive number that can be represented by an integer - * type of arbitrary bit length. - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_maxMantissa(void); - -/** - * Floating point utility to calculate the logarithm of a double. - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_log(double d); - -/** - * Does common notion of rounding e.g. uprv_floor(x + 0.5); - * @param x the double number - * @return the rounded double - * @internal - */ -U_INTERNAL double U_EXPORT2 uprv_round(double x); - -/** - * Adds the signed integers a and b, storing the result in res. - * Checks for signed integer overflow. - * Similar to the GCC/Clang extension __builtin_add_overflow - * - * @param a The first operand. - * @param b The second operand. - * @param res a + b - * @return true if overflow occurred; false if no overflow occurred. - * @internal - */ -U_INTERNAL UBool U_EXPORT2 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res); - -/** - * Multiplies the signed integers a and b, storing the result in res. - * Checks for signed integer overflow. - * Similar to the GCC/Clang extension __builtin_mul_overflow - * - * @param a The first multiplicand. - * @param b The second multiplicand. 
- * @param res a * b - * @return true if overflow occurred; false if no overflow occurred. - * @internal - */ -U_INTERNAL UBool U_EXPORT2 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res); - -#if 0 -/** - * Returns the number of digits after the decimal point in a double number x. - * - * @param x the double number - * @return the number of digits after the decimal point in a double number x. - * @internal - */ -/*U_INTERNAL int32_t U_EXPORT2 uprv_digitsAfterDecimal(double x);*/ -#endif - -#if !U_CHARSET_IS_UTF8 -/** - * Please use ucnv_getDefaultName() instead. - * Return the default codepage for this platform and locale. - * This function can call setlocale() on Unix platforms. Please read the - * platform documentation on setlocale() before calling this function. - * @return the default codepage for this platform - * @internal - */ -U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void); -#endif - -/** - * Please use uloc_getDefault() instead. - * Return the default locale ID string by querying the system, or - * zero if one cannot be found. - * This function can call setlocale() on Unix platforms. Please read the - * platform documentation on setlocale() before calling this function. - * @return the default locale ID string - * @internal - */ -U_INTERNAL const char* U_EXPORT2 uprv_getDefaultLocaleID(void); - -/** - * Time zone utilities - * - * Wrappers for C runtime library functions relating to timezones. - * The t_tzset() function (similar to tzset) uses the current setting - * of the environment variable TZ to assign values to three global - * variables: daylight, timezone, and tzname. These variables have the - * following meanings, and are declared in <time.h>. - * - * daylight Nonzero if daylight-saving-time zone (DST) is specified - * in TZ; otherwise, 0. Default value is 1. - * timezone Difference in seconds between coordinated universal - * time and local time. 
E.g., -28,800 for PST (GMT-8hrs) - * tzname(0) Three-letter time-zone name derived from TZ environment - * variable. E.g., "PST". - * tzname(1) Three-letter DST zone name derived from TZ environment - * variable. E.g., "PDT". If DST zone is omitted from TZ, - * tzname(1) is an empty string. - * - * Notes: For example, to set the TZ environment variable to correspond - * to the current time zone in Germany, you can use one of the - * following statements: - * - * set TZ=GST1GDT - * set TZ=GST+1GDT - * - * If the TZ value is not set, t_tzset() attempts to use the time zone - * information specified by the operating system. Under Windows NT - * and Windows 95, this information is specified in the Control Panel's - * Date/Time application. - * @internal - */ -U_INTERNAL void U_EXPORT2 uprv_tzset(void); - -/** - * Difference in seconds between coordinated universal - * time and local time. E.g., -28,800 for PST (GMT-8hrs) - * @return the difference in seconds between coordinated universal time and local time. - * @internal - */ -U_INTERNAL int32_t U_EXPORT2 uprv_timezone(void); - -/** - * tzname(0) Three-letter time-zone name derived from TZ environment - * variable. E.g., "PST". - * tzname(1) Three-letter DST zone name derived from TZ environment - * variable. E.g., "PDT". If DST zone is omitted from TZ, - * tzname(1) is an empty string. - * @internal - */ -U_INTERNAL const char* U_EXPORT2 uprv_tzname(int n); - -/** - * Reset the global tzname cache. - * @internal - */ -U_INTERNAL void uprv_tzname_clear_cache(); - -/** - * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970. - * This function is affected by 'faketime' and should be the bottleneck for all user-visible ICU time functions. - * @return the UTC time measured in milliseconds - * @internal - */ -U_INTERNAL UDate U_EXPORT2 uprv_getUTCtime(void); - -/** - * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970. 
- * This function is not affected by 'faketime', so it should only be used by low level test functions- not by anything that - * exposes time to the end user. - * @return the UTC time measured in milliseconds - * @internal - */ -U_INTERNAL UDate U_EXPORT2 uprv_getRawUTCtime(void); - -/** - * Determine whether a pathname is absolute or not, as defined by the platform. - * @param path Pathname to test - * @return TRUE if the path is absolute - * @internal (ICU 3.0) - */ -U_INTERNAL UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path); - -/** - * Use U_MAX_PTR instead of this function. - * @param void pointer to test - * @return the largest possible pointer greater than the base - * @internal (ICU 3.8) - */ -U_INTERNAL void * U_EXPORT2 uprv_maximumPtr(void *base); - -/** - * Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer. - * In fact, buffer sizes must not exceed 2GB so that the difference between - * the buffer limit and the buffer start can be expressed in an int32_t. - * - * The definition of U_MAX_PTR must fulfill the following conditions: - * - return the largest possible pointer greater than base - * - return a valid pointer according to the machine architecture (AS/400, 64-bit, etc.) - * - avoid wrapping around at high addresses - * - make sure that the returned pointer is not farther from base than 0x7fffffff bytes - * - * @param base The beginning of a buffer to find the maximum offset from - * @internal - */ -#ifndef U_MAX_PTR -# if U_PLATFORM == U_PF_OS390 && !defined(_LP64) - /* We have 31-bit pointers. */ -# define U_MAX_PTR(base) ((void *)0x7fffffff) -# elif U_PLATFORM == U_PF_OS400 -# define U_MAX_PTR(base) uprv_maximumPtr((void *)base) -# elif 0 - /* - * For platforms where pointers are scalar values (which is normal, but unlike i5/OS) - * but that do not define uintptr_t. 
- * - * However, this does not work on modern compilers: - * The C++ standard does not define pointer overflow, and allows compilers to - * assume that p+u>p for any pointer p and any integer u>0. - * Thus, modern compilers optimize away the ">" comparison. - * (See ICU tickets #7187 and #8096.) - */ -# define U_MAX_PTR(base) \ - ((void *)(((char *)(base)+0x7fffffffu) > (char *)(base) \ - ? ((char *)(base)+0x7fffffffu) \ - : (char *)-1)) -# else - /* Default version. C++ standard compliant for scalar pointers. */ -# define U_MAX_PTR(base) \ - ((void *)(((uintptr_t)(base)+0x7fffffffu) > (uintptr_t)(base) \ - ? ((uintptr_t)(base)+0x7fffffffu) \ - : (uintptr_t)-1)) -# endif -#endif - - -#ifdef __cplusplus -/** - * Pin a buffer capacity such that doing pointer arithmetic - * on the destination pointer and capacity cannot overflow. - * - * The pinned capacity must fulfill the following conditions (for positive capacities): - * - dest + capacity is a valid pointer according to the machine arcitecture (AS/400, 64-bit, etc.) - * - (dest + capacity) >= dest - * - The size (in bytes) of T[capacity] does not exceed 0x7fffffff - * - * @param dest the destination buffer pointer. - * @param capacity the requested buffer capacity, in units of type T. - * @return the pinned capacity. - * @internal - */ -template -inline int32_t pinCapacity(T *dest, int32_t capacity) { - if (capacity <= 0) { return capacity; } - - uintptr_t destInt = (uintptr_t)dest; - uintptr_t maxInt; - -# if U_PLATFORM == U_PF_OS390 && !defined(_LP64) - // We have 31-bit pointers. - maxInt = 0x7fffffff; -# elif U_PLATFORM == U_PF_OS400 - maxInt = (uintptr_t)uprv_maximumPtr((void *)dest); -# else - maxInt = destInt + 0x7fffffffu; - if (maxInt < destInt) { - // Less than 2GB to the end of the address space. - // Pin to that to prevent address overflow. - maxInt = (uintptr_t)-1; - } -# endif - - uintptr_t maxBytes = maxInt - destInt; // max. 
2GB - int32_t maxCapacity = (int32_t)(maxBytes / sizeof(T)); - return capacity <= maxCapacity ? capacity : maxCapacity; -} -#endif // __cplusplus - -/* Dynamic Library Functions */ - -typedef void (UVoidFunction)(void); - -#if U_ENABLE_DYLOAD -/** - * Load a library - * @internal (ICU 4.4) - */ -U_INTERNAL void * U_EXPORT2 uprv_dl_open(const char *libName, UErrorCode *status); - -/** - * Close a library - * @internal (ICU 4.4) - */ -U_INTERNAL void U_EXPORT2 uprv_dl_close( void *lib, UErrorCode *status); - -/** - * Extract a symbol from a library (function) - * @internal (ICU 4.8) - */ -U_INTERNAL UVoidFunction* U_EXPORT2 uprv_dlsym_func( void *lib, const char *symbolName, UErrorCode *status); - -/** - * Extract a symbol from a library (function) - * Not implemented, no clients. - * @internal - */ -/* U_INTERNAL void * U_EXPORT2 uprv_dlsym_data( void *lib, const char *symbolName, UErrorCode *status); */ - -#endif - -/** - * Define malloc and related functions - * @internal - */ -#if U_PLATFORM == U_PF_OS400 -# define uprv_default_malloc(x) _C_TS_malloc(x) -# define uprv_default_realloc(x,y) _C_TS_realloc(x,y) -# define uprv_default_free(x) _C_TS_free(x) -/* also _C_TS_calloc(x) */ -#else -/* C defaults */ -# define uprv_default_malloc(x) malloc(x) -# define uprv_default_realloc(x,y) realloc(x,y) -# define uprv_default_free(x) free(x) -#endif - - -#endif diff --git a/deps/node/deps/icu-small/source/common/rbbi.cpp b/deps/node/deps/icu-small/source/common/rbbi.cpp deleted file mode 100644 index cb376650..00000000 --- a/deps/node/deps/icu-small/source/common/rbbi.cpp +++ /dev/null @@ -1,1274 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -*************************************************************************** -* Copyright (C) 1999-2016 International Business Machines Corporation -* and others. All rights reserved. 
-*************************************************************************** -*/ -// -// file: rbbi.cpp Contains the implementation of the rule based break iterator -// runtime engine and the API implementation for -// class RuleBasedBreakIterator -// - -#include "utypeinfo.h" // for 'typeid' to work - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include - -#include "unicode/rbbi.h" -#include "unicode/schriter.h" -#include "unicode/uchriter.h" -#include "unicode/uclean.h" -#include "unicode/udata.h" - -#include "brkeng.h" -#include "ucln_cmn.h" -#include "cmemory.h" -#include "cstring.h" -#include "rbbidata.h" -#include "rbbi_cache.h" -#include "rbbirb.h" -#include "uassert.h" -#include "umutex.h" -#include "uvectr32.h" - -// if U_LOCAL_SERVICE_HOOK is defined, then localsvc.cpp is expected to be included. -#if U_LOCAL_SERVICE_HOOK -#include "localsvc.h" -#endif - -#ifdef RBBI_DEBUG -static UBool gTrace = FALSE; -#endif - -U_NAMESPACE_BEGIN - -// The state number of the starting state -constexpr int32_t START_STATE = 1; - -// The state-transition value indicating "stop" -constexpr int32_t STOP_STATE = 0; - - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator) - - -//======================================================================= -// constructors -//======================================================================= - -/** - * Constructs a RuleBasedBreakIterator that uses the already-created - * tables object that is passed in as a parameter. - */ -RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) - : fSCharIter(UnicodeString()) -{ - init(status); - fData = new RBBIDataWrapper(data, status); // status checked in constructor - if (U_FAILURE(status)) {return;} - if(fData == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } -} - -// -// Construct from precompiled binary rules (tables). 
This constructor is public API, -// taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules(). -// -RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules, - uint32_t ruleLength, - UErrorCode &status) - : fSCharIter(UnicodeString()) -{ - init(status); - if (U_FAILURE(status)) { - return; - } - if (compiledRules == NULL || ruleLength < sizeof(RBBIDataHeader)) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - const RBBIDataHeader *data = (const RBBIDataHeader *)compiledRules; - if (data->fLength > ruleLength) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - fData = new RBBIDataWrapper(data, RBBIDataWrapper::kDontAdopt, status); - if (U_FAILURE(status)) {return;} - if(fData == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } -} - - -//------------------------------------------------------------------------------- -// -// Constructor from a UDataMemory handle to precompiled break rules -// stored in an ICU data file. -// -//------------------------------------------------------------------------------- -RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status) - : fSCharIter(UnicodeString()) -{ - init(status); - fData = new RBBIDataWrapper(udm, status); // status checked in constructor - if (U_FAILURE(status)) {return;} - if(fData == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } -} - - - -//------------------------------------------------------------------------------- -// -// Constructor from a set of rules supplied as a string. 
-// -//------------------------------------------------------------------------------- -RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules, - UParseError &parseError, - UErrorCode &status) - : fSCharIter(UnicodeString()) -{ - init(status); - if (U_FAILURE(status)) {return;} - RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *) - RBBIRuleBuilder::createRuleBasedBreakIterator(rules, &parseError, status); - // Note: This is a bit awkward. The RBBI ruleBuilder has a factory method that - // creates and returns a complete RBBI. From here, in a constructor, we - // can't just return the object created by the builder factory, hence - // the assignment of the factory created object to "this". - if (U_SUCCESS(status)) { - *this = *bi; - delete bi; - } -} - - -//------------------------------------------------------------------------------- -// -// Default Constructor. Create an empty shell that can be set up later. -// Used when creating a RuleBasedBreakIterator from a set -// of rules. -//------------------------------------------------------------------------------- -RuleBasedBreakIterator::RuleBasedBreakIterator() - : fSCharIter(UnicodeString()) -{ - UErrorCode status = U_ZERO_ERROR; - init(status); -} - - -//------------------------------------------------------------------------------- -// -// Copy constructor. Will produce a break iterator with the same behavior, -// and which iterates over the same text, as the one passed in. -// -//------------------------------------------------------------------------------- -RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other) -: BreakIterator(other), - fSCharIter(UnicodeString()) -{ - UErrorCode status = U_ZERO_ERROR; - this->init(status); - *this = other; -} - - -/** - * Destructor - */ -RuleBasedBreakIterator::~RuleBasedBreakIterator() { - if (fCharIter != &fSCharIter) { - // fCharIter was adopted from the outside. 
- delete fCharIter; - } - fCharIter = NULL; - - utext_close(&fText); - - if (fData != NULL) { - fData->removeReference(); - fData = NULL; - } - delete fBreakCache; - fBreakCache = NULL; - - delete fDictionaryCache; - fDictionaryCache = NULL; - - delete fLanguageBreakEngines; - fLanguageBreakEngines = NULL; - - delete fUnhandledBreakEngine; - fUnhandledBreakEngine = NULL; -} - -/** - * Assignment operator. Sets this iterator to have the same behavior, - * and iterate over the same text, as the one passed in. - */ -RuleBasedBreakIterator& -RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) { - if (this == &that) { - return *this; - } - BreakIterator::operator=(that); - - if (fLanguageBreakEngines != NULL) { - delete fLanguageBreakEngines; - fLanguageBreakEngines = NULL; // Just rebuild for now - } - // TODO: clone fLanguageBreakEngines from "that" - UErrorCode status = U_ZERO_ERROR; - utext_clone(&fText, &that.fText, FALSE, TRUE, &status); - - if (fCharIter != &fSCharIter) { - delete fCharIter; - } - fCharIter = &fSCharIter; - - if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) { - // This is a little bit tricky - it will intially appear that - // this->fCharIter is adopted, even if that->fCharIter was - // not adopted. That's ok. - fCharIter = that.fCharIter->clone(); - } - fSCharIter = that.fSCharIter; - if (fCharIter == NULL) { - fCharIter = &fSCharIter; - } - - if (fData != NULL) { - fData->removeReference(); - fData = NULL; - } - if (that.fData != NULL) { - fData = that.fData->addReference(); - } - - fPosition = that.fPosition; - fRuleStatusIndex = that.fRuleStatusIndex; - fDone = that.fDone; - - // TODO: both the dictionary and the main cache need to be copied. - // Current position could be within a dictionary range. Trying to continue - // the iteration without the caches present would go to the rules, with - // the assumption that the current position is on a rule boundary. 
- fBreakCache->reset(fPosition, fRuleStatusIndex); - fDictionaryCache->reset(); - - return *this; -} - - - -//----------------------------------------------------------------------------- -// -// init() Shared initialization routine. Used by all the constructors. -// Initializes all fields, leaving the object in a consistent state. -// -//----------------------------------------------------------------------------- -void RuleBasedBreakIterator::init(UErrorCode &status) { - fCharIter = NULL; - fData = NULL; - fPosition = 0; - fRuleStatusIndex = 0; - fDone = false; - fDictionaryCharCount = 0; - fLanguageBreakEngines = NULL; - fUnhandledBreakEngine = NULL; - fBreakCache = NULL; - fDictionaryCache = NULL; - - // Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER. - // fText = UTEXT_INITIALIZER; - static const UText initializedUText = UTEXT_INITIALIZER; - uprv_memcpy(&fText, &initializedUText, sizeof(UText)); - - if (U_FAILURE(status)) { - return; - } - - utext_openUChars(&fText, NULL, 0, &status); - fDictionaryCache = new DictionaryCache(this, status); - fBreakCache = new BreakCache(this, status); - if (U_SUCCESS(status) && (fDictionaryCache == NULL || fBreakCache == NULL)) { - status = U_MEMORY_ALLOCATION_ERROR; - } - -#ifdef RBBI_DEBUG - static UBool debugInitDone = FALSE; - if (debugInitDone == FALSE) { - char *debugEnv = getenv("U_RBBIDEBUG"); - if (debugEnv && uprv_strstr(debugEnv, "trace")) { - gTrace = TRUE; - } - debugInitDone = TRUE; - } -#endif -} - - - -//----------------------------------------------------------------------------- -// -// clone - Returns a newly-constructed RuleBasedBreakIterator with the same -// behavior, and iterating over the same text, as this one. -// Virtual function: does the right thing with subclasses. 
-// -//----------------------------------------------------------------------------- -BreakIterator* -RuleBasedBreakIterator::clone(void) const { - return new RuleBasedBreakIterator(*this); -} - -/** - * Equality operator. Returns TRUE if both BreakIterators are of the - * same class, have the same behavior, and iterate over the same text. - */ -UBool -RuleBasedBreakIterator::operator==(const BreakIterator& that) const { - if (typeid(*this) != typeid(that)) { - return FALSE; - } - if (this == &that) { - return TRUE; - } - - // The base class BreakIterator carries no state that participates in equality, - // and does not implement an equality function that would otherwise be - // checked at this point. - - const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that; - - if (!utext_equals(&fText, &that2.fText)) { - // The two break iterators are operating on different text, - // or have a different iteration position. - // Note that fText's position is always the same as the break iterator's position. - return FALSE; - }; - - if (!(fPosition == that2.fPosition && - fRuleStatusIndex == that2.fRuleStatusIndex && - fDone == that2.fDone)) { - return FALSE; - } - - if (that2.fData == fData || - (fData != NULL && that2.fData != NULL && *that2.fData == *fData)) { - // The two break iterators are using the same rules. - return TRUE; - } - return FALSE; -} - -/** - * Compute a hash code for this BreakIterator - * @return A hash code - */ -int32_t -RuleBasedBreakIterator::hashCode(void) const { - int32_t hash = 0; - if (fData != NULL) { - hash = fData->hashCode(); - } - return hash; -} - - -void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - fBreakCache->reset(); - fDictionaryCache->reset(); - utext_clone(&fText, ut, FALSE, TRUE, &status); - - // Set up a dummy CharacterIterator to be returned if anyone - // calls getText(). 
With input from UText, there is no reasonable - // way to return a characterIterator over the actual input text. - // Return one over an empty string instead - this is the closest - // we can come to signaling a failure. - // (GetText() is obsolete, this failure is sort of OK) - fSCharIter.setText(UnicodeString()); - - if (fCharIter != &fSCharIter) { - // existing fCharIter was adopted from the outside. Delete it now. - delete fCharIter; - } - fCharIter = &fSCharIter; - - this->first(); -} - - -UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const { - UText *result = utext_clone(fillIn, &fText, FALSE, TRUE, &status); - return result; -} - - -//======================================================================= -// BreakIterator overrides -//======================================================================= - -/** - * Return a CharacterIterator over the text being analyzed. - */ -CharacterIterator& -RuleBasedBreakIterator::getText() const { - return *fCharIter; -} - -/** - * Set the iterator to analyze a new piece of text. This function resets - * the current iteration position to the beginning of the text. - * @param newText An iterator over the text to analyze. - */ -void -RuleBasedBreakIterator::adoptText(CharacterIterator* newText) { - // If we are holding a CharacterIterator adopted from a - // previous call to this function, delete it now. - if (fCharIter != &fSCharIter) { - delete fCharIter; - } - - fCharIter = newText; - UErrorCode status = U_ZERO_ERROR; - fBreakCache->reset(); - fDictionaryCache->reset(); - if (newText==NULL || newText->startIndex() != 0) { - // startIndex !=0 wants to be an error, but there's no way to report it. - // Make the iterator text be an empty string. - utext_openUChars(&fText, NULL, 0, &status); - } else { - utext_openCharacterIterator(&fText, newText, &status); - } - this->first(); -} - -/** - * Set the iterator to analyze a new piece of text. 
This function resets - * the current iteration position to the beginning of the text. - * @param newText An iterator over the text to analyze. - */ -void -RuleBasedBreakIterator::setText(const UnicodeString& newText) { - UErrorCode status = U_ZERO_ERROR; - fBreakCache->reset(); - fDictionaryCache->reset(); - utext_openConstUnicodeString(&fText, &newText, &status); - - // Set up a character iterator on the string. - // Needed in case someone calls getText(). - // Can not, unfortunately, do this lazily on the (probably never) - // call to getText(), because getText is const. - fSCharIter.setText(newText); - - if (fCharIter != &fSCharIter) { - // old fCharIter was adopted from the outside. Delete it. - delete fCharIter; - } - fCharIter = &fSCharIter; - - this->first(); -} - - -/** - * Provide a new UText for the input text. Must reference text with contents identical - * to the original. - * Intended for use with text data originating in Java (garbage collected) environments - * where the data may be moved in memory at arbitrary times. - */ -RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, UErrorCode &status) { - if (U_FAILURE(status)) { - return *this; - } - if (input == NULL) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return *this; - } - int64_t pos = utext_getNativeIndex(&fText); - // Shallow read-only clone of the new UText into the existing input UText - utext_clone(&fText, input, FALSE, TRUE, &status); - if (U_FAILURE(status)) { - return *this; - } - utext_setNativeIndex(&fText, pos); - if (utext_getNativeIndex(&fText) != pos) { - // Sanity check. The new input utext is supposed to have the exact same - // contents as the old. If we can't set to the same position, it doesn't. - // The contents underlying the old utext might be invalid at this point, - // so it's not safe to check directly. 
- status = U_ILLEGAL_ARGUMENT_ERROR; - } - return *this; -} - - -/** - * Sets the current iteration position to the beginning of the text, position zero. - * @return The new iterator position, which is zero. - */ -int32_t RuleBasedBreakIterator::first(void) { - UErrorCode status = U_ZERO_ERROR; - if (!fBreakCache->seek(0)) { - fBreakCache->populateNear(0, status); - } - fBreakCache->current(); - U_ASSERT(fPosition == 0); - return 0; -} - -/** - * Sets the current iteration position to the end of the text. - * @return The text's past-the-end offset. - */ -int32_t RuleBasedBreakIterator::last(void) { - int32_t endPos = (int32_t)utext_nativeLength(&fText); - UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position. - (void)endShouldBeBoundary; - U_ASSERT(endShouldBeBoundary); - U_ASSERT(fPosition == endPos); - return endPos; -} - -/** - * Advances the iterator either forward or backward the specified number of steps. - * Negative values move backward, and positive values move forward. This is - * equivalent to repeatedly calling next() or previous(). - * @param n The number of steps to move. The sign indicates the direction - * (negative is backwards, and positive is forwards). - * @return The character offset of the boundary position n boundaries away from - * the current one. - */ -int32_t RuleBasedBreakIterator::next(int32_t n) { - int32_t result = 0; - if (n > 0) { - for (; n > 0 && result != UBRK_DONE; --n) { - result = next(); - } - } else if (n < 0) { - for (; n < 0 && result != UBRK_DONE; ++n) { - result = previous(); - } - } else { - result = current(); - } - return result; -} - -/** - * Advances the iterator to the next boundary position. - * @return The position of the first boundary after this one. - */ -int32_t RuleBasedBreakIterator::next(void) { - fBreakCache->next(); - return fDone ? UBRK_DONE : fPosition; -} - -/** - * Move the iterator backwards, to the boundary preceding the current one. 
- * - * Starts from the current position within fText. - * Starting position need not be on a boundary. - * - * @return The position of the boundary position immediately preceding the starting position. - */ -int32_t RuleBasedBreakIterator::previous(void) { - UErrorCode status = U_ZERO_ERROR; - fBreakCache->previous(status); - return fDone ? UBRK_DONE : fPosition; -} - -/** - * Sets the iterator to refer to the first boundary position following - * the specified position. - * @param startPos The position from which to begin searching for a break position. - * @return The position of the first break after the current position. - */ -int32_t RuleBasedBreakIterator::following(int32_t startPos) { - // if the supplied position is before the beginning, return the - // text's starting offset - if (startPos < 0) { - return first(); - } - - // Move requested offset to a code point start. It might be on a trail surrogate, - // or on a trail byte if the input is UTF-8. Or it may be beyond the end of the text. - utext_setNativeIndex(&fText, startPos); - startPos = (int32_t)utext_getNativeIndex(&fText); - - UErrorCode status = U_ZERO_ERROR; - fBreakCache->following(startPos, status); - return fDone ? UBRK_DONE : fPosition; -} - -/** - * Sets the iterator to refer to the last boundary position before the - * specified position. - * @param offset The position to begin searching for a break from. - * @return The position of the last boundary before the starting position. - */ -int32_t RuleBasedBreakIterator::preceding(int32_t offset) { - if (offset > utext_nativeLength(&fText)) { - return last(); - } - - // Move requested offset to a code point start. It might be on a trail surrogate, - // or on a trail byte if the input is UTF-8. - - utext_setNativeIndex(&fText, offset); - int32_t adjustedOffset = static_cast(utext_getNativeIndex(&fText)); - - UErrorCode status = U_ZERO_ERROR; - fBreakCache->preceding(adjustedOffset, status); - return fDone ? 
UBRK_DONE : fPosition; -} - -/** - * Returns true if the specfied position is a boundary position. As a side - * effect, leaves the iterator pointing to the first boundary position at - * or after "offset". - * - * @param offset the offset to check. - * @return True if "offset" is a boundary position. - */ -UBool RuleBasedBreakIterator::isBoundary(int32_t offset) { - // out-of-range indexes are never boundary positions - if (offset < 0) { - first(); // For side effects on current position, tag values. - return FALSE; - } - - // Adjust offset to be on a code point boundary and not beyond the end of the text. - // Note that isBoundary() is always false for offsets that are not on code point boundaries. - // But we still need the side effect of leaving iteration at the following boundary. - - utext_setNativeIndex(&fText, offset); - int32_t adjustedOffset = static_cast(utext_getNativeIndex(&fText)); - - bool result = false; - UErrorCode status = U_ZERO_ERROR; - if (fBreakCache->seek(adjustedOffset) || fBreakCache->populateNear(adjustedOffset, status)) { - result = (fBreakCache->current() == offset); - } - - if (result && adjustedOffset < offset && utext_char32At(&fText, offset) == U_SENTINEL) { - // Original offset is beyond the end of the text. Return FALSE, it's not a boundary, - // but the iteration position remains set to the end of the text, which is a boundary. - return FALSE; - } - if (!result) { - // Not on a boundary. isBoundary() must leave iterator on the following boundary. - // Cache->seek(), above, left us on the preceding boundary, so advance one. - next(); - } - return result; -} - - -/** - * Returns the current iteration position. - * @return The current iteration position. 
- */ -int32_t RuleBasedBreakIterator::current(void) const { - return fPosition; -} - - -//======================================================================= -// implementation -//======================================================================= - -// -// RBBIRunMode - the state machine runs an extra iteration at the beginning and end -// of user text. A variable with this enum type keeps track of where we -// are. The state machine only fetches user input while in the RUN mode. -// -enum RBBIRunMode { - RBBI_START, // state machine processing is before first char of input - RBBI_RUN, // state machine processing is in the user text - RBBI_END // state machine processing is after end of user text. -}; - - -// Map from look-ahead break states (corresponds to rules) to boundary positions. -// Allows multiple lookahead break rules to be in flight at the same time. -// -// This is a temporary approach for ICU 57. A better fix is to make the look-ahead numbers -// in the state table be sequential, then we can just index an array. And the -// table could also tell us in advance how big that array needs to be. -// -// Before ICU 57 there was just a single simple variable for a look-ahead match that -// was in progress. Two rules at once did not work. 
- -static const int32_t kMaxLookaheads = 8; -struct LookAheadResults { - int32_t fUsedSlotLimit; - int32_t fPositions[8]; - int16_t fKeys[8]; - - LookAheadResults() : fUsedSlotLimit(0), fPositions(), fKeys() {}; - - int32_t getPosition(int16_t key) { - for (int32_t i=0; i= kMaxLookaheads) { - U_ASSERT(FALSE); - i = kMaxLookaheads - 1; - } - fKeys[i] = key; - fPositions[i] = position; - U_ASSERT(fUsedSlotLimit == i); - fUsedSlotLimit = i + 1; - } -}; - - -//----------------------------------------------------------------------------------- -// -// handleNext() -// Run the state machine to find a boundary -// -//----------------------------------------------------------------------------------- -int32_t RuleBasedBreakIterator::handleNext() { - int32_t state; - uint16_t category = 0; - RBBIRunMode mode; - - RBBIStateTableRow *row; - UChar32 c; - LookAheadResults lookAheadMatches; - int32_t result = 0; - int32_t initialPosition = 0; - const RBBIStateTable *statetable = fData->fForwardTable; - const char *tableData = statetable->fTableData; - uint32_t tableRowLen = statetable->fRowLen; - #ifdef RBBI_DEBUG - if (gTrace) { - RBBIDebugPuts("Handle Next pos char state category"); - } - #endif - - // handleNext alway sets the break tag value. - // Set the default for it. - fRuleStatusIndex = 0; - - fDictionaryCharCount = 0; - - // if we're already at the end of the text, return DONE. 
- initialPosition = fPosition; - UTEXT_SETNATIVEINDEX(&fText, initialPosition); - result = initialPosition; - c = UTEXT_NEXT32(&fText); - if (c==U_SENTINEL) { - fDone = TRUE; - return UBRK_DONE; - } - - // Set the initial state for the state machine - state = START_STATE; - row = (RBBIStateTableRow *) - //(statetable->fTableData + (statetable->fRowLen * state)); - (tableData + tableRowLen * state); - - - mode = RBBI_RUN; - if (statetable->fFlags & RBBI_BOF_REQUIRED) { - category = 2; - mode = RBBI_START; - } - - - // loop until we reach the end of the text or transition to state 0 - // - for (;;) { - if (c == U_SENTINEL) { - // Reached end of input string. - if (mode == RBBI_END) { - // We have already run the loop one last time with the - // character set to the psueudo {eof} value. Now it is time - // to unconditionally bail out. - break; - } - // Run the loop one last time with the fake end-of-input character category. - mode = RBBI_END; - category = 1; - } - - // - // Get the char category. An incoming category of 1 or 2 means that - // we are preset for doing the beginning or end of input, and - // that we shouldn't get a category from an actual text input character. - // - if (mode == RBBI_RUN) { - // look up the current character's character category, which tells us - // which column in the state table to look at. - // Note: the 16 in UTRIE_GET16 refers to the size of the data being returned, - // not the size of the character going in, which is a UChar32. - // - category = UTRIE2_GET16(fData->fTrie, c); - - // Check the dictionary bit in the character's category. - // Counter is only used by dictionary based iteration. - // Chars that need to be handled by a dictionary have a flag bit set - // in their category values. - // - if ((category & 0x4000) != 0) { - fDictionaryCharCount++; - // And off the dictionary flag bit. 
- category &= ~0x4000; - } - } - - #ifdef RBBI_DEBUG - if (gTrace) { - RBBIDebugPrintf(" %4" PRId64 " ", utext_getNativeIndex(&fText)); - if (0x20<=c && c<0x7f) { - RBBIDebugPrintf("\"%c\" ", c); - } else { - RBBIDebugPrintf("%5x ", c); - } - RBBIDebugPrintf("%3d %3d\n", state, category); - } - #endif - - // State Transition - move machine to its next state - // - - // fNextState is a variable-length array. - U_ASSERT(categoryfHeader->fCatCount); - state = row->fNextState[category]; /*Not accessing beyond memory*/ - row = (RBBIStateTableRow *) - // (statetable->fTableData + (statetable->fRowLen * state)); - (tableData + tableRowLen * state); - - - if (row->fAccepting == -1) { - // Match found, common case. - if (mode != RBBI_START) { - result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); - } - fRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values. - } - - int16_t completedRule = row->fAccepting; - if (completedRule > 0) { - // Lookahead match is completed. - int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule); - if (lookaheadResult >= 0) { - fRuleStatusIndex = row->fTagIdx; - fPosition = lookaheadResult; - return lookaheadResult; - } - } - int16_t rule = row->fLookAhead; - if (rule != 0) { - // At the position of a '/' in a look-ahead match. Record it. - int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText); - lookAheadMatches.setPosition(rule, pos); - } - - if (state == STOP_STATE) { - // This is the normal exit from the lookup state machine. - // We have advanced through the string until it is certain that no - // longer match is possible, no matter what characters follow. - break; - } - - // Advance to the next character. - // If this is a beginning-of-input loop iteration, don't advance - // the input position. The next iteration will be processing the - // first real input character. 
- if (mode == RBBI_RUN) { - c = UTEXT_NEXT32(&fText); - } else { - if (mode == RBBI_START) { - mode = RBBI_RUN; - } - } - } - - // The state machine is done. Check whether it found a match... - - // If the iterator failed to advance in the match engine, force it ahead by one. - // (This really indicates a defect in the break rules. They should always match - // at least one character.) - if (result == initialPosition) { - utext_setNativeIndex(&fText, initialPosition); - utext_next32(&fText); - result = (int32_t)utext_getNativeIndex(&fText); - fRuleStatusIndex = 0; - } - - // Leave the iterator at our result position. - fPosition = result; - #ifdef RBBI_DEBUG - if (gTrace) { - RBBIDebugPrintf("result = %d\n\n", result); - } - #endif - return result; -} - - -//----------------------------------------------------------------------------------- -// -// handleSafePrevious() -// -// Iterate backwards using the safe reverse rules. -// The logic of this function is similar to handleNext(), but simpler -// because the safe table does not require as many options. -// -//----------------------------------------------------------------------------------- -int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) { - int32_t state; - uint16_t category = 0; - RBBIStateTableRow *row; - UChar32 c; - int32_t result = 0; - - const RBBIStateTable *stateTable = fData->fReverseTable; - UTEXT_SETNATIVEINDEX(&fText, fromPosition); - #ifdef RBBI_DEBUG - if (gTrace) { - RBBIDebugPuts("Handle Previous pos char state category"); - } - #endif - - // if we're already at the start of the text, return DONE. 
- if (fData == NULL || UTEXT_GETNATIVEINDEX(&fText)==0) { - return BreakIterator::DONE; - } - - // Set the initial state for the state machine - c = UTEXT_PREVIOUS32(&fText); - state = START_STATE; - row = (RBBIStateTableRow *) - (stateTable->fTableData + (stateTable->fRowLen * state)); - - // loop until we reach the start of the text or transition to state 0 - // - for (; c != U_SENTINEL; c = UTEXT_PREVIOUS32(&fText)) { - - // look up the current character's character category, which tells us - // which column in the state table to look at. - // Note: the 16 in UTRIE_GET16 refers to the size of the data being returned, - // not the size of the character going in, which is a UChar32. - // - // And off the dictionary flag bit. For reverse iteration it is not used. - category = UTRIE2_GET16(fData->fTrie, c); - category &= ~0x4000; - - #ifdef RBBI_DEBUG - if (gTrace) { - RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(&fText)); - if (0x20<=c && c<0x7f) { - RBBIDebugPrintf("\"%c\" ", c); - } else { - RBBIDebugPrintf("%5x ", c); - } - RBBIDebugPrintf("%3d %3d\n", state, category); - } - #endif - - // State Transition - move machine to its next state - // - // fNextState is a variable-length array. - U_ASSERT(categoryfHeader->fCatCount); - state = row->fNextState[category]; /*Not accessing beyond memory*/ - row = (RBBIStateTableRow *) - (stateTable->fTableData + (stateTable->fRowLen * state)); - - if (state == STOP_STATE) { - // This is the normal exit from the lookup state machine. - // Transistion to state zero means we have found a safe point. - break; - } - } - - // The state machine is done. Check whether it found a match... 
- result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); - #ifdef RBBI_DEBUG - if (gTrace) { - RBBIDebugPrintf("result = %d\n\n", result); - } - #endif - return result; -} - -//------------------------------------------------------------------------------- -// -// getRuleStatus() Return the break rule tag associated with the current -// iterator position. If the iterator arrived at its current -// position by iterating forwards, the value will have been -// cached by the handleNext() function. -// -//------------------------------------------------------------------------------- - -int32_t RuleBasedBreakIterator::getRuleStatus() const { - - // fLastRuleStatusIndex indexes to the start of the appropriate status record - // (the number of status values.) - // This function returns the last (largest) of the array of status values. - int32_t idx = fRuleStatusIndex + fData->fRuleStatusTable[fRuleStatusIndex]; - int32_t tagVal = fData->fRuleStatusTable[idx]; - - return tagVal; -} - - -int32_t RuleBasedBreakIterator::getRuleStatusVec( - int32_t *fillInVec, int32_t capacity, UErrorCode &status) { - if (U_FAILURE(status)) { - return 0; - } - - int32_t numVals = fData->fRuleStatusTable[fRuleStatusIndex]; - int32_t numValsToCopy = numVals; - if (numVals > capacity) { - status = U_BUFFER_OVERFLOW_ERROR; - numValsToCopy = capacity; - } - int i; - for (i=0; ifRuleStatusTable[fRuleStatusIndex + i + 1]; - } - return numVals; -} - - - -//------------------------------------------------------------------------------- -// -// getBinaryRules Access to the compiled form of the rules, -// for use by build system tools that save the data -// for standard iterator types. 
-// -//------------------------------------------------------------------------------- -const uint8_t *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) { - const uint8_t *retPtr = NULL; - length = 0; - - if (fData != NULL) { - retPtr = (const uint8_t *)fData->fHeader; - length = fData->fHeader->fLength; - } - return retPtr; -} - - -BreakIterator * RuleBasedBreakIterator::createBufferClone(void * /*stackBuffer*/, - int32_t &bufferSize, - UErrorCode &status) -{ - if (U_FAILURE(status)){ - return NULL; - } - - if (bufferSize == 0) { - bufferSize = 1; // preflighting for deprecated functionality - return NULL; - } - - BreakIterator *clonedBI = clone(); - if (clonedBI == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } else { - status = U_SAFECLONE_ALLOCATED_WARNING; - } - return (RuleBasedBreakIterator *)clonedBI; -} - -U_NAMESPACE_END - - -static icu::UStack *gLanguageBreakFactories = nullptr; -static const icu::UnicodeString *gEmptyString = nullptr; -static icu::UInitOnce gLanguageBreakFactoriesInitOnce = U_INITONCE_INITIALIZER; -static icu::UInitOnce gRBBIInitOnce = U_INITONCE_INITIALIZER; - -/** - * Release all static memory held by breakiterator. 
- */ -U_CDECL_BEGIN -static UBool U_CALLCONV rbbi_cleanup(void) { - delete gLanguageBreakFactories; - gLanguageBreakFactories = nullptr; - delete gEmptyString; - gEmptyString = nullptr; - gLanguageBreakFactoriesInitOnce.reset(); - gRBBIInitOnce.reset(); - return TRUE; -} -U_CDECL_END - -U_CDECL_BEGIN -static void U_CALLCONV _deleteFactory(void *obj) { - delete (icu::LanguageBreakFactory *) obj; -} -U_CDECL_END -U_NAMESPACE_BEGIN - -static void U_CALLCONV rbbiInit() { - gEmptyString = new UnicodeString(); - ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup); -} - -static void U_CALLCONV initLanguageFactories() { - UErrorCode status = U_ZERO_ERROR; - U_ASSERT(gLanguageBreakFactories == NULL); - gLanguageBreakFactories = new UStack(_deleteFactory, NULL, status); - if (gLanguageBreakFactories != NULL && U_SUCCESS(status)) { - ICULanguageBreakFactory *builtIn = new ICULanguageBreakFactory(status); - gLanguageBreakFactories->push(builtIn, status); -#ifdef U_LOCAL_SERVICE_HOOK - LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status); - if (extra != NULL) { - gLanguageBreakFactories->push(extra, status); - } -#endif - } - ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup); -} - - -static const LanguageBreakEngine* -getLanguageBreakEngineFromFactory(UChar32 c) -{ - umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories); - if (gLanguageBreakFactories == NULL) { - return NULL; - } - - int32_t i = gLanguageBreakFactories->size(); - const LanguageBreakEngine *lbe = NULL; - while (--i >= 0) { - LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i)); - lbe = factory->getEngineFor(c); - if (lbe != NULL) { - break; - } - } - return lbe; -} - - -//------------------------------------------------------------------------------- -// -// getLanguageBreakEngine Find an appropriate LanguageBreakEngine for the -// the character c. 
-// -//------------------------------------------------------------------------------- -const LanguageBreakEngine * -RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { - const LanguageBreakEngine *lbe = NULL; - UErrorCode status = U_ZERO_ERROR; - - if (fLanguageBreakEngines == NULL) { - fLanguageBreakEngines = new UStack(status); - if (fLanguageBreakEngines == NULL || U_FAILURE(status)) { - delete fLanguageBreakEngines; - fLanguageBreakEngines = 0; - return NULL; - } - } - - int32_t i = fLanguageBreakEngines->size(); - while (--i >= 0) { - lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i)); - if (lbe->handles(c)) { - return lbe; - } - } - - // No existing dictionary took the character. See if a factory wants to - // give us a new LanguageBreakEngine for this character. - lbe = getLanguageBreakEngineFromFactory(c); - - // If we got one, use it and push it on our stack. - if (lbe != NULL) { - fLanguageBreakEngines->push((void *)lbe, status); - // Even if we can't remember it, we can keep looking it up, so - // return it even if the push fails. - return lbe; - } - - // No engine is forthcoming for this character. Add it to the - // reject set. Create the reject break engine if needed. - if (fUnhandledBreakEngine == NULL) { - fUnhandledBreakEngine = new UnhandledEngine(status); - if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - // Put it last so that scripts for which we have an engine get tried - // first. - fLanguageBreakEngines->insertElementAt(fUnhandledBreakEngine, 0, status); - // If we can't insert it, or creation failed, get rid of it - if (U_FAILURE(status)) { - delete fUnhandledBreakEngine; - fUnhandledBreakEngine = 0; - return NULL; - } - } - - // Tell the reject engine about the character; at its discretion, it may - // add more than just the one character. 
- fUnhandledBreakEngine->handleCharacter(c); - - return fUnhandledBreakEngine; -} - -void RuleBasedBreakIterator::dumpCache() { - fBreakCache->dumpCache(); -} - -void RuleBasedBreakIterator::dumpTables() { - fData->printData(); -} - -/** - * Returns the description used to create this iterator - */ - -const UnicodeString& -RuleBasedBreakIterator::getRules() const { - if (fData != NULL) { - return fData->getRuleSourceString(); - } else { - umtx_initOnce(gRBBIInitOnce, &rbbiInit); - return *gEmptyString; - } -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/node/deps/icu-small/source/common/rbbi_cache.cpp b/deps/node/deps/icu-small/source/common/rbbi_cache.cpp deleted file mode 100644 index 519c6104..00000000 --- a/deps/node/deps/icu-small/source/common/rbbi_cache.cpp +++ /dev/null @@ -1,658 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// file: rbbi_cache.cpp - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/ubrk.h" -#include "unicode/rbbi.h" - -#include "rbbi_cache.h" - -#include "brkeng.h" -#include "cmemory.h" -#include "rbbidata.h" -#include "rbbirb.h" -#include "uassert.h" -#include "uvectr32.h" - -U_NAMESPACE_BEGIN - -/* - * DictionaryCache implementation - */ - -RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) : - fBI(bi), fBreaks(status), fPositionInCache(-1), - fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) { -} - -RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() { -} - -void RuleBasedBreakIterator::DictionaryCache::reset() { - fPositionInCache = -1; - fStart = 0; - fLimit = 0; - fFirstRuleStatusIndex = 0; - fOtherRuleStatusIndex = 0; - fBreaks.removeAllElements(); -} - -UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) { - if (fromPos >= 
fLimit || fromPos < fStart) { - fPositionInCache = -1; - return FALSE; - } - - // Sequential iteration, move from previous boundary to the following - - int32_t r = 0; - if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) { - ++fPositionInCache; - if (fPositionInCache >= fBreaks.size()) { - fPositionInCache = -1; - return FALSE; - } - r = fBreaks.elementAti(fPositionInCache); - U_ASSERT(r > fromPos); - *result = r; - *statusIndex = fOtherRuleStatusIndex; - return TRUE; - } - - // Random indexing. Linear search for the boundary following the given position. - - for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) { - r= fBreaks.elementAti(fPositionInCache); - if (r > fromPos) { - *result = r; - *statusIndex = fOtherRuleStatusIndex; - return TRUE; - } - } - U_ASSERT(FALSE); - fPositionInCache = -1; - return FALSE; -} - - -UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) { - if (fromPos <= fStart || fromPos > fLimit) { - fPositionInCache = -1; - return FALSE; - } - - if (fromPos == fLimit) { - fPositionInCache = fBreaks.size() - 1; - if (fPositionInCache >= 0) { - U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos); - } - } - - int32_t r; - if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) { - --fPositionInCache; - r = fBreaks.elementAti(fPositionInCache); - U_ASSERT(r < fromPos); - *result = r; - *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex; - return TRUE; - } - - if (fPositionInCache == 0) { - fPositionInCache = -1; - return FALSE; - } - - for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) { - r = fBreaks.elementAti(fPositionInCache); - if (r < fromPos) { - *result = r; - *statusIndex = ( r == fStart) ? 
fFirstRuleStatusIndex : fOtherRuleStatusIndex; - return TRUE; - } - } - U_ASSERT(FALSE); - fPositionInCache = -1; - return FALSE; -} - -void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos, - int32_t firstRuleStatus, int32_t otherRuleStatus) { - if ((endPos - startPos) <= 1) { - return; - } - - reset(); - fFirstRuleStatusIndex = firstRuleStatus; - fOtherRuleStatusIndex = otherRuleStatus; - - int32_t rangeStart = startPos; - int32_t rangeEnd = endPos; - - uint16_t category; - int32_t current; - UErrorCode status = U_ZERO_ERROR; - int32_t foundBreakCount = 0; - UText *text = &fBI->fText; - - // Loop through the text, looking for ranges of dictionary characters. - // For each span, find the appropriate break engine, and ask it to find - // any breaks within the span. - - utext_setNativeIndex(text, rangeStart); - UChar32 c = utext_current32(text); - category = UTRIE2_GET16(fBI->fData->fTrie, c); - - while(U_SUCCESS(status)) { - while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & 0x4000) == 0) { - utext_next32(text); // TODO: cleaner loop structure. - c = utext_current32(text); - category = UTRIE2_GET16(fBI->fData->fTrie, c); - } - if (current >= rangeEnd) { - break; - } - - // We now have a dictionary character. Get the appropriate language object - // to deal with it. - const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c); - - // Ask the language object if there are any breaks. It will add them to the cache and - // leave the text pointer on the other side of its range, ready to search for the next one. - if (lbe != NULL) { - foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks); - } - - // Reload the loop variables for the next go-round - c = utext_current32(text); - category = UTRIE2_GET16(fBI->fData->fTrie, c); - } - - // If we found breaks, ensure that the first and last entries are - // the original starting and ending position. 
And initialize the - // cache iteration position to the first entry. - - // printf("foundBreakCount = %d\n", foundBreakCount); - if (foundBreakCount > 0) { - U_ASSERT(foundBreakCount == fBreaks.size()); - if (startPos < fBreaks.elementAti(0)) { - // The dictionary did not place a boundary at the start of the segment of text. - // Add one now. This should not commonly happen, but it would be easy for interactions - // of the rules for dictionary segments and the break engine implementations to - // inadvertently cause it. Cover it here, just in case. - fBreaks.insertElementAt(startPos, 0, status); - } - if (endPos > fBreaks.peeki()) { - fBreaks.push(endPos, status); - } - fPositionInCache = 0; - // Note: Dictionary matching may extend beyond the original limit. - fStart = fBreaks.elementAti(0); - fLimit = fBreaks.peeki(); - } else { - // there were no language-based breaks, even though the segment contained - // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache - // for this range will fail, and the calling code will fall back to the rule based boundaries. 
- } -} - - -/* - * BreakCache implemetation - */ - -RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) : - fBI(bi), fSideBuffer(status) { - reset(); -} - - -RuleBasedBreakIterator::BreakCache::~BreakCache() { -} - - -void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) { - fStartBufIdx = 0; - fEndBufIdx = 0; - fTextIdx = pos; - fBufIdx = 0; - fBoundaries[0] = pos; - fStatuses[0] = (uint16_t)ruleStatus; -} - - -int32_t RuleBasedBreakIterator::BreakCache::current() { - fBI->fPosition = fTextIdx; - fBI->fRuleStatusIndex = fStatuses[fBufIdx]; - fBI->fDone = FALSE; - return fTextIdx; -} - - -void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) { - // startPos is in the cache. Do a next() from that position. - // TODO: an awkward set of interactions with bi->fDone - // seek() does not clear it; it can't because of interactions with populateNear(). - // next() does not clear it in the fast-path case, where everything matters. Maybe it should. - // So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end. - fBI->fDone = false; - next(); - } - return; -} - - -void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) { - if (startPos == fTextIdx) { - previous(status); - } else { - // seek() leaves the BreakCache positioned at the preceding boundary - // if the requested position is between two bounaries. - // current() pushes the BreakCache position out to the BreakIterator itself. - U_ASSERT(startPos > fTextIdx); - current(); - } - } - return; -} - - -/* - * Out-of-line code for BreakCache::next(). 
- * Cache does not already contain the boundary - */ -void RuleBasedBreakIterator::BreakCache::nextOL() { - fBI->fDone = !populateFollowing(); - fBI->fPosition = fTextIdx; - fBI->fRuleStatusIndex = fStatuses[fBufIdx]; - return; -} - - -void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - int32_t initialBufIdx = fBufIdx; - if (fBufIdx == fStartBufIdx) { - // At start of cache. Prepend to it. - populatePreceding(status); - } else { - // Cache already holds the next boundary - fBufIdx = modChunkSize(fBufIdx - 1); - fTextIdx = fBoundaries[fBufIdx]; - } - fBI->fDone = (fBufIdx == initialBufIdx); - fBI->fPosition = fTextIdx; - fBI->fRuleStatusIndex = fStatuses[fBufIdx]; - return; -} - - -UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) { - if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) { - return FALSE; - } - if (pos == fBoundaries[fStartBufIdx]) { - // Common case: seek(0), from BreakIterator::first() - fBufIdx = fStartBufIdx; - fTextIdx = fBoundaries[fBufIdx]; - return TRUE; - } - if (pos == fBoundaries[fEndBufIdx]) { - fBufIdx = fEndBufIdx; - fTextIdx = fBoundaries[fBufIdx]; - return TRUE; - } - - int32_t min = fStartBufIdx; - int32_t max = fEndBufIdx; - while (min != max) { - int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2; - probe = modChunkSize(probe); - if (fBoundaries[probe] > pos) { - max = probe; - } else { - min = modChunkSize(probe + 1); - } - } - U_ASSERT(fBoundaries[max] > pos); - fBufIdx = modChunkSize(max - 1); - fTextIdx = fBoundaries[fBufIdx]; - U_ASSERT(fTextIdx <= pos); - return TRUE; -} - - -UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) { - if (U_FAILURE(status)) { - return FALSE; - } - U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]); - - // Find a boundary somewhere in the vicinity of the requested position. 
- // Depending on the safe rules and the text data, it could be either before, at, or after - // the requested position. - - - // If the requested position is not near already cached positions, clear the existing cache, - // find a near-by boundary and begin new cache contents there. - - if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) { - int32_t aBoundary = 0; - int32_t ruleStatusIndex = 0; - if (position > 20) { - int32_t backupPos = fBI->handleSafePrevious(position); - - if (backupPos > 0) { - // Advance to the boundary following the backup position. - // There is a complication: the safe reverse rules identify pairs of code points - // that are safe. If advancing from the safe point moves forwards by less than - // two code points, we need to advance one more time to ensure that the boundary - // is good, including a correct rules status value. - // - fBI->fPosition = backupPos; - aBoundary = fBI->handleNext(); - if (aBoundary <= backupPos + 4) { - // +4 is a quick test for possibly having advanced only one codepoint. - // Four being the length of the longest potential code point, a supplementary in UTF-8 - utext_setNativeIndex(&fBI->fText, aBoundary); - if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) { - // The initial handleNext() only advanced by a single code point. Go again. - aBoundary = fBI->handleNext(); // Safe rules identify safe pairs. - } - } - ruleStatusIndex = fBI->fRuleStatusIndex; - } - } - reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point. - } - - // Fill in boundaries between existing cache content and the new requested position. - - if (fBoundaries[fEndBufIdx] < position) { - // The last position in the cache precedes the requested position. - // Add following position(s) to the cache. 
- while (fBoundaries[fEndBufIdx] < position) { - if (!populateFollowing()) { - U_ASSERT(false); - return false; - } - } - fBufIdx = fEndBufIdx; // Set iterator position to the end of the buffer. - fTextIdx = fBoundaries[fBufIdx]; // Required because populateFollowing may add extra boundaries. - while (fTextIdx > position) { // Move backwards to a position at or preceding the requested pos. - previous(status); - } - return true; - } - - if (fBoundaries[fStartBufIdx] > position) { - // The first position in the cache is beyond the requested position. - // back up more until we get a boundary <= the requested position. - while (fBoundaries[fStartBufIdx] > position) { - populatePreceding(status); - } - fBufIdx = fStartBufIdx; // Set iterator position to the start of the buffer. - fTextIdx = fBoundaries[fBufIdx]; // Required because populatePreceding may add extra boundaries. - while (fTextIdx < position) { // Move forwards to a position at or following the requested pos. - next(); - } - if (fTextIdx > position) { - // If position is not itself a boundary, the next() loop above will overshoot. - // Back up one, leaving cache position at the boundary preceding the requested position. - previous(status); - } - return true; - } - - U_ASSERT(fTextIdx == position); - return true; -} - - - -UBool RuleBasedBreakIterator::BreakCache::populateFollowing() { - int32_t fromPosition = fBoundaries[fEndBufIdx]; - int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx]; - int32_t pos = 0; - int32_t ruleStatusIdx = 0; - - if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) { - addFollowing(pos, ruleStatusIdx, UpdateCachePosition); - return TRUE; - } - - fBI->fPosition = fromPosition; - pos = fBI->handleNext(); - if (pos == UBRK_DONE) { - return FALSE; - } - - ruleStatusIdx = fBI->fRuleStatusIndex; - if (fBI->fDictionaryCharCount > 0) { - // The text segment obtained from the rules includes dictionary characters. 
- // Subdivide it, with subdivided results going into the dictionary cache. - fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx); - if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) { - addFollowing(pos, ruleStatusIdx, UpdateCachePosition); - return TRUE; - // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point. - // But be careful with interactions with populateNear(). - } - } - - // Rule based segment did not include dictionary characters. - // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them, - // meaning that we didn't take the return, above. - // Add its end point to the cache. - addFollowing(pos, ruleStatusIdx, UpdateCachePosition); - - // Add several non-dictionary boundaries at this point, to optimize straight forward iteration. - // (subsequent calls to BreakIterator::next() will take the fast path, getting cached results. - // - for (int count=0; count<6; ++count) { - pos = fBI->handleNext(); - if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) { - break; - } - addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition); - } - - return TRUE; -} - - -UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) { - if (U_FAILURE(status)) { - return FALSE; - } - - int32_t fromPosition = fBoundaries[fStartBufIdx]; - if (fromPosition == 0) { - return FALSE; - } - - int32_t position = 0; - int32_t positionStatusIdx = 0; - - if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) { - addPreceding(position, positionStatusIdx, UpdateCachePosition); - return TRUE; - } - - int32_t backupPosition = fromPosition; - - // Find a boundary somewhere preceding the first already-cached boundary - do { - backupPosition = backupPosition - 30; - if (backupPosition <= 0) { - backupPosition = 0; - } else { - backupPosition = fBI->handleSafePrevious(backupPosition); - } - if (backupPosition 
== UBRK_DONE || backupPosition == 0) { - position = 0; - positionStatusIdx = 0; - } else { - // Advance to the boundary following the backup position. - // There is a complication: the safe reverse rules identify pairs of code points - // that are safe. If advancing from the safe point moves forwards by less than - // two code points, we need to advance one more time to ensure that the boundary - // is good, including a correct rules status value. - // - fBI->fPosition = backupPosition; - position = fBI->handleNext(); - if (position <= backupPosition + 4) { - // +4 is a quick test for possibly having advanced only one codepoint. - // Four being the length of the longest potential code point, a supplementary in UTF-8 - utext_setNativeIndex(&fBI->fText, position); - if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) { - // The initial handleNext() only advanced by a single code point. Go again. - position = fBI->handleNext(); // Safe rules identify safe pairs. - } - }; - positionStatusIdx = fBI->fRuleStatusIndex; - } - } while (position >= fromPosition); - - // Find boundaries between the one we just located and the first already-cached boundary - // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer.. - - fSideBuffer.removeAllElements(); - fSideBuffer.addElement(position, status); - fSideBuffer.addElement(positionStatusIdx, status); - - do { - int32_t prevPosition = fBI->fPosition = position; - int32_t prevStatusIdx = positionStatusIdx; - position = fBI->handleNext(); - positionStatusIdx = fBI->fRuleStatusIndex; - if (position == UBRK_DONE) { - break; - } - - UBool segmentHandledByDictionary = FALSE; - if (fBI->fDictionaryCharCount != 0) { - // Segment from the rules includes dictionary characters. - // Subdivide it, with subdivided results going into the dictionary cache. 
- int32_t dictSegEndPosition = position; - fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx); - while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) { - segmentHandledByDictionary = true; - U_ASSERT(position > prevPosition); - if (position >= fromPosition) { - break; - } - U_ASSERT(position <= dictSegEndPosition); - fSideBuffer.addElement(position, status); - fSideBuffer.addElement(positionStatusIdx, status); - prevPosition = position; - } - U_ASSERT(position==dictSegEndPosition || position>=fromPosition); - } - - if (!segmentHandledByDictionary && position < fromPosition) { - fSideBuffer.addElement(position, status); - fSideBuffer.addElement(positionStatusIdx, status); - } - } while (position < fromPosition); - - // Move boundaries from the side buffer to the main circular buffer. - UBool success = FALSE; - if (!fSideBuffer.isEmpty()) { - positionStatusIdx = fSideBuffer.popi(); - position = fSideBuffer.popi(); - addPreceding(position, positionStatusIdx, UpdateCachePosition); - success = TRUE; - } - - while (!fSideBuffer.isEmpty()) { - positionStatusIdx = fSideBuffer.popi(); - position = fSideBuffer.popi(); - if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) { - // No space in circular buffer to hold a new preceding result while - // also retaining the current cache (iteration) position. - // Bailing out is safe; the cache will refill again if needed. - break; - } - } - - return success; -} - - -void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) { - U_ASSERT(position > fBoundaries[fEndBufIdx]); - U_ASSERT(ruleStatusIdx <= UINT16_MAX); - int32_t nextIdx = modChunkSize(fEndBufIdx + 1); - if (nextIdx == fStartBufIdx) { - fStartBufIdx = modChunkSize(fStartBufIdx + 6); // TODO: experiment. Probably revert to 1. 
- } - fBoundaries[nextIdx] = position; - fStatuses[nextIdx] = static_cast(ruleStatusIdx); - fEndBufIdx = nextIdx; - if (update == UpdateCachePosition) { - // Set current position to the newly added boundary. - fBufIdx = nextIdx; - fTextIdx = position; - } else { - // Retaining the original cache position. - // Check if the added boundary wraps around the buffer, and would over-write the original position. - // It's the responsibility of callers of this function to not add too many. - U_ASSERT(nextIdx != fBufIdx); - } -} - -bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) { - U_ASSERT(position < fBoundaries[fStartBufIdx]); - U_ASSERT(ruleStatusIdx <= UINT16_MAX); - int32_t nextIdx = modChunkSize(fStartBufIdx - 1); - if (nextIdx == fEndBufIdx) { - if (fBufIdx == fEndBufIdx && update == RetainCachePosition) { - // Failure. The insertion of the new boundary would claim the buffer position that is the - // current iteration position. And we also want to retain the current iteration position. - // (The buffer is already completely full of entries that precede the iteration position.) 
- return false; - } - fEndBufIdx = modChunkSize(fEndBufIdx - 1); - } - fBoundaries[nextIdx] = position; - fStatuses[nextIdx] = static_cast(ruleStatusIdx); - fStartBufIdx = nextIdx; - if (update == UpdateCachePosition) { - fBufIdx = nextIdx; - fTextIdx = position; - } - return true; -} - - -void RuleBasedBreakIterator::BreakCache::dumpCache() { -#ifdef RBBI_DEBUG - RBBIDebugPrintf("fTextIdx:%d fBufIdx:%d\n", fTextIdx, fBufIdx); - for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+1)) { - RBBIDebugPrintf("%d %d\n", i, fBoundaries[i]); - if (i == fEndBufIdx) { - break; - } - } -#endif -} - -U_NAMESPACE_END - -#endif // #if !UCONFIG_NO_BREAK_ITERATION diff --git a/deps/node/deps/icu-small/source/common/rbbi_cache.h b/deps/node/deps/icu-small/source/common/rbbi_cache.h deleted file mode 100644 index fd6deb43..00000000 --- a/deps/node/deps/icu-small/source/common/rbbi_cache.h +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright (C) 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// file: rbbi_cache.h -// -#ifndef RBBI_CACHE_H -#define RBBI_CACHE_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/rbbi.h" -#include "unicode/uobject.h" - -#include "uvectr32.h" - -U_NAMESPACE_BEGIN - -/* DictionaryCache stores the boundaries obtained from a run of dictionary characters. - * Dictionary boundaries are moved first to this cache, then from here - * to the main BreakCache, where they may inter-leave with non-dictionary - * boundaries. The public BreakIterator API always fetches directly - * from the main BreakCache, not from here. - * - * In common situations, the number of boundaries in a single dictionary run - * should be quite small, it will be terminated by punctuation, spaces, - * or any other non-dictionary characters. The main BreakCache may end - * up with boundaries from multiple dictionary based runs. 
- * - * The boundaries are stored in a simple ArrayList (vector), with the - * assumption that they will be accessed sequentially. - */ -class RuleBasedBreakIterator::DictionaryCache: public UMemory { - public: - DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status); - ~DictionaryCache(); - - void reset(); - - UBool following(int32_t fromPos, int32_t *pos, int32_t *statusIndex); - UBool preceding(int32_t fromPos, int32_t *pos, int32_t *statusIndex); - - /** - * Populate the cache with the dictionary based boundaries within a region of text. - * @param startPos The start position of a range of text - * @param endPos The end position of a range of text - * @param firstRuleStatus The rule status index that applies to the break at startPos - * @param otherRuleStatus The rule status index that applies to boundaries other than startPos - * @internal - */ - void populateDictionary(int32_t startPos, int32_t endPos, - int32_t firstRuleStatus, int32_t otherRuleStatus); - - - - RuleBasedBreakIterator *fBI; - - UVector32 fBreaks; // A vector containing the boundaries. - int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following() - // or preceding(). Optimizes sequential access. - int32_t fStart; // Text position of first boundary in cache. - int32_t fLimit; // Last boundary in cache. Which is the limit of the - // text segment being handled by the dictionary. - int32_t fFirstRuleStatusIndex; // Rule status info for first boundary. - int32_t fOtherRuleStatusIndex; // Rule status info for 2nd through last boundaries. -}; - - -/* - * class BreakCache - * - * Cache of break boundary positions and rule status values. - * Break iterator API functions, next(), previous(), etc., will use cached results - * when possible, and otherwise cache new results as they are obtained. - * - * Uniformly caches both dictionary and rule based (non-dictionary) boundaries. - * - * The cache is implemented as a single circular buffer. 
- */ - -/* - * size of the circular cache buffer. - */ - -class RuleBasedBreakIterator::BreakCache: public UMemory { - public: - BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status); - virtual ~BreakCache(); - void reset(int32_t pos = 0, int32_t ruleStatus = 0); - void next() { if (fBufIdx == fEndBufIdx) { - nextOL(); - } else { - fBufIdx = modChunkSize(fBufIdx + 1); - fTextIdx = fBI->fPosition = fBoundaries[fBufIdx]; - fBI->fRuleStatusIndex = fStatuses[fBufIdx]; - } - }; - - - void nextOL(); - void previous(UErrorCode &status); - - // Move the iteration state to the position following the startPosition. - // Input position must be pinned to the input length. - void following(int32_t startPosition, UErrorCode &status); - - void preceding(int32_t startPosition, UErrorCode &status); - - /* - * Update the state of the public BreakIterator (fBI) to reflect the - * current state of the break iterator cache (this). - */ - int32_t current(); - - /** - * Add boundaries to the cache near the specified position. - * The given position need not be a boundary itself. - * The input position must be within the range of the text, and - * on a code point boundary. - * If the requested position is a break boundary, leave the iteration - * position on it. - * If the requested position is not a boundary, leave the iteration - * position on the preceding boundary and include both the - * preceding and following boundaries in the cache. - * Additional boundaries, either preceding or following, may be added - * to the cache as a side effect. - * - * Return FALSE if the operation failed. - */ - UBool populateNear(int32_t position, UErrorCode &status); - - /** - * Add boundary(s) to the cache following the current last boundary. - * Return FALSE if at the end of the text, and no more boundaries can be added. - * Leave iteration position at the first newly added boundary, or unchanged if no boundary was added. 
- */ - UBool populateFollowing(); - - /** - * Add one or more boundaries to the cache preceding the first currently cached boundary. - * Leave the iteration position on the first added boundary. - * Return false if no boundaries could be added (if at the start of the text.) - */ - UBool populatePreceding(UErrorCode &status); - - enum UpdatePositionValues { - RetainCachePosition = 0, - UpdateCachePosition = 1 - }; - - /* - * Add the boundary following the current position. - * The current position can be left as it was, or changed to the newly added boundary, - * as specified by the update parameter. - */ - void addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update); - - - /* - * Add the boundary preceding the current position. - * The current position can be left as it was, or changed to the newly added boundary, - * as specified by the update parameter. - */ - bool addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update); - - /** - * Set the cache position to the specified position, or, if the position - * falls between to cached boundaries, to the preceding boundary. - * Fails if the requested position is outside of the range of boundaries currently held by the cache. - * The startPosition must be on a code point boundary. - * - * Return TRUE if successful, FALSE if the specified position is after - * the last cached boundary or before the first. 
- */ - UBool seek(int32_t startPosition); - - void dumpCache(); - - private: - static inline int32_t modChunkSize(int index) { return index & (CACHE_SIZE - 1); }; - - static constexpr int32_t CACHE_SIZE = 128; - static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two."); - - RuleBasedBreakIterator *fBI; - int32_t fStartBufIdx; - int32_t fEndBufIdx; // inclusive - - int32_t fTextIdx; - int32_t fBufIdx; - - int32_t fBoundaries[CACHE_SIZE]; - uint16_t fStatuses[CACHE_SIZE]; - - UVector32 fSideBuffer; -}; - -U_NAMESPACE_END - -#endif // #if !UCONFIG_NO_BREAK_ITERATION - -#endif // RBBI_CACHE_H diff --git a/deps/node/deps/icu-small/source/common/rbbicst.pl b/deps/node/deps/icu-small/source/common/rbbicst.pl deleted file mode 100755 index 839b9501..00000000 --- a/deps/node/deps/icu-small/source/common/rbbicst.pl +++ /dev/null @@ -1,456 +0,0 @@ -#************************************************************************** -# Copyright (C) 2016 and later: Unicode, Inc. and others. -# License & terms of use: http://www.unicode.org/copyright.html#License -#************************************************************************** -#************************************************************************** -# Copyright (C) 2002-2016 International Business Machines Corporation -# and others. All rights reserved. -#************************************************************************** -# -# rbbicst Compile the RBBI rule paser state table data into initialized C data. -# Usage: -# cd icu/source/common -# perl rbbicst.pl < rbbirpt.txt > rbbirpt.h -# perl rbbicst.pl -j < rbbirpt.txt > RBBIRuleParseTable.java -# -# The output file, rbbrpt.h, is included by some of the .cpp rbbi -# implementation files. This perl script is NOT run as part -# of a normal ICU build. It is run by hand when needed, and the -# rbbirpt.h generated file is put back into cvs. -# -# See rbbirpt.txt for a description of the input format for this script. 
-# - -if ($ARGV[0] eq "-j") { - $javaOutput = 1; - shift @ARGV; -} - - -$num_states = 1; # Always the state number for the line being compiled. -$line_num = 0; # The line number in the input file. - -$states{"pop"} = 255; # Add the "pop" to the list of defined state names. - # This prevents any state from being labelled with "pop", - # and resolves references to "pop" in the next state field. - -line_loop: while (<>) { - chomp(); - $line = $_; - @fields = split(); - $line_num++; - - # Remove # comments, which are any fields beginning with a #, plus all - # that follow on the line. - for ($i=0; $i<@fields; $i++) { - if ($fields[$i] =~ /^#/) { - @fields = @fields[0 .. $i-1]; - last; - } - } - # ignore blank lines, and those with no fields left after stripping comments.. - if (@fields == 0) { - next; - } - - # - # State Label: handling. - # Does the first token end with a ":"? If so, it's the name of a state. - # Put in a hash, together with the current state number, - # so that we can later look up the number from the name. - # - if (@fields[0] =~ /.*:$/) { - $state_name = @fields[0]; - $state_name =~ s/://; # strip off the colon from the state name. - - if ($states{$state_name} != 0) { - print " rbbicst: at line $line-num duplicate definition of state $state_name\n"; - } - $states{$state_name} = $num_states; - $stateNames[$num_states] = $state_name; - - # if the label was the only thing on this line, go on to the next line, - # otherwise assume that a state definition is on the same line and fall through. - if (@fields == 1) { - next line_loop; - } - shift @fields; # shift off label field in preparation - # for handling the rest of the line. - } - - # - # State Transition line. - # syntax is this, - # character [n] target-state [^push-state] [function-name] - # where - # [something] is an optional something - # character is either a single quoted character e.g. '[' - # or a name of a character class, e.g. 
white_space - # - - $state_line_num[$num_states] = $line_num; # remember line number with each state - # so we can make better error messages later. - # - # First field, character class or literal character for this transition. - # - if ($fields[0] =~ /^'.'$/) { - # We've got a quoted literal character. - $state_literal_chars[$num_states] = $fields[0]; - $state_literal_chars[$num_states] =~ s/'//g; - } else { - # We've got the name of a character class. - $state_char_class[$num_states] = $fields[0]; - if ($fields[0] =~ /[\W]/) { - print " rbbicsts: at line $line_num, bad character literal or character class name.\n"; - print " scanning $fields[0]\n"; - exit(-1); - } - } - shift @fields; - - # - # do the 'n' flag - # - $state_flag[$num_states] = $javaOutput? "false" : "FALSE"; - if ($fields[0] eq "n") { - $state_flag[$num_states] = $javaOutput? "true": "TRUE"; - shift @fields; - } - - # - # do the destination state. - # - $state_dest_state[$num_states] = $fields[0]; - if ($fields[0] eq "") { - print " rbbicsts: at line $line_num, destination state missing.\n"; - exit(-1); - } - shift @fields; - - # - # do the push state, if present. - # - if ($fields[0] =~ /^\^/) { - $fields[0] =~ s/^\^//; - $state_push_state[$num_states] = $fields[0]; - if ($fields[0] eq "" ) { - print " rbbicsts: at line $line_num, expected state after ^ (no spaces).\n"; - exit(-1); - } - shift @fields; - } - - # - # Lastly, do the optional action name. - # - if ($fields[0] ne "") { - $state_func_name[$num_states] = $fields[0]; - shift @fields; - } - - # - # There should be no fields left on the line at this point. - # - if (@fields > 0) { - print " rbbicsts: at line $line_num, unexpected extra stuff on input line.\n"; - print " scanning $fields[0]\n"; - } - $num_states++; -} - -# -# We've read in the whole file, now go back and output the -# C source code for the state transition table. 
-# -# We read all states first, before writing anything, so that the state numbers -# for the destination states are all available to be written. -# - -# -# Make hashes for the names of the character classes and -# for the names of the actions that appeared. -# -for ($state=1; $state < $num_states; $state++) { - if ($state_char_class[$state] ne "") { - if ($charClasses{$state_char_class[$state]} == 0) { - $charClasses{$state_char_class[$state]} = 1; - } - } - if ($state_func_name[$state] eq "") { - $state_func_name[$state] = "doNOP"; - } - if ($actions{$state_action_name[$state]} == 0) { - $actions{$state_func_name[$state]} = 1; - } -} - -# -# Check that all of the destination states have been defined -# -# -$states{"exit"} = 0; # Predefined state name, terminates state machine. -for ($state=1; $state<$num_states; $state++) { - if ($states{$state_dest_state[$state]} == 0 && $state_dest_state[$state] ne "exit") { - print "Error at line $state_line_num[$state]: target state \"$state_dest_state[$state]\" is not defined.\n"; - $errors++; - } - if ($state_push_state[$state] ne "" && $states{$state_push_state[$state]} == 0) { - print "Error at line $state_line_num[$state]: target state \"$state_push_state[$state]\" is not defined.\n"; - $errors++; - } -} - -die if ($errors>0); - -# -# Assign numbers to each of the character classes classes used. -# Sets are numbered from 128 - 250 -# The values 0-127 in the state table are used for matching -# individual ASCII characters (the only thing that can appear in the rules.) -# The "set" names appearing in the code below (default, etc.) need special -# handling because they do not correspond to a normal set of characters, -# but trigger special handling by code in the state machine. 
-# -$i = 128; -foreach $setName (sort keys %charClasses) { - if ($setName eq "default") { - $charClasses{$setName} = 255;} - elsif ($setName eq "escaped") { - $charClasses{$setName} = 254;} - elsif ($setName eq "escapedP") { - $charClasses{$setName} = 253;} - elsif ($setName eq "eof") { - $charClasses{$setName} = 252;} - else { - # Normal (single) character class. Number them. - $charClasses{$setName} = $i; - $i++; - } -} - - -my ($sec, $min, $hour, , $day, $mon, $year, $wday, $yday, $isdst) = localtime; -$year += 1900; - -if ($javaOutput) { - print "/*\n"; - print " *******************************************************************************\n"; - print " * Copyright (C) 2003-$year,\n"; - print " * International Business Machines Corporation and others. All Rights Reserved.\n"; - print " *******************************************************************************\n"; - print " */\n"; - print " \n"; - print "package com.ibm.icu.text;\n"; - print " \n"; - print "/**\n"; - print " * Generated Java File. Do not edit by hand.\n"; - print " * This file contains the state table for the ICU Rule Based Break Iterator\n"; - print " * rule parser.\n"; - print " * It is generated by the Perl script \"rbbicst.pl\" from\n"; - print " * the rule parser state definitions file \"rbbirpt.txt\".\n"; - print " * \@internal \n"; - print " *\n"; - print " */\n"; - - print "class RBBIRuleParseTable\n"; - print "{\n"; - - # - # Emit the constants for the actions to be performed. 
- # - $n = 1; - foreach $act (sort keys %actions) { - print " static final short $act = $n;\n"; - $n++; - } - print " \n"; - - # - # Emit constants for char class names - # - foreach $setName (sort keys %charClasses) { - print " static final short kRuleSet_$setName = $charClasses{$setName};\n"; - } - print "\n\n"; - - - print " static class RBBIRuleTableElement { \n"; - print " short fAction; \n"; - print " short fCharClass; \n"; - print " short fNextState; \n"; - print " short fPushState; \n"; - print " boolean fNextChar; \n"; - print " String fStateName; \n"; - print " RBBIRuleTableElement(short a, int cc, int ns, int ps, boolean nc, String sn) { \n"; - print " fAction = a; \n"; - print " fCharClass = (short)cc; \n"; - print " fNextState = (short)ns; \n"; - print " fPushState = (short)ps; \n"; - print " fNextChar = nc; \n"; - print " fStateName = sn; \n"; - print " } \n"; - print " }; \n"; - print " \n"; - - - print " static RBBIRuleTableElement[] gRuleParseStateTable = { \n "; - print " new RBBIRuleTableElement(doNOP, 0, 0,0, true, null ) // 0 \n"; #output the unused state 0. - for ($state=1; $state < $num_states; $state++) { - print " , new RBBIRuleTableElement($state_func_name[$state],"; - if ($state_literal_chars[$state] ne "") { - $c = $state_literal_chars[$state]; - print("'$c', "); - }else { - print " $charClasses{$state_char_class[$state]},"; - } - print " $states{$state_dest_state[$state]},"; - - # The push-state field is optional. If omitted, fill field with a zero, which flags - # the state machine that there is no push state. - if ($state_push_state[$state] eq "") { - print "0, "; - } else { - print " $states{$state_push_state[$state]},"; - } - print " $state_flag[$state], "; - - # if this is the first row of the table for this state, put out the state name. 
- if ($stateNames[$state] ne "") { - print " \"$stateNames[$state]\") "; - } else { - print " null ) "; - } - - # Put out a comment showing the number (index) of this state row, - print " // $state "; - print "\n"; - } - print " };\n"; - - print "}; \n"; - -} -else -{ - # - # C++ Output ... - # - - - print "//---------------------------------------------------------------------------------\n"; - print "//\n"; - print "// Generated Header File. Do not edit by hand.\n"; - print "// This file contains the state table for the ICU Rule Based Break Iterator\n"; - print "// rule parser.\n"; - print "// It is generated by the Perl script \"rbbicst.pl\" from\n"; - print "// the rule parser state definitions file \"rbbirpt.txt\".\n"; - print "//\n"; - print "// Copyright (C) 2002-$year International Business Machines Corporation \n"; - print "// and others. All rights reserved. \n"; - print "//\n"; - print "//---------------------------------------------------------------------------------\n"; - print "#ifndef RBBIRPT_H\n"; - print "#define RBBIRPT_H\n"; - print "\n"; - print "#include \"unicode/utypes.h\"\n"; - print "\n"; - print "U_NAMESPACE_BEGIN\n"; - - # - # Emit the constants for indicies of Unicode Sets - # Define one constant for each of the character classes encountered. - # At the same time, store the index corresponding to the set name back into hash. - # - print "//\n"; - print "// Character classes for RBBI rule scanning.\n"; - print "//\n"; - foreach $setName (sort keys %charClasses) { - if ($charClasses{$setName} < 250) { - # Normal character class. - print " static const uint8_t kRuleSet_$setName = $charClasses{$setName};\n"; - } - } - print "\n\n"; - - # - # Emit the enum for the actions to be performed. - # - print "enum RBBI_RuleParseAction {\n"; - foreach $act (sort keys %actions) { - print " $act,\n"; - } - print " rbbiLastAction};\n\n"; - - # - # Emit the struct definition for transtion table elements. 
- # - print "//-------------------------------------------------------------------------------\n"; - print "//\n"; - print "// RBBIRuleTableEl represents the structure of a row in the transition table\n"; - print "// for the rule parser state machine.\n"; - print "//-------------------------------------------------------------------------------\n"; - print "struct RBBIRuleTableEl {\n"; - print " RBBI_RuleParseAction fAction;\n"; - print " uint8_t fCharClass; // 0-127: an individual ASCII character\n"; - print " // 128-255: character class index\n"; - print " uint8_t fNextState; // 0-250: normal next-stat numbers\n"; - print " // 255: pop next-state from stack.\n"; - print " uint8_t fPushState;\n"; - print " UBool fNextChar;\n"; - print "};\n\n"; - - # - # emit the state transition table - # - print "static const struct RBBIRuleTableEl gRuleParseStateTable[] = {\n"; - print " {doNOP, 0, 0, 0, TRUE}\n"; # State 0 is a dummy. Real states start with index = 1. - for ($state=1; $state < $num_states; $state++) { - print " , {$state_func_name[$state],"; - if ($state_literal_chars[$state] ne "") { - $c = $state_literal_chars[$state]; - printf(" %d /* $c */,", ord($c)); # use numeric value, so EBCDIC machines are ok. - }else { - print " $charClasses{$state_char_class[$state]},"; - } - print " $states{$state_dest_state[$state]},"; - - # The push-state field is optional. If omitted, fill field with a zero, which flags - # the state machine that there is no push state. - if ($state_push_state[$state] eq "") { - print "0, "; - } else { - print " $states{$state_push_state[$state]},"; - } - print " $state_flag[$state]} "; - - # Put out a C++ comment showing the number (index) of this state row, - # and, if this is the first row of the table for this state, the state name. - print " // $state "; - if ($stateNames[$state] ne "") { - print " $stateNames[$state]"; - } - print "\n"; - }; - print " };\n"; - - - # - # emit a mapping array from state numbers to state names. 
- # - # This array is used for producing debugging output from the rule parser. - # - print "#ifdef RBBI_DEBUG\n"; - print "static const char * const RBBIRuleStateNames[] = {"; - for ($state=0; $state<$num_states; $state++) { - if ($stateNames[$state] ne "") { - print " \"$stateNames[$state]\",\n"; - } else { - print " 0,\n"; - } - } - print " 0};\n"; - print "#endif\n\n"; - - print "U_NAMESPACE_END\n"; - print "#endif\n"; -} diff --git a/deps/node/deps/icu-small/source/common/rbbidata.cpp b/deps/node/deps/icu-small/source/common/rbbidata.cpp deleted file mode 100644 index fdcb5649..00000000 --- a/deps/node/deps/icu-small/source/common/rbbidata.cpp +++ /dev/null @@ -1,425 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -*************************************************************************** -* Copyright (C) 1999-2014 International Business Machines Corporation * -* and others. All rights reserved. * -*************************************************************************** -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/utypes.h" -#include "rbbidata.h" -#include "rbbirb.h" -#include "utrie2.h" -#include "udatamem.h" -#include "cmemory.h" -#include "cstring.h" -#include "umutex.h" - -#include "uassert.h" - - -U_NAMESPACE_BEGIN - -//----------------------------------------------------------------------------- -// -// Constructors. 
-// -//----------------------------------------------------------------------------- -RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status) { - init0(); - init(data, status); -} - -RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt, UErrorCode &status) { - init0(); - init(data, status); - fDontFreeData = TRUE; -} - -RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) { - init0(); - if (U_FAILURE(status)) { - return; - } - const DataHeader *dh = udm->pHeader; - int32_t headerSize = dh->dataHeader.headerSize; - if ( !(headerSize >= 20 && - dh->info.isBigEndian == U_IS_BIG_ENDIAN && - dh->info.charsetFamily == U_CHARSET_FAMILY && - dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk " - dh->info.dataFormat[1] == 0x72 && - dh->info.dataFormat[2] == 0x6b && - dh->info.dataFormat[3] == 0x20 && - isDataVersionAcceptable(dh->info.formatVersion)) - ) { - status = U_INVALID_FORMAT_ERROR; - return; - } - const char *dataAsBytes = reinterpret_cast(dh); - const RBBIDataHeader *rbbidh = reinterpret_cast(dataAsBytes + headerSize); - init(rbbidh, status); - fUDataMem = udm; -} - -UBool RBBIDataWrapper::isDataVersionAcceptable(const UVersionInfo version) { - return RBBI_DATA_FORMAT_VERSION[0] == version[0]; -} - - -//----------------------------------------------------------------------------- -// -// init(). Does most of the work of construction, shared between the -// constructors. 
-// -//----------------------------------------------------------------------------- -void RBBIDataWrapper::init0() { - fHeader = NULL; - fForwardTable = NULL; - fReverseTable = NULL; - fRuleSource = NULL; - fRuleStatusTable = NULL; - fTrie = NULL; - fUDataMem = NULL; - fRefCount = 0; - fDontFreeData = TRUE; -} - -void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - fHeader = data; - if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) { - status = U_INVALID_FORMAT_ERROR; - return; - } - // Note: in ICU version 3.2 and earlier, there was a formatVersion 1 - // that is no longer supported. At that time fFormatVersion was - // an int32_t field, rather than an array of 4 bytes. - - fDontFreeData = FALSE; - if (data->fFTableLen != 0) { - fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable); - } - if (data->fRTableLen != 0) { - fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable); - } - - fTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, - (uint8_t *)data + fHeader->fTrie, - fHeader->fTrieLen, - NULL, // *actual length - &status); - if (U_FAILURE(status)) { - return; - } - - fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource); - fRuleString.setTo(TRUE, fRuleSource, -1); - U_ASSERT(data->fRuleSourceLen > 0); - - fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable); - fStatusMaxIdx = data->fStatusTableLen / sizeof(int32_t); - - fRefCount = 1; - -#ifdef RBBI_DEBUG - char *debugEnv = getenv("U_RBBIDEBUG"); - if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();} -#endif -} - - -//----------------------------------------------------------------------------- -// -// Destructor. Don't call this - use removeReference() instead. 
-// -//----------------------------------------------------------------------------- -RBBIDataWrapper::~RBBIDataWrapper() { - U_ASSERT(fRefCount == 0); - utrie2_close(fTrie); - fTrie = NULL; - if (fUDataMem) { - udata_close(fUDataMem); - } else if (!fDontFreeData) { - uprv_free((void *)fHeader); - } -} - - - -//----------------------------------------------------------------------------- -// -// Operator == Consider two RBBIDataWrappers to be equal if they -// refer to the same underlying data. Although -// the data wrappers are normally shared between -// iterator instances, it's possible to independently -// open the same data twice, and get two instances, which -// should still be ==. -// -//----------------------------------------------------------------------------- -UBool RBBIDataWrapper::operator ==(const RBBIDataWrapper &other) const { - if (fHeader == other.fHeader) { - return TRUE; - } - if (fHeader->fLength != other.fHeader->fLength) { - return FALSE; - } - if (uprv_memcmp(fHeader, other.fHeader, fHeader->fLength) == 0) { - return TRUE; - } - return FALSE; -} - -int32_t RBBIDataWrapper::hashCode() { - return fHeader->fFTableLen; -} - - - -//----------------------------------------------------------------------------- -// -// Reference Counting. A single RBBIDataWrapper object is shared among -// however many RulesBasedBreakIterator instances are -// referencing the same data. 
-// -//----------------------------------------------------------------------------- -void RBBIDataWrapper::removeReference() { - if (umtx_atomic_dec(&fRefCount) == 0) { - delete this; - } -} - - -RBBIDataWrapper *RBBIDataWrapper::addReference() { - umtx_atomic_inc(&fRefCount); - return this; -} - - - -//----------------------------------------------------------------------------- -// -// getRuleSourceString -// -//----------------------------------------------------------------------------- -const UnicodeString &RBBIDataWrapper::getRuleSourceString() const { - return fRuleString; -} - - -//----------------------------------------------------------------------------- -// -// print - debugging function to dump the runtime data tables. -// -//----------------------------------------------------------------------------- -#ifdef RBBI_DEBUG -void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *table) { - uint32_t c; - uint32_t s; - - RBBIDebugPrintf(" %s\n", heading); - - RBBIDebugPrintf("State | Acc LA TagIx"); - for (c=0; cfCatCount; c++) {RBBIDebugPrintf("%3d ", c);} - RBBIDebugPrintf("\n------|---------------"); for (c=0;cfCatCount; c++) { - RBBIDebugPrintf("----"); - } - RBBIDebugPrintf("\n"); - - if (table == NULL) { - RBBIDebugPrintf(" N U L L T A B L E\n\n"); - return; - } - for (s=0; sfNumStates; s++) { - RBBIStateTableRow *row = (RBBIStateTableRow *) - (table->fTableData + (table->fRowLen * s)); - RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->fAccepting, row->fLookAhead, row->fTagIdx); - for (c=0; cfCatCount; c++) { - RBBIDebugPrintf("%3d ", row->fNextState[c]); - } - RBBIDebugPrintf("\n"); - } - RBBIDebugPrintf("\n"); -} -#endif - - -void RBBIDataWrapper::printData() { -#ifdef RBBI_DEBUG - RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader); - RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1], - fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]); - RBBIDebugPrintf(" total length 
of data = %d\n", fHeader->fLength); - RBBIDebugPrintf(" number of character categories = %d\n\n", fHeader->fCatCount); - - printTable("Forward State Transition Table", fForwardTable); - printTable("Reverse State Transition Table", fReverseTable); - - RBBIDebugPrintf("\nOrignal Rules source:\n"); - for (int32_t c=0; fRuleSource[c] != 0; c++) { - RBBIDebugPrintf("%c", fRuleSource[c]); - } - RBBIDebugPrintf("\n\n"); -#endif -} - - -U_NAMESPACE_END -U_NAMESPACE_USE - -//----------------------------------------------------------------------------- -// -// ubrk_swap - byte swap and char encoding swap of RBBI data -// -//----------------------------------------------------------------------------- - -U_CAPI int32_t U_EXPORT2 -ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, - UErrorCode *status) { - - if (status == NULL || U_FAILURE(*status)) { - return 0; - } - if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { - *status=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - // - // Check that the data header is for for break data. - // (Header contents are defined in genbrk.cpp) - // - const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4); - if(!( pInfo->dataFormat[0]==0x42 && /* dataFormat="Brk " */ - pInfo->dataFormat[1]==0x72 && - pInfo->dataFormat[2]==0x6b && - pInfo->dataFormat[3]==0x20 && - RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) { - udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0]); - *status=U_UNSUPPORTED_ERROR; - return 0; - } - - // - // Swap the data header. (This is the generic ICU Data Header, not the RBBI Specific - // RBBIDataHeader). This swap also conveniently gets us - // the size of the ICU d.h., which lets us locate the start - // of the RBBI specific data. 
- // - int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status); - - - // - // Get the RRBI Data Header, and check that it appears to be OK. - // - const uint8_t *inBytes =(const uint8_t *)inData+headerSize; - RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes; - if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 || - !RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) || - ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) { - udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n"); - *status=U_UNSUPPORTED_ERROR; - return 0; - } - - // - // Prefight operation? Just return the size - // - int32_t breakDataLength = ds->readUInt32(rbbiDH->fLength); - int32_t totalSize = headerSize + breakDataLength; - if (length < 0) { - return totalSize; - } - - // - // Check that length passed in is consistent with length from RBBI data header. - // - if (length < totalSize) { - udata_printError(ds, "ubrk_swap(): too few bytes (%d after ICU Data header) for break data.\n", - breakDataLength); - *status=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - - // - // Swap the Data. Do the data itself first, then the RBBI Data Header, because - // we need to reference the header to locate the data, and an - // inplace swap of the header leaves it unusable. - // - uint8_t *outBytes = (uint8_t *)outData + headerSize; - RBBIDataHeader *outputDH = (RBBIDataHeader *)outBytes; - - int32_t tableStartOffset; - int32_t tableLength; - - // - // If not swapping in place, zero out the output buffer before starting. - // Individual tables and other data items within are aligned to 8 byte boundaries - // when originally created. Any unused space between items needs to be zero. - // - if (inBytes != outBytes) { - uprv_memset(outBytes, 0, breakDataLength); - } - - // - // Each state table begins with several 32 bit fields. Calculate the size - // in bytes of these. - // - int32_t topSize = offsetof(RBBIStateTable, fTableData); - - // Forward state table. 
- tableStartOffset = ds->readUInt32(rbbiDH->fFTable); - tableLength = ds->readUInt32(rbbiDH->fFTableLen); - - if (tableLength > 0) { - ds->swapArray32(ds, inBytes+tableStartOffset, topSize, - outBytes+tableStartOffset, status); - ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize, - outBytes+tableStartOffset+topSize, status); - } - - // Reverse state table. Same layout as forward table, above. - tableStartOffset = ds->readUInt32(rbbiDH->fRTable); - tableLength = ds->readUInt32(rbbiDH->fRTableLen); - - if (tableLength > 0) { - ds->swapArray32(ds, inBytes+tableStartOffset, topSize, - outBytes+tableStartOffset, status); - ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize, - outBytes+tableStartOffset+topSize, status); - } - - // Trie table for character categories - utrie2_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen), - outBytes+ds->readUInt32(rbbiDH->fTrie), status); - - // Source Rules Text. It's UChar data - ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen), - outBytes+ds->readUInt32(rbbiDH->fRuleSource), status); - - // Table of rule status values. It's all int_32 values - ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen), - outBytes+ds->readUInt32(rbbiDH->fStatusTable), status); - - // And, last, the header. - // It is all int32_t values except for fFormataVersion, which is an array of four bytes. - // Swap the whole thing as int32_t, then re-swap the one field. 
- // - ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status); - ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status); - - return totalSize; -} - - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/node/deps/icu-small/source/common/rbbidata.h b/deps/node/deps/icu-small/source/common/rbbidata.h deleted file mode 100644 index 3e573039..00000000 --- a/deps/node/deps/icu-small/source/common/rbbidata.h +++ /dev/null @@ -1,197 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2014 International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: rbbidata.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* RBBI data formats Includes -* -* Structs that describes the format of the Binary RBBI data, -* as it is stored in ICU's data file. -* -* RBBIDataWrapper - Instances of this class sit between the -* raw data structs and the RulesBasedBreakIterator objects -* that are created by applications. The wrapper class -* provides reference counting for the underlying data, -* and direct pointers to data that would not otherwise -* be accessible without ugly pointer arithmetic. The -* wrapper does not attempt to provide any higher level -* abstractions for the data itself. -* -* There will be only one instance of RBBIDataWrapper for any -* set of RBBI run time data being shared by instances -* (clones) of RulesBasedBreakIterator. -*/ - -#ifndef __RBBIDATA_H__ -#define __RBBIDATA_H__ - -#include "unicode/utypes.h" -#include "unicode/udata.h" -#include "udataswp.h" - -/** - * Swap RBBI data. See udataswp.h. 
- * @internal - */ -U_CAPI int32_t U_EXPORT2 -ubrk_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -#ifdef __cplusplus - -#include "unicode/uobject.h" -#include "unicode/unistr.h" -#include "unicode/uversion.h" -#include "umutex.h" -#include "utrie2.h" - -U_NAMESPACE_BEGIN - -// The current RBBI data format version. -static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {5, 0, 0, 0}; - -/* - * The following structs map exactly onto the raw data from ICU common data file. - */ -struct RBBIDataHeader { - uint32_t fMagic; /* == 0xbla0 */ - UVersionInfo fFormatVersion; /* Data Format. Same as the value in struct UDataInfo */ - /* if there is one associated with this data. */ - /* (version originates in rbbi, is copied to UDataInfo) */ - uint32_t fLength; /* Total length in bytes of this RBBI Data, */ - /* including all sections, not just the header. */ - uint32_t fCatCount; /* Number of character categories. */ - - /* */ - /* Offsets and sizes of each of the subsections within the RBBI data. */ - /* All offsets are bytes from the start of the RBBIDataHeader. */ - /* All sizes are in bytes. */ - /* */ - uint32_t fFTable; /* forward state transition table. */ - uint32_t fFTableLen; - uint32_t fRTable; /* Offset to the reverse state transition table. */ - uint32_t fRTableLen; - uint32_t fTrie; /* Offset to Trie data for character categories */ - uint32_t fTrieLen; - uint32_t fRuleSource; /* Offset to the source for for the break */ - uint32_t fRuleSourceLen; /* rules. Stored UChar *. */ - uint32_t fStatusTable; /* Offset to the table of rule status values */ - uint32_t fStatusTableLen; - - uint32_t fReserved[6]; /* Reserved for expansion */ - -}; - - - -struct RBBIStateTableRow { - int16_t fAccepting; /* Non-zero if this row is for an accepting state. */ - /* Value 0: not an accepting state. */ - /* -1: Unconditional Accepting state. */ - /* positive: Look-ahead match has completed. 
*/ - /* Actual boundary position happened earlier */ - /* Value here == fLookAhead in earlier */ - /* state, at actual boundary pos. */ - int16_t fLookAhead; /* Non-zero if this row is for a state that */ - /* corresponds to a '/' in the rule source. */ - /* Value is the same as the fAccepting */ - /* value for the rule (which will appear */ - /* in a different state. */ - int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */ - /* from a rule. Value is the index in the */ - /* StatusTable of the set of matching */ - /* tags (rule status values) */ - int16_t fReserved; - uint16_t fNextState[1]; /* Next State, indexed by char category. */ - /* Variable-length array declared with length 1 */ - /* to disable bounds checkers. */ - /* Array Size is actually fData->fHeader->fCatCount*/ - /* CAUTION: see RBBITableBuilder::getTableSize() */ - /* before changing anything here. */ -}; - - -struct RBBIStateTable { - uint32_t fNumStates; /* Number of states. */ - uint32_t fRowLen; /* Length of a state table row, in bytes. */ - uint32_t fFlags; /* Option Flags for this state table */ - uint32_t fReserved; /* reserved */ - char fTableData[1]; /* First RBBIStateTableRow begins here. */ - /* Variable-length array declared with length 1 */ - /* to disable bounds checkers. */ - /* (making it char[] simplifies ugly address */ - /* arithmetic for indexing variable length rows.) 
*/ -}; - -typedef enum { - RBBI_LOOKAHEAD_HARD_BREAK = 1, - RBBI_BOF_REQUIRED = 2 -} RBBIStateTableFlags; - - -/* */ -/* The reference counting wrapper class */ -/* */ -class RBBIDataWrapper : public UMemory { -public: - enum EDontAdopt { - kDontAdopt - }; - RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status); - RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status); - RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); - ~RBBIDataWrapper(); - - static UBool isDataVersionAcceptable(const UVersionInfo version); - - void init0(); - void init(const RBBIDataHeader *data, UErrorCode &status); - RBBIDataWrapper *addReference(); - void removeReference(); - UBool operator ==(const RBBIDataWrapper &other) const; - int32_t hashCode(); - const UnicodeString &getRuleSourceString() const; - void printData(); - void printTable(const char *heading, const RBBIStateTable *table); - - /* */ - /* Pointers to items within the data */ - /* */ - const RBBIDataHeader *fHeader; - const RBBIStateTable *fForwardTable; - const RBBIStateTable *fReverseTable; - const UChar *fRuleSource; - const int32_t *fRuleStatusTable; - - /* number of int32_t values in the rule status table. Used to sanity check indexing */ - int32_t fStatusMaxIdx; - - UTrie2 *fTrie; - -private: - u_atomic_int32_t fRefCount; - UDataMemory *fUDataMem; - UnicodeString fRuleString; - UBool fDontFreeData; - - RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */ - RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */ -}; - - - -U_NAMESPACE_END - -#endif /* C++ */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/rbbinode.cpp b/deps/node/deps/icu-small/source/common/rbbinode.cpp deleted file mode 100644 index 2181d81a..00000000 --- a/deps/node/deps/icu-small/source/common/rbbinode.cpp +++ /dev/null @@ -1,372 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -*************************************************************************** -* Copyright (C) 2002-2016 International Business Machines Corporation * -* and others. All rights reserved. * -*************************************************************************** -*/ - -// -// File: rbbinode.cpp -// -// Implementation of class RBBINode, which represents a node in the -// tree generated when parsing the Rules Based Break Iterator rules. -// -// This "Class" is actually closer to a struct. -// Code using it is expected to directly access fields much of the time. -// - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/unistr.h" -#include "unicode/uniset.h" -#include "unicode/uchar.h" -#include "unicode/parsepos.h" - -#include "cstr.h" -#include "uvector.h" - -#include "rbbirb.h" -#include "rbbinode.h" - -#include "uassert.h" - - -U_NAMESPACE_BEGIN - -#ifdef RBBI_DEBUG -static int gLastSerial = 0; -#endif - - -//------------------------------------------------------------------------- -// -// Constructor. Just set the fields to reasonable default values. 
-// -//------------------------------------------------------------------------- -RBBINode::RBBINode(NodeType t) : UMemory() { -#ifdef RBBI_DEBUG - fSerialNum = ++gLastSerial; -#endif - fType = t; - fParent = NULL; - fLeftChild = NULL; - fRightChild = NULL; - fInputSet = NULL; - fFirstPos = 0; - fLastPos = 0; - fNullable = FALSE; - fLookAheadEnd = FALSE; - fRuleRoot = FALSE; - fChainIn = FALSE; - fVal = 0; - fPrecedence = precZero; - - UErrorCode status = U_ZERO_ERROR; - fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere - fLastPosSet = new UVector(status); - fFollowPos = new UVector(status); - if (t==opCat) {fPrecedence = precOpCat;} - else if (t==opOr) {fPrecedence = precOpOr;} - else if (t==opStart) {fPrecedence = precStart;} - else if (t==opLParen) {fPrecedence = precLParen;} - -} - - -RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { -#ifdef RBBI_DEBUG - fSerialNum = ++gLastSerial; -#endif - fType = other.fType; - fParent = NULL; - fLeftChild = NULL; - fRightChild = NULL; - fInputSet = other.fInputSet; - fPrecedence = other.fPrecedence; - fText = other.fText; - fFirstPos = other.fFirstPos; - fLastPos = other.fLastPos; - fNullable = other.fNullable; - fVal = other.fVal; - fRuleRoot = FALSE; - fChainIn = other.fChainIn; - UErrorCode status = U_ZERO_ERROR; - fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere - fLastPosSet = new UVector(status); - fFollowPos = new UVector(status); -} - - -//------------------------------------------------------------------------- -// -// Destructor. Deletes both this node AND any child nodes, -// except in the case of variable reference nodes. For -// these, the l. child points back to the definition, which -// is common for all references to the variable, meaning -// it can't be deleted here. 
-// -//------------------------------------------------------------------------- -RBBINode::~RBBINode() { - // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); - delete fInputSet; - fInputSet = NULL; - - switch (this->fType) { - case varRef: - case setRef: - // for these node types, multiple instances point to the same "children" - // Storage ownership of children handled elsewhere. Don't delete here. - break; - - default: - delete fLeftChild; - fLeftChild = NULL; - delete fRightChild; - fRightChild = NULL; - } - - - delete fFirstPosSet; - delete fLastPosSet; - delete fFollowPos; - -} - - -//------------------------------------------------------------------------- -// -// cloneTree Make a copy of the subtree rooted at this node. -// Discard any variable references encountered along the way, -// and replace with copies of the variable's definitions. -// Used to replicate the expression underneath variable -// references in preparation for generating the DFA tables. -// -//------------------------------------------------------------------------- -RBBINode *RBBINode::cloneTree() { - RBBINode *n; - - if (fType == RBBINode::varRef) { - // If the current node is a variable reference, skip over it - // and clone the definition of the variable instead. - n = fLeftChild->cloneTree(); - } else if (fType == RBBINode::uset) { - n = this; - } else { - n = new RBBINode(*this); - // Check for null pointer. - if (n != NULL) { - if (fLeftChild != NULL) { - n->fLeftChild = fLeftChild->cloneTree(); - n->fLeftChild->fParent = n; - } - if (fRightChild != NULL) { - n->fRightChild = fRightChild->cloneTree(); - n->fRightChild->fParent = n; - } - } - } - return n; -} - - - -//------------------------------------------------------------------------- -// -// flattenVariables Walk a parse tree, replacing any variable -// references with a copy of the variable's definition. -// Aside from variables, the tree is not changed. -// -// Return the root of the tree. 
If the root was not a variable -// reference, it remains unchanged - the root we started with -// is the root we return. If, however, the root was a variable -// reference, the root of the newly cloned replacement tree will -// be returned, and the original tree deleted. -// -// This function works by recursively walking the tree -// without doing anything until a variable reference is -// found, then calling cloneTree() at that point. Any -// nested references are handled by cloneTree(), not here. -// -//------------------------------------------------------------------------- -RBBINode *RBBINode::flattenVariables() { - if (fType == varRef) { - RBBINode *retNode = fLeftChild->cloneTree(); - if (retNode != NULL) { - retNode->fRuleRoot = this->fRuleRoot; - retNode->fChainIn = this->fChainIn; - } - delete this; // TODO: undefined behavior. Fix. - return retNode; - } - - if (fLeftChild != NULL) { - fLeftChild = fLeftChild->flattenVariables(); - fLeftChild->fParent = this; - } - if (fRightChild != NULL) { - fRightChild = fRightChild->flattenVariables(); - fRightChild->fParent = this; - } - return this; -} - - -//------------------------------------------------------------------------- -// -// flattenSets Walk the parse tree, replacing any nodes of type setRef -// with a copy of the expression tree for the set. A set's -// equivalent expression tree is precomputed and saved as -// the left child of the uset node. 
-// -//------------------------------------------------------------------------- -void RBBINode::flattenSets() { - U_ASSERT(fType != setRef); - - if (fLeftChild != NULL) { - if (fLeftChild->fType==setRef) { - RBBINode *setRefNode = fLeftChild; - RBBINode *usetNode = setRefNode->fLeftChild; - RBBINode *replTree = usetNode->fLeftChild; - fLeftChild = replTree->cloneTree(); - fLeftChild->fParent = this; - delete setRefNode; - } else { - fLeftChild->flattenSets(); - } - } - - if (fRightChild != NULL) { - if (fRightChild->fType==setRef) { - RBBINode *setRefNode = fRightChild; - RBBINode *usetNode = setRefNode->fLeftChild; - RBBINode *replTree = usetNode->fLeftChild; - fRightChild = replTree->cloneTree(); - fRightChild->fParent = this; - delete setRefNode; - } else { - fRightChild->flattenSets(); - } - } -} - - - -//------------------------------------------------------------------------- -// -// findNodes() Locate all the nodes of the specified type, starting -// at the specified root. -// -//------------------------------------------------------------------------- -void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { - /* test for buffer overflows */ - if (U_FAILURE(status)) { - return; - } - if (fType == kind) { - dest->addElement(this, status); - } - if (fLeftChild != NULL) { - fLeftChild->findNodes(dest, kind, status); - } - if (fRightChild != NULL) { - fRightChild->findNodes(dest, kind, status); - } -} - - -//------------------------------------------------------------------------- -// -// print. Print out a single node, for debugging. -// -//------------------------------------------------------------------------- -#ifdef RBBI_DEBUG - -static int32_t serial(const RBBINode *node) { - return (node == NULL? 
-1 : node->fSerialNum); -} - - -void RBBINode::printNode(const RBBINode *node) { - static const char * const nodeTypeNames[] = { - "setRef", - "uset", - "varRef", - "leafChar", - "lookAhead", - "tag", - "endMark", - "opStart", - "opCat", - "opOr", - "opStar", - "opPlus", - "opQuestion", - "opBreak", - "opReverse", - "opLParen" - }; - - if (node==NULL) { - RBBIDebugPrintf("%10p", (void *)node); - } else { - RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ", - (void *)node, node->fSerialNum, nodeTypeNames[node->fType], - node->fRuleRoot?'R':' ', node->fChainIn?'C':' ', - serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent), - node->fFirstPos, node->fVal); - if (node->fType == varRef) { - RBBI_DEBUG_printUnicodeString(node->fText); - } - } - RBBIDebugPrintf("\n"); -} -#endif - - -#ifdef RBBI_DEBUG -U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) { - RBBIDebugPrintf("%*s", minWidth, CStr(s)()); -} -#endif - - -//------------------------------------------------------------------------- -// -// print. Print out the tree of nodes rooted at "this" -// -//------------------------------------------------------------------------- -#ifdef RBBI_DEBUG -void RBBINode::printNodeHeader() { - RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n"); -} - -void RBBINode::printTree(const RBBINode *node, UBool printHeading) { - if (printHeading) { - printNodeHeader(); - } - printNode(node); - if (node != NULL) { - // Only dump the definition under a variable reference if asked to. - // Unconditinally dump children of all other node types. 
- if (node->fType != varRef) { - if (node->fLeftChild != NULL) { - printTree(node->fLeftChild, FALSE); - } - - if (node->fRightChild != NULL) { - printTree(node->fRightChild, FALSE); - } - } - } -} -#endif - - - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/node/deps/icu-small/source/common/rbbinode.h b/deps/node/deps/icu-small/source/common/rbbinode.h deleted file mode 100644 index e3366216..00000000 --- a/deps/node/deps/icu-small/source/common/rbbinode.h +++ /dev/null @@ -1,126 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/******************************************************************** - * COPYRIGHT: - * Copyright (c) 2001-2016, International Business Machines Corporation and - * others. All Rights Reserved. - ********************************************************************/ - -#ifndef RBBINODE_H -#define RBBINODE_H - -#include "unicode/utypes.h" -#include "unicode/unistr.h" -#include "unicode/uobject.h" - -// -// class RBBINode -// -// Represents a node in the parse tree generated when reading -// a rule file. -// - -U_NAMESPACE_BEGIN - -class UnicodeSet; -class UVector; - -class RBBINode : public UMemory { - public: - enum NodeType { - setRef, - uset, - varRef, - leafChar, - lookAhead, - tag, - endMark, - opStart, - opCat, - opOr, - opStar, - opPlus, - opQuestion, - opBreak, - opReverse, - opLParen - }; - - enum OpPrecedence { - precZero, - precStart, - precLParen, - precOpOr, - precOpCat - }; - - NodeType fType; - RBBINode *fParent; - RBBINode *fLeftChild; - RBBINode *fRightChild; - UnicodeSet *fInputSet; // For uset nodes only. - OpPrecedence fPrecedence; // For binary ops only. - - UnicodeString fText; // Text corresponding to this node. - // May be lazily evaluated when (if) needed - // for some node types. - int fFirstPos; // Position in the rule source string of the - // first text associated with the node. 
- // If there's a left child, this will be the same - // as that child's left pos. - int fLastPos; // Last position in the rule source string - // of any text associated with this node. - // If there's a right child, this will be the same - // as that child's last postion. - - UBool fNullable; // See Aho. - int32_t fVal; // For leafChar nodes, the value. - // Values are the character category, - // corresponds to columns in the final - // state transition table. - - UBool fLookAheadEnd; // For endMark nodes, set TRUE if - // marking the end of a look-ahead rule. - - UBool fRuleRoot; // True if this node is the root of a rule. - UBool fChainIn; // True if chaining into this rule is allowed - // (no '^' present). - - UVector *fFirstPosSet; - UVector *fLastPosSet; // TODO: rename fFirstPos & fLastPos to avoid confusion. - UVector *fFollowPos; - - - RBBINode(NodeType t); - RBBINode(const RBBINode &other); - ~RBBINode(); - - RBBINode *cloneTree(); - RBBINode *flattenVariables(); - void flattenSets(); - void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status); - -#ifdef RBBI_DEBUG - static void printNodeHeader(); - static void printNode(const RBBINode *n); - static void printTree(const RBBINode *n, UBool withHeading); -#endif - - private: - RBBINode &operator = (const RBBINode &other); // No defs. - UBool operator == (const RBBINode &other); // Private, so these functions won't accidently be used. - -#ifdef RBBI_DEBUG - public: - int fSerialNum; // Debugging aids. -#endif -}; - -#ifdef RBBI_DEBUG -U_CFUNC void -RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth=0); -#endif - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/rbbirb.cpp b/deps/node/deps/icu-small/source/common/rbbirb.cpp deleted file mode 100644 index 5f5661af..00000000 --- a/deps/node/deps/icu-small/source/common/rbbirb.cpp +++ /dev/null @@ -1,328 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -// -// file: rbbirb.cpp -// -// Copyright (C) 2002-2011, International Business Machines Corporation and others. -// All Rights Reserved. -// -// This file contains the RBBIRuleBuilder class implementation. This is the main class for -// building (compiling) break rules into the tables required by the runtime -// RBBI engine. -// - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/brkiter.h" -#include "unicode/rbbi.h" -#include "unicode/ubrk.h" -#include "unicode/unistr.h" -#include "unicode/uniset.h" -#include "unicode/uchar.h" -#include "unicode/uchriter.h" -#include "unicode/parsepos.h" -#include "unicode/parseerr.h" - -#include "cmemory.h" -#include "cstring.h" -#include "rbbirb.h" -#include "rbbinode.h" -#include "rbbiscan.h" -#include "rbbisetb.h" -#include "rbbitblb.h" -#include "rbbidata.h" -#include "uassert.h" - - -U_NAMESPACE_BEGIN - - -//---------------------------------------------------------------------------------------- -// -// Constructor. 
-// -//---------------------------------------------------------------------------------------- -RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules, - UParseError *parseErr, - UErrorCode &status) - : fRules(rules), fStrippedRules(rules) -{ - fStatus = &status; // status is checked below - fParseError = parseErr; - fDebugEnv = NULL; -#ifdef RBBI_DEBUG - fDebugEnv = getenv("U_RBBIDEBUG"); -#endif - - - fForwardTree = NULL; - fReverseTree = NULL; - fSafeFwdTree = NULL; - fSafeRevTree = NULL; - fDefaultTree = &fForwardTree; - fForwardTable = NULL; - fRuleStatusVals = NULL; - fChainRules = FALSE; - fLBCMNoChain = FALSE; - fLookAheadHardBreak = FALSE; - fUSetNodes = NULL; - fRuleStatusVals = NULL; - fScanner = NULL; - fSetBuilder = NULL; - if (parseErr) { - uprv_memset(parseErr, 0, sizeof(UParseError)); - } - - if (U_FAILURE(status)) { - return; - } - - fUSetNodes = new UVector(status); // bcos status gets overwritten here - fRuleStatusVals = new UVector(status); - fScanner = new RBBIRuleScanner(this); - fSetBuilder = new RBBISetBuilder(this); - if (U_FAILURE(status)) { - return; - } - if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - } -} - - - -//---------------------------------------------------------------------------------------- -// -// Destructor -// -//---------------------------------------------------------------------------------------- -RBBIRuleBuilder::~RBBIRuleBuilder() { - - int i; - for (i=0; ; i++) { - RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i); - if (n==NULL) { - break; - } - delete n; - } - - delete fUSetNodes; - delete fSetBuilder; - delete fForwardTable; - delete fForwardTree; - delete fReverseTree; - delete fSafeFwdTree; - delete fSafeRevTree; - delete fScanner; - delete fRuleStatusVals; -} - - - - - -//---------------------------------------------------------------------------------------- -// -// flattenData() - Collect up the compiled RBBI rule data and put 
it into -// the format for saving in ICU data files, -// which is also the format needed by the RBBI runtime engine. -// -//---------------------------------------------------------------------------------------- -static int32_t align8(int32_t i) {return (i+7) & 0xfffffff8;} - -RBBIDataHeader *RBBIRuleBuilder::flattenData() { - int32_t i; - - if (U_FAILURE(*fStatus)) { - return NULL; - } - - // Remove whitespace from the rules to make it smaller. - // The rule parser has already removed comments. - fStrippedRules = fScanner->stripRules(fStrippedRules); - - // Calculate the size of each section in the data. - // Sizes here are padded up to a multiple of 8 for better memory alignment. - // Sections sizes actually stored in the header are for the actual data - // without the padding. - // - int32_t headerSize = align8(sizeof(RBBIDataHeader)); - int32_t forwardTableSize = align8(fForwardTable->getTableSize()); - int32_t reverseTableSize = align8(fForwardTable->getSafeTableSize()); - int32_t trieSize = align8(fSetBuilder->getTrieSize()); - int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t)); - int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar)); - - int32_t totalSize = headerSize - + forwardTableSize - + reverseTableSize - + statusTableSize + trieSize + rulesSize; - - RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize); - if (data == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memset(data, 0, totalSize); - - - data->fMagic = 0xb1a0; - data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0]; - data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1]; - data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2]; - data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3]; - data->fLength = totalSize; - data->fCatCount = fSetBuilder->getNumCharCategories(); - - data->fFTable = headerSize; - data->fFTableLen = forwardTableSize; - - data->fRTable = data->fFTable + data->fFTableLen; - data->fRTableLen = 
reverseTableSize; - - data->fTrie = data->fRTable + data->fRTableLen; - data->fTrieLen = fSetBuilder->getTrieSize(); - data->fStatusTable = data->fTrie + trieSize; - data->fStatusTableLen= statusTableSize; - data->fRuleSource = data->fStatusTable + statusTableSize; - data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar); - - uprv_memset(data->fReserved, 0, sizeof(data->fReserved)); - - fForwardTable->exportTable((uint8_t *)data + data->fFTable); - fForwardTable->exportSafeTable((uint8_t *)data + data->fRTable); - fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie); - - int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable); - for (i=0; isize(); i++) { - ruleStatusTable[i] = fRuleStatusVals->elementAti(i); - } - - fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus); - - return data; -} - - -//---------------------------------------------------------------------------------------- -// -// createRuleBasedBreakIterator construct from source rules that are passed in -// in a UnicodeString -// -//---------------------------------------------------------------------------------------- -BreakIterator * -RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, - UParseError *parseError, - UErrorCode &status) -{ - // - // Read the input rules, generate a parse tree, symbol table, - // and list of all Unicode Sets referenced by the rules. - // - RBBIRuleBuilder builder(rules, parseError, status); - if (U_FAILURE(status)) { // status checked here bcos build below doesn't - return NULL; - } - - RBBIDataHeader *data = builder.build(status); - - if (U_FAILURE(status)) { - return nullptr; - } - - // - // Create a break iterator from the compiled rules. - // (Identical to creation from stored pre-compiled rules) - // - // status is checked after init in construction. 
- RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status); - if (U_FAILURE(status)) { - delete This; - This = NULL; - } - else if(This == NULL) { // test for NULL - status = U_MEMORY_ALLOCATION_ERROR; - } - return This; -} - -RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) { - if (U_FAILURE(status)) { - return nullptr; - } - - fScanner->parse(); - if (U_FAILURE(status)) { - return nullptr; - } - - // - // UnicodeSet processing. - // Munge the Unicode Sets to create a set of character categories. - // Generate the mapping tables (TRIE) from input code points to - // the character categories. - // - fSetBuilder->buildRanges(); - - // - // Generate the DFA state transition table. - // - fForwardTable = new RBBITableBuilder(this, &fForwardTree, status); - if (fForwardTable == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - - fForwardTable->buildForwardTable(); - optimizeTables(); - fForwardTable->buildSafeReverseTable(status); - - -#ifdef RBBI_DEBUG - if (fDebugEnv && uprv_strstr(fDebugEnv, "states")) { - fForwardTable->printStates(); - fForwardTable->printRuleStatusTable(); - fForwardTable->printReverseTable(); - } -#endif - - fSetBuilder->buildTrie(); - - // - // Package up the compiled data into a memory image - // in the run-time format. - // - RBBIDataHeader *data = flattenData(); // returns NULL if error - if (U_FAILURE(status)) { - return nullptr; - } - return data; -} - -void RBBIRuleBuilder::optimizeTables() { - bool didSomething; - do { - didSomething = false; - - // Begin looking for duplicates with char class 3. - // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively, - // and should not have other categories merged into them. 
- IntPair duplPair = {3, 0}; - while (fForwardTable->findDuplCharClassFrom(&duplPair)) { - fSetBuilder->mergeCategories(duplPair); - fForwardTable->removeColumn(duplPair.second); - didSomething = true; - } - - while (fForwardTable->removeDuplicateStates() > 0) { - didSomething = true; - } - } while (didSomething); -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/node/deps/icu-small/source/common/rbbirb.h b/deps/node/deps/icu-small/source/common/rbbirb.h deleted file mode 100644 index 37992daa..00000000 --- a/deps/node/deps/icu-small/source/common/rbbirb.h +++ /dev/null @@ -1,234 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// -// rbbirb.h -// -// Copyright (C) 2002-2008, International Business Machines Corporation and others. -// All Rights Reserved. -// -// This file contains declarations for several classes from the -// Rule Based Break Iterator rule builder. -// - - -#ifndef RBBIRB_H -#define RBBIRB_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include - -#include "unicode/uobject.h" -#include "unicode/rbbi.h" -#include "unicode/uniset.h" -#include "unicode/parseerr.h" -#include "uhash.h" -#include "uvector.h" -#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that - // looks up references to $variables within a set. - - -U_NAMESPACE_BEGIN - -class RBBIRuleScanner; -struct RBBIRuleTableEl; -class RBBISetBuilder; -class RBBINode; -class RBBITableBuilder; - - - -//-------------------------------------------------------------------------------- -// -// RBBISymbolTable. Implements SymbolTable interface that is used by the -// UnicodeSet parser to resolve references to $variables. -// -//-------------------------------------------------------------------------------- -class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one -public: // of these structs for each entry. 
- RBBISymbolTableEntry(); - UnicodeString key; - RBBINode *val; - ~RBBISymbolTableEntry(); - -private: - RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class - RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class -}; - - -class RBBISymbolTable : public UMemory, public SymbolTable { -private: - const UnicodeString &fRules; - UHashtable *fHashTable; - RBBIRuleScanner *fRuleScanner; - - // These next two fields are part of the mechanism for passing references to - // already-constructed UnicodeSets back to the UnicodeSet constructor - // when the pattern includes $variable references. - const UnicodeString ffffString; // = "/uffff" - UnicodeSet *fCachedSetLookup; - -public: - // API inherited from class SymbolTable - virtual const UnicodeString* lookup(const UnicodeString& s) const; - virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const; - virtual UnicodeString parseReference(const UnicodeString& text, - ParsePosition& pos, int32_t limit) const; - - // Additional Functions - RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status); - virtual ~RBBISymbolTable(); - - virtual RBBINode *lookupNode(const UnicodeString &key) const; - virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err); - -#ifdef RBBI_DEBUG - virtual void rbbiSymtablePrint() const; -#else - // A do-nothing inline function for non-debug builds. Member funcs can't be empty - // or the call sites won't compile. - int32_t fFakeField; - #define rbbiSymtablePrint() fFakeField=0; -#endif - -private: - RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class - RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class -}; - - -//-------------------------------------------------------------------------------- -// -// class RBBIRuleBuilder The top-level class handling RBBI rule compiling. 
-// -//-------------------------------------------------------------------------------- -class RBBIRuleBuilder : public UMemory { -public: - - // Create a rule based break iterator from a set of rules. - // This function is the main entry point into the rule builder. The - // public ICU API for creating RBBIs uses this function to do the actual work. - // - static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules, - UParseError *parseError, - UErrorCode &status); - -public: - // The "public" functions and data members that appear below are accessed - // (and shared) by the various parts that make up the rule builder. They - // are NOT intended to be accessed by anything outside of the - // rule builder implementation. - RBBIRuleBuilder(const UnicodeString &rules, - UParseError *parseErr, - UErrorCode &status - ); - - virtual ~RBBIRuleBuilder(); - - /** - * Build the state tables and char class Trie from the source rules. - */ - RBBIDataHeader *build(UErrorCode &status); - - - /** - * Fold together redundant character classes (table columns) and - * redundant states (table rows). Done after initial table generation, - * before serializing the result. - */ - void optimizeTables(); - - char *fDebugEnv; // controls debug trace output - UErrorCode *fStatus; // Error reporting. Keeping status - UParseError *fParseError; // here avoids passing it everywhere. - const UnicodeString &fRules; // The rule string that we are compiling - UnicodeString fStrippedRules; // The rule string, with comments stripped. - - RBBIRuleScanner *fScanner; // The scanner. - RBBINode *fForwardTree; // The parse trees, generated by the scanner, - RBBINode *fReverseTree; // then manipulated by subsequent steps. - RBBINode *fSafeFwdTree; - RBBINode *fSafeRevTree; - - RBBINode **fDefaultTree; // For rules not qualified with a ! - // the tree to which they belong to. - - UBool fChainRules; // True for chained Unicode TR style rules. - // False for traditional regexp rules. 
- - UBool fLBCMNoChain; // True: suppress chaining of rules on - // chars with LineBreak property == CM. - - UBool fLookAheadHardBreak; // True: Look ahead matches cause an - // immediate break, no continuing for the - // longest match. - - RBBISetBuilder *fSetBuilder; // Set and Character Category builder. - UVector *fUSetNodes; // Vector of all uset nodes. - - RBBITableBuilder *fForwardTable; // State transition table, build time form. - - UVector *fRuleStatusVals; // The values that can be returned - // from getRuleStatus(). - - RBBIDataHeader *flattenData(); // Create the flattened (runtime format) - // data tables.. -private: - RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class - RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class -}; - - - - -//---------------------------------------------------------------------------- -// -// RBBISetTableEl is an entry in the hash table of UnicodeSets that have -// been encountered. The val Node will be of nodetype uset -// and contain pointers to the actual UnicodeSets. -// The Key is the source string for initializing the set. -// -// The hash table is used to avoid creating duplicate -// unnamed (not $var references) UnicodeSets. -// -// Memory Management: -// The Hash Table owns these RBBISetTableEl structs and -// the key strings. It does NOT own the val nodes. -// -//---------------------------------------------------------------------------- -struct RBBISetTableEl { - UnicodeString *key; - RBBINode *val; -}; - -/** - * A pair of ints, used to bundle pairs of states or pairs of character classes. - */ -typedef std::pair IntPair; - - -//---------------------------------------------------------------------------- -// -// RBBIDebugPrintf Printf equivalent, for debugging output. -// Conditional compilation of the implementation lets us -// get rid of the stdio dependency in environments where it -// is unavailable. 
-// -//---------------------------------------------------------------------------- -#ifdef RBBI_DEBUG -#include -#define RBBIDebugPrintf printf -#define RBBIDebugPuts puts -#else -#undef RBBIDebugPrintf -#define RBBIDebugPuts(arg) -#endif - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/rbbirpt.h b/deps/node/deps/icu-small/source/common/rbbirpt.h deleted file mode 100644 index b94c4c25..00000000 --- a/deps/node/deps/icu-small/source/common/rbbirpt.h +++ /dev/null @@ -1,296 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -//--------------------------------------------------------------------------------- -// -// Generated Header File. Do not edit by hand. -// This file contains the state table for the ICU Rule Based Break Iterator -// rule parser. -// It is generated by the Perl script "rbbicst.pl" from -// the rule parser state definitions file "rbbirpt.txt". -// -// Copyright (C) 2002-2016 International Business Machines Corporation -// and others. All rights reserved. -// -//--------------------------------------------------------------------------------- -#ifndef RBBIRPT_H -#define RBBIRPT_H - -#include "unicode/utypes.h" - -U_NAMESPACE_BEGIN -// -// Character classes for RBBI rule scanning. 
-// - static const uint8_t kRuleSet_digit_char = 128; - static const uint8_t kRuleSet_name_char = 129; - static const uint8_t kRuleSet_name_start_char = 130; - static const uint8_t kRuleSet_rule_char = 131; - static const uint8_t kRuleSet_white_space = 132; - - -enum RBBI_RuleParseAction { - doCheckVarDef, - doDotAny, - doEndAssign, - doEndOfRule, - doEndVariableName, - doExit, - doExprCatOperator, - doExprFinished, - doExprOrOperator, - doExprRParen, - doExprStart, - doLParen, - doNOP, - doNoChain, - doOptionEnd, - doOptionStart, - doReverseDir, - doRuleChar, - doRuleError, - doRuleErrorAssignExpr, - doScanUnicodeSet, - doSlash, - doStartAssign, - doStartTagValue, - doStartVariableName, - doTagDigit, - doTagExpectedError, - doTagValue, - doUnaryOpPlus, - doUnaryOpQuestion, - doUnaryOpStar, - doVariableNameExpectedErr, - rbbiLastAction}; - -//------------------------------------------------------------------------------- -// -// RBBIRuleTableEl represents the structure of a row in the transition table -// for the rule parser state machine. -//------------------------------------------------------------------------------- -struct RBBIRuleTableEl { - RBBI_RuleParseAction fAction; - uint8_t fCharClass; // 0-127: an individual ASCII character - // 128-255: character class index - uint8_t fNextState; // 0-250: normal next-stat numbers - // 255: pop next-state from stack. - uint8_t fPushState; - UBool fNextChar; -}; - -static const struct RBBIRuleTableEl gRuleParseStateTable[] = { - {doNOP, 0, 0, 0, TRUE} - , {doExprStart, 254, 29, 9, FALSE} // 1 start - , {doNOP, 132, 1,0, TRUE} // 2 - , {doNoChain, 94 /* ^ */, 12, 9, TRUE} // 3 - , {doExprStart, 36 /* $ */, 88, 98, FALSE} // 4 - , {doNOP, 33 /* ! 
*/, 19,0, TRUE} // 5 - , {doNOP, 59 /* ; */, 1,0, TRUE} // 6 - , {doNOP, 252, 0,0, FALSE} // 7 - , {doExprStart, 255, 29, 9, FALSE} // 8 - , {doEndOfRule, 59 /* ; */, 1,0, TRUE} // 9 break-rule-end - , {doNOP, 132, 9,0, TRUE} // 10 - , {doRuleError, 255, 103,0, FALSE} // 11 - , {doExprStart, 254, 29,0, FALSE} // 12 start-after-caret - , {doNOP, 132, 12,0, TRUE} // 13 - , {doRuleError, 94 /* ^ */, 103,0, FALSE} // 14 - , {doExprStart, 36 /* $ */, 88, 37, FALSE} // 15 - , {doRuleError, 59 /* ; */, 103,0, FALSE} // 16 - , {doRuleError, 252, 103,0, FALSE} // 17 - , {doExprStart, 255, 29,0, FALSE} // 18 - , {doNOP, 33 /* ! */, 21,0, TRUE} // 19 rev-option - , {doReverseDir, 255, 28, 9, FALSE} // 20 - , {doOptionStart, 130, 23,0, TRUE} // 21 option-scan1 - , {doRuleError, 255, 103,0, FALSE} // 22 - , {doNOP, 129, 23,0, TRUE} // 23 option-scan2 - , {doOptionEnd, 255, 25,0, FALSE} // 24 - , {doNOP, 59 /* ; */, 1,0, TRUE} // 25 option-scan3 - , {doNOP, 132, 25,0, TRUE} // 26 - , {doRuleError, 255, 103,0, FALSE} // 27 - , {doExprStart, 255, 29, 9, FALSE} // 28 reverse-rule - , {doRuleChar, 254, 38,0, TRUE} // 29 term - , {doNOP, 132, 29,0, TRUE} // 30 - , {doRuleChar, 131, 38,0, TRUE} // 31 - , {doNOP, 91 /* [ */, 94, 38, FALSE} // 32 - , {doLParen, 40 /* ( */, 29, 38, TRUE} // 33 - , {doNOP, 36 /* $ */, 88, 37, FALSE} // 34 - , {doDotAny, 46 /* . */, 38,0, TRUE} // 35 - , {doRuleError, 255, 103,0, FALSE} // 36 - , {doCheckVarDef, 255, 38,0, FALSE} // 37 term-var-ref - , {doNOP, 132, 38,0, TRUE} // 38 expr-mod - , {doUnaryOpStar, 42 /* * */, 43,0, TRUE} // 39 - , {doUnaryOpPlus, 43 /* + */, 43,0, TRUE} // 40 - , {doUnaryOpQuestion, 63 /* ? 
*/, 43,0, TRUE} // 41 - , {doNOP, 255, 43,0, FALSE} // 42 - , {doExprCatOperator, 254, 29,0, FALSE} // 43 expr-cont - , {doNOP, 132, 43,0, TRUE} // 44 - , {doExprCatOperator, 131, 29,0, FALSE} // 45 - , {doExprCatOperator, 91 /* [ */, 29,0, FALSE} // 46 - , {doExprCatOperator, 40 /* ( */, 29,0, FALSE} // 47 - , {doExprCatOperator, 36 /* $ */, 29,0, FALSE} // 48 - , {doExprCatOperator, 46 /* . */, 29,0, FALSE} // 49 - , {doExprCatOperator, 47 /* / */, 55,0, FALSE} // 50 - , {doExprCatOperator, 123 /* { */, 67,0, TRUE} // 51 - , {doExprOrOperator, 124 /* | */, 29,0, TRUE} // 52 - , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 53 - , {doExprFinished, 255, 255,0, FALSE} // 54 - , {doSlash, 47 /* / */, 57,0, TRUE} // 55 look-ahead - , {doNOP, 255, 103,0, FALSE} // 56 - , {doExprCatOperator, 254, 29,0, FALSE} // 57 expr-cont-no-slash - , {doNOP, 132, 43,0, TRUE} // 58 - , {doExprCatOperator, 131, 29,0, FALSE} // 59 - , {doExprCatOperator, 91 /* [ */, 29,0, FALSE} // 60 - , {doExprCatOperator, 40 /* ( */, 29,0, FALSE} // 61 - , {doExprCatOperator, 36 /* $ */, 29,0, FALSE} // 62 - , {doExprCatOperator, 46 /* . 
*/, 29,0, FALSE} // 63 - , {doExprOrOperator, 124 /* | */, 29,0, TRUE} // 64 - , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 65 - , {doExprFinished, 255, 255,0, FALSE} // 66 - , {doNOP, 132, 67,0, TRUE} // 67 tag-open - , {doStartTagValue, 128, 70,0, FALSE} // 68 - , {doTagExpectedError, 255, 103,0, FALSE} // 69 - , {doNOP, 132, 74,0, TRUE} // 70 tag-value - , {doNOP, 125 /* } */, 74,0, FALSE} // 71 - , {doTagDigit, 128, 70,0, TRUE} // 72 - , {doTagExpectedError, 255, 103,0, FALSE} // 73 - , {doNOP, 132, 74,0, TRUE} // 74 tag-close - , {doTagValue, 125 /* } */, 77,0, TRUE} // 75 - , {doTagExpectedError, 255, 103,0, FALSE} // 76 - , {doExprCatOperator, 254, 29,0, FALSE} // 77 expr-cont-no-tag - , {doNOP, 132, 77,0, TRUE} // 78 - , {doExprCatOperator, 131, 29,0, FALSE} // 79 - , {doExprCatOperator, 91 /* [ */, 29,0, FALSE} // 80 - , {doExprCatOperator, 40 /* ( */, 29,0, FALSE} // 81 - , {doExprCatOperator, 36 /* $ */, 29,0, FALSE} // 82 - , {doExprCatOperator, 46 /* . */, 29,0, FALSE} // 83 - , {doExprCatOperator, 47 /* / */, 55,0, FALSE} // 84 - , {doExprOrOperator, 124 /* | */, 29,0, TRUE} // 85 - , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 86 - , {doExprFinished, 255, 255,0, FALSE} // 87 - , {doStartVariableName, 36 /* $ */, 90,0, TRUE} // 88 scan-var-name - , {doNOP, 255, 103,0, FALSE} // 89 - , {doNOP, 130, 92,0, TRUE} // 90 scan-var-start - , {doVariableNameExpectedErr, 255, 103,0, FALSE} // 91 - , {doNOP, 129, 92,0, TRUE} // 92 scan-var-body - , {doEndVariableName, 255, 255,0, FALSE} // 93 - , {doScanUnicodeSet, 91 /* [ */, 255,0, TRUE} // 94 scan-unicode-set - , {doScanUnicodeSet, 112 /* p */, 255,0, TRUE} // 95 - , {doScanUnicodeSet, 80 /* P */, 255,0, TRUE} // 96 - , {doNOP, 255, 103,0, FALSE} // 97 - , {doNOP, 132, 98,0, TRUE} // 98 assign-or-rule - , {doStartAssign, 61 /* = */, 29, 101, TRUE} // 99 - , {doNOP, 255, 37, 9, FALSE} // 100 - , {doEndAssign, 59 /* ; */, 1,0, TRUE} // 101 assign-end - , {doRuleErrorAssignExpr, 255, 103,0, FALSE} // 102 - , 
{doExit, 255, 103,0, TRUE} // 103 errorDeath - }; -#ifdef RBBI_DEBUG -static const char * const RBBIRuleStateNames[] = { 0, - "start", - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "break-rule-end", - 0, - 0, - "start-after-caret", - 0, - 0, - 0, - 0, - 0, - 0, - "rev-option", - 0, - "option-scan1", - 0, - "option-scan2", - 0, - "option-scan3", - 0, - 0, - "reverse-rule", - "term", - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "term-var-ref", - "expr-mod", - 0, - 0, - 0, - 0, - "expr-cont", - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "look-ahead", - 0, - "expr-cont-no-slash", - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "tag-open", - 0, - 0, - "tag-value", - 0, - 0, - 0, - "tag-close", - 0, - 0, - "expr-cont-no-tag", - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - "scan-var-name", - 0, - "scan-var-start", - 0, - "scan-var-body", - 0, - "scan-unicode-set", - 0, - 0, - 0, - "assign-or-rule", - 0, - 0, - "assign-end", - 0, - "errorDeath", - 0}; -#endif - -U_NAMESPACE_END -#endif diff --git a/deps/node/deps/icu-small/source/common/rbbiscan.cpp b/deps/node/deps/icu-small/source/common/rbbiscan.cpp deleted file mode 100644 index 170c212e..00000000 --- a/deps/node/deps/icu-small/source/common/rbbiscan.cpp +++ /dev/null @@ -1,1279 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// -// file: rbbiscan.cpp -// -// Copyright (C) 2002-2016, International Business Machines Corporation and others. -// All Rights Reserved. -// -// This file contains the Rule Based Break Iterator Rule Builder functions for -// scanning the rules and assembling a parse tree. This is the first phase -// of compiling the rules. -// -// The overall of the rules is managed by class RBBIRuleBuilder, which will -// create and use an instance of this class as part of the process. 
-// - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/unistr.h" -#include "unicode/uniset.h" -#include "unicode/uchar.h" -#include "unicode/uchriter.h" -#include "unicode/parsepos.h" -#include "unicode/parseerr.h" -#include "cmemory.h" -#include "cstring.h" - -#include "rbbirpt.h" // Contains state table for the rbbi rules parser. - // generated by a Perl script. -#include "rbbirb.h" -#include "rbbinode.h" -#include "rbbiscan.h" -#include "rbbitblb.h" - -#include "uassert.h" - -//------------------------------------------------------------------------------ -// -// Unicode Set init strings for each of the character classes needed for parsing a rule file. -// (Initialized with hex values for portability to EBCDIC based machines. -// Really ugly, but there's no good way to avoid it.) -// -// The sets are referred to by name in the rbbirpt.txt, which is the -// source form of the state transition table for the RBBI rule parser. -// -//------------------------------------------------------------------------------ -static const UChar gRuleSet_rule_char_pattern[] = { - // Characters that may appear as literals in patterns without escaping or quoting. 
- // [ ^ [ \ p { Z } \ u 0 0 2 0 - 0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30, - // - \ u 0 0 7 f ] - [ \ p - 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x37, 0x66, 0x5d, 0x2d, 0x5b, 0x5c, 0x70, - // { L } ] - [ \ p { N } ] ] - 0x7b, 0x4c, 0x7d, 0x5d, 0x2d, 0x5b, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0x5d, 0}; - -static const UChar gRuleSet_name_char_pattern[] = { -// [ _ \ p { L } \ p { N } ] - 0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0}; - -static const UChar gRuleSet_digit_char_pattern[] = { -// [ 0 - 9 ] - 0x5b, 0x30, 0x2d, 0x39, 0x5d, 0}; - -static const UChar gRuleSet_name_start_char_pattern[] = { -// [ _ \ p { L } ] - 0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5d, 0 }; - -static const UChar kAny[] = {0x61, 0x6e, 0x79, 0x00}; // "any" - - -U_CDECL_BEGIN -static void U_CALLCONV RBBISetTable_deleter(void *p) { - icu::RBBISetTableEl *px = (icu::RBBISetTableEl *)p; - delete px->key; - // Note: px->val is owned by the linked list "fSetsListHead" in scanner. - // Don't delete the value nodes here. - uprv_free(px); -} -U_CDECL_END - -U_NAMESPACE_BEGIN - -//------------------------------------------------------------------------------ -// -// Constructor. -// -//------------------------------------------------------------------------------ -RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb) -{ - fRB = rb; - fScanIndex = 0; - fNextIndex = 0; - fQuoteMode = FALSE; - fLineNum = 1; - fCharNum = 0; - fLastChar = 0; - - fStateTable = NULL; - fStack[0] = 0; - fStackPtr = 0; - fNodeStack[0] = NULL; - fNodeStackPtr = 0; - - fReverseRule = FALSE; - fLookAheadRule = FALSE; - fNoChainInRule = FALSE; - - fSymbolTable = NULL; - fSetTable = NULL; - fRuleNum = 0; - fOptionStart = 0; - - // Do not check status until after all critical fields are sufficiently initialized - // that the destructor can run cleanly. - if (U_FAILURE(*rb->fStatus)) { - return; - } - - // - // Set up the constant Unicode Sets. 
- // Note: These could be made static, lazily initialized, and shared among - // all instances of RBBIRuleScanners. BUT this is quite a bit simpler, - // and the time to build these few sets should be small compared to a - // full break iterator build. - fRuleSets[kRuleSet_rule_char-128] - = UnicodeSet(UnicodeString(gRuleSet_rule_char_pattern), *rb->fStatus); - // fRuleSets[kRuleSet_white_space-128] = [:Pattern_White_Space:] - fRuleSets[kRuleSet_white_space-128]. - add(9, 0xd).add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029); - fRuleSets[kRuleSet_name_char-128] - = UnicodeSet(UnicodeString(gRuleSet_name_char_pattern), *rb->fStatus); - fRuleSets[kRuleSet_name_start_char-128] - = UnicodeSet(UnicodeString(gRuleSet_name_start_char_pattern), *rb->fStatus); - fRuleSets[kRuleSet_digit_char-128] - = UnicodeSet(UnicodeString(gRuleSet_digit_char_pattern), *rb->fStatus); - if (*rb->fStatus == U_ILLEGAL_ARGUMENT_ERROR) { - // This case happens if ICU's data is missing. UnicodeSet tries to look up property - // names from the init string, can't find them, and claims an illegal argument. - // Change the error so that the actual problem will be clearer to users. - *rb->fStatus = U_BRK_INIT_ERROR; - } - if (U_FAILURE(*rb->fStatus)) { - return; - } - - fSymbolTable = new RBBISymbolTable(this, rb->fRules, *rb->fStatus); - if (fSymbolTable == NULL) { - *rb->fStatus = U_MEMORY_ALLOCATION_ERROR; - return; - } - fSetTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, rb->fStatus); - if (U_FAILURE(*rb->fStatus)) { - return; - } - uhash_setValueDeleter(fSetTable, RBBISetTable_deleter); -} - - - -//------------------------------------------------------------------------------ -// -// Destructor -// -//------------------------------------------------------------------------------ -RBBIRuleScanner::~RBBIRuleScanner() { - delete fSymbolTable; - if (fSetTable != NULL) { - uhash_close(fSetTable); - fSetTable = NULL; - - } - - - // Node Stack. 
- // Normally has one entry, which is the entire parse tree for the rules. - // If errors occured, there may be additional subtrees left on the stack. - while (fNodeStackPtr > 0) { - delete fNodeStack[fNodeStackPtr]; - fNodeStackPtr--; - } - -} - -//------------------------------------------------------------------------------ -// -// doParseAction Do some action during rule parsing. -// Called by the parse state machine. -// Actions build the parse tree and Unicode Sets, -// and maintain the parse stack for nested expressions. -// -// TODO: unify EParseAction and RBBI_RuleParseAction enum types. -// They represent exactly the same thing. They're separate -// only to work around enum forward declaration restrictions -// in some compilers, while at the same time avoiding multiple -// definitions problems. I'm sure that there's a better way. -// -//------------------------------------------------------------------------------ -UBool RBBIRuleScanner::doParseActions(int32_t action) -{ - RBBINode *n = NULL; - - UBool returnVal = TRUE; - - switch (action) { - - case doExprStart: - pushNewNode(RBBINode::opStart); - fRuleNum++; - break; - - - case doNoChain: - // Scanned a '^' while on the rule start state. - fNoChainInRule = TRUE; - break; - - - case doExprOrOperator: - { - fixOpStack(RBBINode::precOpCat); - RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; - RBBINode *orNode = pushNewNode(RBBINode::opOr); - if (U_FAILURE(*fRB->fStatus)) { - break; - } - orNode->fLeftChild = operandNode; - operandNode->fParent = orNode; - } - break; - - case doExprCatOperator: - // concatenation operator. - // For the implicit concatenation of adjacent terms in an expression that are - // not separated by any other operator. Action is invoked between the - // actions for the two terms. 
- { - fixOpStack(RBBINode::precOpCat); - RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; - RBBINode *catNode = pushNewNode(RBBINode::opCat); - if (U_FAILURE(*fRB->fStatus)) { - break; - } - catNode->fLeftChild = operandNode; - operandNode->fParent = catNode; - } - break; - - case doLParen: - // Open Paren. - // The openParen node is a dummy operation type with a low precedence, - // which has the affect of ensuring that any real binary op that - // follows within the parens binds more tightly to the operands than - // stuff outside of the parens. - pushNewNode(RBBINode::opLParen); - break; - - case doExprRParen: - fixOpStack(RBBINode::precLParen); - break; - - case doNOP: - break; - - case doStartAssign: - // We've just scanned "$variable = " - // The top of the node stack has the $variable ref node. - - // Save the start position of the RHS text in the StartExpression node - // that precedes the $variableReference node on the stack. - // This will eventually be used when saving the full $variable replacement - // text as a string. - n = fNodeStack[fNodeStackPtr-1]; - n->fFirstPos = fNextIndex; // move past the '=' - - // Push a new start-of-expression node; needed to keep parse of the - // RHS expression happy. - pushNewNode(RBBINode::opStart); - break; - - - - - case doEndAssign: - { - // We have reached the end of an assignement statement. - // Current scan char is the ';' that terminates the assignment. - - // Terminate expression, leaves expression parse tree rooted in TOS node. - fixOpStack(RBBINode::precStart); - - RBBINode *startExprNode = fNodeStack[fNodeStackPtr-2]; - RBBINode *varRefNode = fNodeStack[fNodeStackPtr-1]; - RBBINode *RHSExprNode = fNodeStack[fNodeStackPtr]; - - // Save original text of right side of assignment, excluding the terminating ';' - // in the root of the node for the right-hand-side expression. 
- RHSExprNode->fFirstPos = startExprNode->fFirstPos; - RHSExprNode->fLastPos = fScanIndex; - fRB->fRules.extractBetween(RHSExprNode->fFirstPos, RHSExprNode->fLastPos, RHSExprNode->fText); - - // Expression parse tree becomes l. child of the $variable reference node. - varRefNode->fLeftChild = RHSExprNode; - RHSExprNode->fParent = varRefNode; - - // Make a symbol table entry for the $variableRef node. - fSymbolTable->addEntry(varRefNode->fText, varRefNode, *fRB->fStatus); - if (U_FAILURE(*fRB->fStatus)) { - // This is a round-about way to get the parse position set - // so that duplicate symbols error messages include a line number. - UErrorCode t = *fRB->fStatus; - *fRB->fStatus = U_ZERO_ERROR; - error(t); - } - - // Clean up the stack. - delete startExprNode; - fNodeStackPtr-=3; - break; - } - - case doEndOfRule: - { - fixOpStack(RBBINode::precStart); // Terminate expression, leaves expression - if (U_FAILURE(*fRB->fStatus)) { // parse tree rooted in TOS node. - break; - } -#ifdef RBBI_DEBUG - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeStack("end of rule");} -#endif - U_ASSERT(fNodeStackPtr == 1); - RBBINode *thisRule = fNodeStack[fNodeStackPtr]; - - // If this rule includes a look-ahead '/', add a endMark node to the - // expression tree. - if (fLookAheadRule) { - RBBINode *endNode = pushNewNode(RBBINode::endMark); - RBBINode *catNode = pushNewNode(RBBINode::opCat); - if (U_FAILURE(*fRB->fStatus)) { - break; - } - fNodeStackPtr -= 2; - catNode->fLeftChild = thisRule; - catNode->fRightChild = endNode; - fNodeStack[fNodeStackPtr] = catNode; - endNode->fVal = fRuleNum; - endNode->fLookAheadEnd = TRUE; - thisRule = catNode; - - // TODO: Disable chaining out of look-ahead (hard break) rules. - // The break on rule match is forced, so there is no point in building up - // the state table to chain into another rule for a longer match. - } - - // Mark this node as being the root of a rule. 
- thisRule->fRuleRoot = TRUE; - - // Flag if chaining into this rule is wanted. - // - if (fRB->fChainRules && // If rule chaining is enabled globally via !!chain - !fNoChainInRule) { // and no '^' chain-in inhibit was on this rule - thisRule->fChainIn = TRUE; - } - - - // All rule expressions are ORed together. - // The ';' that terminates an expression really just functions as a '|' with - // a low operator prededence. - // - // Each of the four sets of rules are collected separately. - // (forward, reverse, safe_forward, safe_reverse) - // OR this rule into the appropriate group of them. - // - RBBINode **destRules = (fReverseRule? &fRB->fSafeRevTree : fRB->fDefaultTree); - - if (*destRules != NULL) { - // This is not the first rule encounted. - // OR previous stuff (from *destRules) - // with the current rule expression (on the Node Stack) - // with the resulting OR expression going to *destRules - // - thisRule = fNodeStack[fNodeStackPtr]; - RBBINode *prevRules = *destRules; - RBBINode *orNode = pushNewNode(RBBINode::opOr); - if (U_FAILURE(*fRB->fStatus)) { - break; - } - orNode->fLeftChild = prevRules; - prevRules->fParent = orNode; - orNode->fRightChild = thisRule; - thisRule->fParent = orNode; - *destRules = orNode; - } - else - { - // This is the first rule encountered (for this direction). - // Just move its parse tree from the stack to *destRules. - *destRules = fNodeStack[fNodeStackPtr]; - } - fReverseRule = FALSE; // in preparation for the next rule. - fLookAheadRule = FALSE; - fNoChainInRule = FALSE; - fNodeStackPtr = 0; - } - break; - - - case doRuleError: - error(U_BRK_RULE_SYNTAX); - returnVal = FALSE; - break; - - - case doVariableNameExpectedErr: - error(U_BRK_RULE_SYNTAX); - break; - - - // - // Unary operands + ? * - // These all appear after the operand to which they apply. - // When we hit one, the operand (may be a whole sub expression) - // will be on the top of the stack. - // Unary Operator becomes TOS, with the old TOS as its one child. 
- case doUnaryOpPlus: - { - RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; - RBBINode *plusNode = pushNewNode(RBBINode::opPlus); - if (U_FAILURE(*fRB->fStatus)) { - break; - } - plusNode->fLeftChild = operandNode; - operandNode->fParent = plusNode; - } - break; - - case doUnaryOpQuestion: - { - RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; - RBBINode *qNode = pushNewNode(RBBINode::opQuestion); - if (U_FAILURE(*fRB->fStatus)) { - break; - } - qNode->fLeftChild = operandNode; - operandNode->fParent = qNode; - } - break; - - case doUnaryOpStar: - { - RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; - RBBINode *starNode = pushNewNode(RBBINode::opStar); - if (U_FAILURE(*fRB->fStatus)) { - break; - } - starNode->fLeftChild = operandNode; - operandNode->fParent = starNode; - } - break; - - case doRuleChar: - // A "Rule Character" is any single character that is a literal part - // of the regular expression. Like a, b and c in the expression "(abc*) | [:L:]" - // These are pretty uncommon in break rules; the terms are more commonly - // sets. To keep things uniform, treat these characters like as - // sets that just happen to contain only one character. - { - n = pushNewNode(RBBINode::setRef); - if (U_FAILURE(*fRB->fStatus)) { - break; - } - findSetFor(UnicodeString(fC.fChar), n); - n->fFirstPos = fScanIndex; - n->fLastPos = fNextIndex; - fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); - break; - } - - case doDotAny: - // scanned a ".", meaning match any single character. - { - n = pushNewNode(RBBINode::setRef); - if (U_FAILURE(*fRB->fStatus)) { - break; - } - findSetFor(UnicodeString(TRUE, kAny, 3), n); - n->fFirstPos = fScanIndex; - n->fLastPos = fNextIndex; - fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); - break; - } - - case doSlash: - // Scanned a '/', which identifies a look-ahead break position in a rule. 
- n = pushNewNode(RBBINode::lookAhead); - if (U_FAILURE(*fRB->fStatus)) { - break; - } - n->fVal = fRuleNum; - n->fFirstPos = fScanIndex; - n->fLastPos = fNextIndex; - fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); - fLookAheadRule = TRUE; - break; - - - case doStartTagValue: - // Scanned a '{', the opening delimiter for a tag value within a rule. - n = pushNewNode(RBBINode::tag); - if (U_FAILURE(*fRB->fStatus)) { - break; - } - n->fVal = 0; - n->fFirstPos = fScanIndex; - n->fLastPos = fNextIndex; - break; - - case doTagDigit: - // Just scanned a decimal digit that's part of a tag value - { - n = fNodeStack[fNodeStackPtr]; - uint32_t v = u_charDigitValue(fC.fChar); - U_ASSERT(v < 10); - n->fVal = n->fVal*10 + v; - break; - } - - case doTagValue: - n = fNodeStack[fNodeStackPtr]; - n->fLastPos = fNextIndex; - fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); - break; - - case doTagExpectedError: - error(U_BRK_MALFORMED_RULE_TAG); - returnVal = FALSE; - break; - - case doOptionStart: - // Scanning a !!option. At the start of string. 
- fOptionStart = fScanIndex; - break; - - case doOptionEnd: - { - UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart); - if (opt == UNICODE_STRING("chain", 5)) { - fRB->fChainRules = TRUE; - } else if (opt == UNICODE_STRING("LBCMNoChain", 11)) { - fRB->fLBCMNoChain = TRUE; - } else if (opt == UNICODE_STRING("forward", 7)) { - fRB->fDefaultTree = &fRB->fForwardTree; - } else if (opt == UNICODE_STRING("reverse", 7)) { - fRB->fDefaultTree = &fRB->fReverseTree; - } else if (opt == UNICODE_STRING("safe_forward", 12)) { - fRB->fDefaultTree = &fRB->fSafeFwdTree; - } else if (opt == UNICODE_STRING("safe_reverse", 12)) { - fRB->fDefaultTree = &fRB->fSafeRevTree; - } else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) { - fRB->fLookAheadHardBreak = TRUE; - } else if (opt == UNICODE_STRING("quoted_literals_only", 20)) { - fRuleSets[kRuleSet_rule_char-128].clear(); - } else if (opt == UNICODE_STRING("unquoted_literals", 17)) { - fRuleSets[kRuleSet_rule_char-128].applyPattern(UnicodeString(gRuleSet_rule_char_pattern), *fRB->fStatus); - } else { - error(U_BRK_UNRECOGNIZED_OPTION); - } - } - break; - - case doReverseDir: - fReverseRule = TRUE; - break; - - case doStartVariableName: - n = pushNewNode(RBBINode::varRef); - if (U_FAILURE(*fRB->fStatus)) { - break; - } - n->fFirstPos = fScanIndex; - break; - - case doEndVariableName: - n = fNodeStack[fNodeStackPtr]; - if (n==NULL || n->fType != RBBINode::varRef) { - error(U_BRK_INTERNAL_ERROR); - break; - } - n->fLastPos = fScanIndex; - fRB->fRules.extractBetween(n->fFirstPos+1, n->fLastPos, n->fText); - // Look the newly scanned name up in the symbol table - // If there's an entry, set the l. child of the var ref to the replacement expression. - // (We also pass through here when scanning assignments, but no harm is done, other - // than a slight wasted effort that seems hard to avoid. 
Lookup will be null) - n->fLeftChild = fSymbolTable->lookupNode(n->fText); - break; - - case doCheckVarDef: - n = fNodeStack[fNodeStackPtr]; - if (n->fLeftChild == NULL) { - error(U_BRK_UNDEFINED_VARIABLE); - returnVal = FALSE; - } - break; - - case doExprFinished: - break; - - case doRuleErrorAssignExpr: - error(U_BRK_ASSIGN_ERROR); - returnVal = FALSE; - break; - - case doExit: - returnVal = FALSE; - break; - - case doScanUnicodeSet: - scanSet(); - break; - - default: - error(U_BRK_INTERNAL_ERROR); - returnVal = FALSE; - break; - } - return returnVal && U_SUCCESS(*fRB->fStatus); -} - - - - -//------------------------------------------------------------------------------ -// -// Error Report a rule parse error. -// Only report it if no previous error has been recorded. -// -//------------------------------------------------------------------------------ -void RBBIRuleScanner::error(UErrorCode e) { - if (U_SUCCESS(*fRB->fStatus)) { - *fRB->fStatus = e; - if (fRB->fParseError) { - fRB->fParseError->line = fLineNum; - fRB->fParseError->offset = fCharNum; - fRB->fParseError->preContext[0] = 0; - fRB->fParseError->postContext[0] = 0; - } - } -} - - - - -//------------------------------------------------------------------------------ -// -// fixOpStack The parse stack holds partially assembled chunks of the parse tree. -// An entry on the stack may be as small as a single setRef node, -// or as large as the parse tree -// for an entire expression (this will be the one item left on the stack -// when the parsing of an RBBI rule completes. -// -// This function is called when a binary operator is encountered. -// It looks back up the stack for operators that are not yet associated -// with a right operand, and if the precedence of the stacked operator >= -// the precedence of the current operator, binds the operand left, -// to the previously encountered operator. 
-// -//------------------------------------------------------------------------------ -void RBBIRuleScanner::fixOpStack(RBBINode::OpPrecedence p) { - RBBINode *n; - // printNodeStack("entering fixOpStack()"); - for (;;) { - n = fNodeStack[fNodeStackPtr-1]; // an operator node - if (n->fPrecedence == 0) { - RBBIDebugPuts("RBBIRuleScanner::fixOpStack, bad operator node"); - error(U_BRK_INTERNAL_ERROR); - return; - } - - if (n->fPrecedence < p || n->fPrecedence <= RBBINode::precLParen) { - // The most recent operand goes with the current operator, - // not with the previously stacked one. - break; - } - // Stack operator is a binary op ( '|' or concatenation) - // TOS operand becomes right child of this operator. - // Resulting subexpression becomes the TOS operand. - n->fRightChild = fNodeStack[fNodeStackPtr]; - fNodeStack[fNodeStackPtr]->fParent = n; - fNodeStackPtr--; - // printNodeStack("looping in fixOpStack() "); - } - - if (p <= RBBINode::precLParen) { - // Scan is at a right paren or end of expression. - // The scanned item must match the stack, or else there was an error. - // Discard the left paren (or start expr) node from the stack, - // leaving the completed (sub)expression as TOS. - if (n->fPrecedence != p) { - // Right paren encountered matched start of expression node, or - // end of expression matched with a left paren node. - error(U_BRK_MISMATCHED_PAREN); - } - fNodeStack[fNodeStackPtr-1] = fNodeStack[fNodeStackPtr]; - fNodeStackPtr--; - // Delete the now-discarded LParen or Start node. - delete n; - } - // printNodeStack("leaving fixOpStack()"); -} - - - - -//------------------------------------------------------------------------------ -// -// findSetFor given a UnicodeString, -// - find the corresponding Unicode Set (uset node) -// (create one if necessary) -// - Set fLeftChild of the caller's node (should be a setRef node) -// to the uset node -// Maintain a hash table of uset nodes, so the same one is always used -// for the same string. 
-// If a "to adopt" set is provided and we haven't seen this key before, -// add the provided set to the hash table. -// If the string is one (32 bit) char in length, the set contains -// just one element which is the char in question. -// If the string is "any", return a set containing all chars. -// -//------------------------------------------------------------------------------ -void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt) { - - RBBISetTableEl *el; - - // First check whether we've already cached a set for this string. - // If so, just use the cached set in the new node. - // delete any set provided by the caller, since we own it. - el = (RBBISetTableEl *)uhash_get(fSetTable, &s); - if (el != NULL) { - delete setToAdopt; - node->fLeftChild = el->val; - U_ASSERT(node->fLeftChild->fType == RBBINode::uset); - return; - } - - // Haven't seen this set before. - // If the caller didn't provide us with a prebuilt set, - // create a new UnicodeSet now. - if (setToAdopt == NULL) { - if (s.compare(kAny, -1) == 0) { - setToAdopt = new UnicodeSet(0x000000, 0x10ffff); - } else { - UChar32 c; - c = s.char32At(0); - setToAdopt = new UnicodeSet(c, c); - } - } - - // - // Make a new uset node to refer to this UnicodeSet - // This new uset node becomes the child of the caller's setReference node. - // - RBBINode *usetNode = new RBBINode(RBBINode::uset); - if (usetNode == NULL) { - error(U_MEMORY_ALLOCATION_ERROR); - return; - } - usetNode->fInputSet = setToAdopt; - usetNode->fParent = node; - node->fLeftChild = usetNode; - usetNode->fText = s; - - - // - // Add the new uset node to the list of all uset nodes. - // - fRB->fUSetNodes->addElement(usetNode, *fRB->fStatus); - - - // - // Add the new set to the set hash table. 
- // - el = (RBBISetTableEl *)uprv_malloc(sizeof(RBBISetTableEl)); - UnicodeString *tkey = new UnicodeString(s); - if (tkey == NULL || el == NULL || setToAdopt == NULL) { - // Delete to avoid memory leak - delete tkey; - tkey = NULL; - uprv_free(el); - el = NULL; - delete setToAdopt; - setToAdopt = NULL; - - error(U_MEMORY_ALLOCATION_ERROR); - return; - } - el->key = tkey; - el->val = usetNode; - uhash_put(fSetTable, el->key, el, fRB->fStatus); - - return; -} - - - -// -// Assorted Unicode character constants. -// Numeric because there is no portable way to enter them as literals. -// (Think EBCDIC). -// -static const UChar chCR = 0x0d; // New lines, for terminating comments. -static const UChar chLF = 0x0a; -static const UChar chNEL = 0x85; // NEL newline variant -static const UChar chLS = 0x2028; // Unicode Line Separator -static const UChar chApos = 0x27; // single quote, for quoted chars. -static const UChar chPound = 0x23; // '#', introduces a comment. -static const UChar chBackSlash = 0x5c; // '\' introduces a char escape -static const UChar chLParen = 0x28; -static const UChar chRParen = 0x29; - - -//------------------------------------------------------------------------------ -// -// stripRules Return a rules string without extra spaces. -// (Comments are removed separately, during rule parsing.) -// -//------------------------------------------------------------------------------ -UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) { - UnicodeString strippedRules; - int32_t rulesLength = rules.length(); - bool skippingSpaces = false; - - for (int32_t idx=0; idx= fRB->fRules.length()) { - return (UChar32)-1; - } - ch = fRB->fRules.char32At(fNextIndex); - fNextIndex = fRB->fRules.moveIndex32(fNextIndex, 1); - - if (ch == chCR || - ch == chNEL || - ch == chLS || - (ch == chLF && fLastChar != chCR)) { - // Character is starting a new line. Bump up the line number, and - // reset the column to 0. 
- fLineNum++; - fCharNum=0; - if (fQuoteMode) { - error(U_BRK_NEW_LINE_IN_QUOTED_STRING); - fQuoteMode = FALSE; - } - } - else { - // Character is not starting a new line. Except in the case of a - // LF following a CR, increment the column position. - if (ch != chLF) { - fCharNum++; - } - } - fLastChar = ch; - return ch; -} - - -//------------------------------------------------------------------------------ -// -// nextChar for rules scanning. At this level, we handle stripping -// out comments and processing backslash character escapes. -// The rest of the rules grammar is handled at the next level up. -// -//------------------------------------------------------------------------------ -void RBBIRuleScanner::nextChar(RBBIRuleChar &c) { - - // Unicode Character constants needed for the processing done by nextChar(), - // in hex because literals wont work on EBCDIC machines. - - fScanIndex = fNextIndex; - c.fChar = nextCharLL(); - c.fEscaped = FALSE; - - // - // check for '' sequence. - // These are recognized in all contexts, whether in quoted text or not. - // - if (c.fChar == chApos) { - if (fRB->fRules.char32At(fNextIndex) == chApos) { - c.fChar = nextCharLL(); // get nextChar officially so character counts - c.fEscaped = TRUE; // stay correct. - } - else - { - // Single quote, by itself. - // Toggle quoting mode. - // Return either '(' or ')', because quotes cause a grouping of the quoted text. - fQuoteMode = !fQuoteMode; - if (fQuoteMode == TRUE) { - c.fChar = chLParen; - } else { - c.fChar = chRParen; - } - c.fEscaped = FALSE; // The paren that we return is not escaped. - return; - } - } - - if (fQuoteMode) { - c.fEscaped = TRUE; - } - else - { - // We are not in a 'quoted region' of the source. - // - if (c.fChar == chPound) { - // Start of a comment. Consume the rest of it. - // The new-line char that terminates the comment is always returned. 
- // It will be treated as white-space, and serves to break up anything - // that might otherwise incorrectly clump together with a comment in - // the middle (a variable name, for example.) - int32_t commentStart = fScanIndex; - for (;;) { - c.fChar = nextCharLL(); - if (c.fChar == (UChar32)-1 || // EOF - c.fChar == chCR || - c.fChar == chLF || - c.fChar == chNEL || - c.fChar == chLS) {break;} - } - for (int32_t i=commentStart; ifStrippedRules.setCharAt(i, u' '); - } - } - if (c.fChar == (UChar32)-1) { - return; - } - - // - // check for backslash escaped characters. - // Use UnicodeString::unescapeAt() to handle them. - // - if (c.fChar == chBackSlash) { - c.fEscaped = TRUE; - int32_t startX = fNextIndex; - c.fChar = fRB->fRules.unescapeAt(fNextIndex); - if (fNextIndex == startX) { - error(U_BRK_HEX_DIGITS_EXPECTED); - } - fCharNum += fNextIndex-startX; - } - } - // putc(c.fChar, stdout); -} - -//------------------------------------------------------------------------------ -// -// Parse RBBI rules. The state machine for rules parsing is here. -// The state tables are hand-written in the file rbbirpt.txt, -// and converted to the form used here by a perl -// script rbbicst.pl -// -//------------------------------------------------------------------------------ -void RBBIRuleScanner::parse() { - uint16_t state; - const RBBIRuleTableEl *tableEl; - - if (U_FAILURE(*fRB->fStatus)) { - return; - } - - state = 1; - nextChar(fC); - // - // Main loop for the rule parsing state machine. - // Runs once per state transition. - // Each time through optionally performs, depending on the state table, - // - an advance to the the next input char - // - an action to be performed. - // - pushing or popping a state to/from the local state return stack. - // - for (;;) { - // Bail out if anything has gone wrong. - // RBBI rule file parsing stops on the first error encountered. - if (U_FAILURE(*fRB->fStatus)) { - break; - } - - // Quit if state == 0. 
This is the normal way to exit the state machine. - // - if (state == 0) { - break; - } - - // Find the state table element that matches the input char from the rule, or the - // class of the input character. Start with the first table row for this - // state, then linearly scan forward until we find a row that matches the - // character. The last row for each state always matches all characters, so - // the search will stop there, if not before. - // - tableEl = &gRuleParseStateTable[state]; - #ifdef RBBI_DEBUG - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { - RBBIDebugPrintf("char, line, col = (\'%c\', %d, %d) state=%s ", - fC.fChar, fLineNum, fCharNum, RBBIRuleStateNames[state]); - } - #endif - - for (;;) { - #ifdef RBBI_DEBUG - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf("."); fflush(stdout);} - #endif - if (tableEl->fCharClass < 127 && fC.fEscaped == FALSE && tableEl->fCharClass == fC.fChar) { - // Table row specified an individual character, not a set, and - // the input character is not escaped, and - // the input character matched it. - break; - } - if (tableEl->fCharClass == 255) { - // Table row specified default, match anything character class. - break; - } - if (tableEl->fCharClass == 254 && fC.fEscaped) { - // Table row specified "escaped" and the char was escaped. - break; - } - if (tableEl->fCharClass == 253 && fC.fEscaped && - (fC.fChar == 0x50 || fC.fChar == 0x70 )) { - // Table row specified "escaped P" and the char is either 'p' or 'P'. - break; - } - if (tableEl->fCharClass == 252 && fC.fChar == (UChar32)-1) { - // Table row specified eof and we hit eof on the input. 
- break; - } - - if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && // Table specs a char class && - fC.fEscaped == FALSE && // char is not escaped && - fC.fChar != (UChar32)-1) { // char is not EOF - U_ASSERT((tableEl->fCharClass-128) < UPRV_LENGTHOF(fRuleSets)); - if (fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) { - // Table row specified a character class, or set of characters, - // and the current char matches it. - break; - } - } - - // No match on this row, advance to the next row for this state, - tableEl++; - } - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPuts("");} - - // - // We've found the row of the state table that matches the current input - // character from the rules string. - // Perform any action specified by this row in the state table. - if (doParseActions((int32_t)tableEl->fAction) == FALSE) { - // Break out of the state machine loop if the - // the action signalled some kind of error, or - // the action was to exit, occurs on normal end-of-rules-input. - break; - } - - if (tableEl->fPushState != 0) { - fStackPtr++; - if (fStackPtr >= kStackSize) { - error(U_BRK_INTERNAL_ERROR); - RBBIDebugPuts("RBBIRuleScanner::parse() - state stack overflow."); - fStackPtr--; - } - fStack[fStackPtr] = tableEl->fPushState; - } - - if (tableEl->fNextChar) { - nextChar(fC); - } - - // Get the next state from the table entry, or from the - // state stack if the next state was specified as "pop". - if (tableEl->fNextState != 255) { - state = tableEl->fNextState; - } else { - state = fStack[fStackPtr]; - fStackPtr--; - if (fStackPtr < 0) { - error(U_BRK_INTERNAL_ERROR); - RBBIDebugPuts("RBBIRuleScanner::parse() - state stack underflow."); - fStackPtr++; - } - } - - } - - if (U_FAILURE(*fRB->fStatus)) { - return; - } - - // If there are no forward rules set an error. - // - if (fRB->fForwardTree == NULL) { - error(U_BRK_RULE_SYNTAX); - return; - } - - // - // Parsing of the input RBBI rules is complete. 
- // We now have a parse tree for the rule expressions - // and a list of all UnicodeSets that are referenced. - // -#ifdef RBBI_DEBUG - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "symbols")) {fSymbolTable->rbbiSymtablePrint();} - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ptree")) { - RBBIDebugPrintf("Completed Forward Rules Parse Tree...\n"); - RBBINode::printTree(fRB->fForwardTree, TRUE); - RBBIDebugPrintf("\nCompleted Reverse Rules Parse Tree...\n"); - RBBINode::printTree(fRB->fReverseTree, TRUE); - RBBIDebugPrintf("\nCompleted Safe Point Forward Rules Parse Tree...\n"); - RBBINode::printTree(fRB->fSafeFwdTree, TRUE); - RBBIDebugPrintf("\nCompleted Safe Point Reverse Rules Parse Tree...\n"); - RBBINode::printTree(fRB->fSafeRevTree, TRUE); - } -#endif -} - - -//------------------------------------------------------------------------------ -// -// printNodeStack for debugging... -// -//------------------------------------------------------------------------------ -#ifdef RBBI_DEBUG -void RBBIRuleScanner::printNodeStack(const char *title) { - int i; - RBBIDebugPrintf("%s. Dumping node stack...\n", title); - for (i=fNodeStackPtr; i>0; i--) {RBBINode::printTree(fNodeStack[i], TRUE);} -} -#endif - - - - -//------------------------------------------------------------------------------ -// -// pushNewNode create a new RBBINode of the specified type and push it -// onto the stack of nodes. 
-// -//------------------------------------------------------------------------------ -RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) { - if (U_FAILURE(*fRB->fStatus)) { - return NULL; - } - if (fNodeStackPtr >= kStackSize - 1) { - error(U_BRK_RULE_SYNTAX); - RBBIDebugPuts("RBBIRuleScanner::pushNewNode - stack overflow."); - return NULL; - } - fNodeStackPtr++; - fNodeStack[fNodeStackPtr] = new RBBINode(t); - if (fNodeStack[fNodeStackPtr] == NULL) { - *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR; - } - return fNodeStack[fNodeStackPtr]; -} - - - -//------------------------------------------------------------------------------ -// -// scanSet Construct a UnicodeSet from the text at the current scan -// position. Advance the scan position to the first character -// after the set. -// -// A new RBBI setref node referring to the set is pushed onto the node -// stack. -// -// The scan position is normally under the control of the state machine -// that controls rule parsing. UnicodeSets, however, are parsed by -// the UnicodeSet constructor, not by the RBBI rule parser. -// -//------------------------------------------------------------------------------ -void RBBIRuleScanner::scanSet() { - UnicodeSet *uset; - ParsePosition pos; - int startPos; - int i; - - if (U_FAILURE(*fRB->fStatus)) { - return; - } - - pos.setIndex(fScanIndex); - startPos = fScanIndex; - UErrorCode localStatus = U_ZERO_ERROR; - uset = new UnicodeSet(); - if (uset == NULL) { - localStatus = U_MEMORY_ALLOCATION_ERROR; - } else { - uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus); - } - if (U_FAILURE(localStatus)) { - // TODO: Get more accurate position of the error from UnicodeSet's return info. - // UnicodeSet appears to not be reporting correctly at this time. 
- #ifdef RBBI_DEBUG - RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex()); - #endif - error(localStatus); - delete uset; - return; - } - - // Verify that the set contains at least one code point. - // - U_ASSERT(uset!=NULL); - if (uset->isEmpty()) { - // This set is empty. - // Make it an error, because it almost certainly is not what the user wanted. - // Also, avoids having to think about corner cases in the tree manipulation code - // that occurs later on. - error(U_BRK_RULE_EMPTY_SET); - delete uset; - return; - } - - - // Advance the RBBI parse postion over the UnicodeSet pattern. - // Don't just set fScanIndex because the line/char positions maintained - // for error reporting would be thrown off. - i = pos.getIndex(); - for (;;) { - if (fNextIndex >= i) { - break; - } - nextCharLL(); - } - - if (U_SUCCESS(*fRB->fStatus)) { - RBBINode *n; - - n = pushNewNode(RBBINode::setRef); - if (U_FAILURE(*fRB->fStatus)) { - return; - } - n->fFirstPos = startPos; - n->fLastPos = fNextIndex; - fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); - // findSetFor() serves several purposes here: - // - Adopts storage for the UnicodeSet, will be responsible for deleting. - // - Mantains collection of all sets in use, needed later for establishing - // character categories for run time engine. - // - Eliminates mulitiple instances of the same set. - // - Creates a new uset node if necessary (if this isn't a duplicate.) - findSetFor(n->fText, n, uset); - } - -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/node/deps/icu-small/source/common/rbbiscan.h b/deps/node/deps/icu-small/source/common/rbbiscan.h deleted file mode 100644 index 3d484db0..00000000 --- a/deps/node/deps/icu-small/source/common/rbbiscan.h +++ /dev/null @@ -1,165 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -// -// rbbiscan.h -// -// Copyright (C) 2002-2016, International Business Machines Corporation and others. -// All Rights Reserved. -// -// This file contains declarations for class RBBIRuleScanner -// - - -#ifndef RBBISCAN_H -#define RBBISCAN_H - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/rbbi.h" -#include "unicode/uniset.h" -#include "unicode/parseerr.h" -#include "uhash.h" -#include "uvector.h" -#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that - // looks up references to $variables within a set. -#include "rbbinode.h" -#include "rbbirpt.h" - -U_NAMESPACE_BEGIN - -class RBBIRuleBuilder; -class RBBISymbolTable; - - -//-------------------------------------------------------------------------------- -// -// class RBBIRuleScanner does the lowest level, character-at-a-time -// scanning of break iterator rules. -// -// The output of the scanner is parse trees for -// the rule expressions and a list of all Unicode Sets -// encountered. -// -//-------------------------------------------------------------------------------- - -class RBBIRuleScanner : public UMemory { -public: - - enum { - kStackSize = 100 // The size of the state stack for - }; // rules parsing. Corresponds roughly - // to the depth of parentheses nesting - // that is allowed in the rules. - - struct RBBIRuleChar { - UChar32 fChar; - UBool fEscaped; - RBBIRuleChar() : fChar(0), fEscaped(FALSE) {}; - }; - - RBBIRuleScanner(RBBIRuleBuilder *rb); - - - virtual ~RBBIRuleScanner(); - - void nextChar(RBBIRuleChar &c); // Get the next char from the input stream. - // Return false if at end. - - UBool push(const RBBIRuleChar &c); // Push (unget) one character. - // Only a single character may be pushed. - - void parse(); // Parse the rules, generating two parse - // trees, one each for the forward and - // reverse rules, - // and a list of UnicodeSets encountered. 
- - /** - * Return a rules string without unnecessary - * characters. - */ - static UnicodeString stripRules(const UnicodeString &rules); -private: - - UBool doParseActions(int32_t a); - void error(UErrorCode e); // error reporting convenience function. - void fixOpStack(RBBINode::OpPrecedence p); - // a character. - void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL); - - UChar32 nextCharLL(); -#ifdef RBBI_DEBUG - void printNodeStack(const char *title); -#endif - RBBINode *pushNewNode(RBBINode::NodeType t); - void scanSet(); - - - RBBIRuleBuilder *fRB; // The rule builder that we are part of. - - int32_t fScanIndex; // Index of current character being processed - // in the rule input string. - int32_t fNextIndex; // Index of the next character, which - // is the first character not yet scanned. - UBool fQuoteMode; // Scan is in a 'quoted region' - int32_t fLineNum; // Line number in input file. - int32_t fCharNum; // Char position within the line. - UChar32 fLastChar; // Previous char, needed to count CR-LF - // as a single line, not two. - - RBBIRuleChar fC; // Current char for parse state machine - // processing. - UnicodeString fVarName; // $variableName, valid when we've just - // scanned one. - - RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule - // parsing. index by p[state][char-class] - - uint16_t fStack[kStackSize]; // State stack, holds state pushes - int32_t fStackPtr; // and pops as specified in the state - // transition rules. - - RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created - // during the parse of a rule - int32_t fNodeStackPtr; - - - UBool fReverseRule; // True if the rule currently being scanned - // is a reverse direction rule (if it - // starts with a '!') - - UBool fLookAheadRule; // True if the rule includes a '/' - // somewhere within it. - - UBool fNoChainInRule; // True if the current rule starts with a '^'. 
- - RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of - // $variable symbols. - - UHashtable *fSetTable; // UnicocodeSet hash table, holds indexes to - // the sets created while parsing rules. - // The key is the string used for creating - // the set. - - UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during - // the scanning of RBBI rules. The - // indicies for these are assigned by the - // perl script that builds the state tables. - // See rbbirpt.h. - - int32_t fRuleNum; // Counts each rule as it is scanned. - - int32_t fOptionStart; // Input index of start of a !!option - // keyword, while being scanned. - - UnicodeSet *gRuleSet_rule_char; - UnicodeSet *gRuleSet_white_space; - UnicodeSet *gRuleSet_name_char; - UnicodeSet *gRuleSet_name_start_char; - - RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class - RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class -}; - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/rbbisetb.cpp b/deps/node/deps/icu-small/source/common/rbbisetb.cpp deleted file mode 100644 index e6b98cf3..00000000 --- a/deps/node/deps/icu-small/source/common/rbbisetb.cpp +++ /dev/null @@ -1,684 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// -// rbbisetb.cpp -// -/* -*************************************************************************** -* Copyright (C) 2002-2008 International Business Machines Corporation * -* and others. All rights reserved. * -*************************************************************************** -*/ -// -// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules -// (part of the rule building process.) -// -// Starting with the rules parse tree from the scanner, -// -// - Enumerate the set of UnicodeSets that are referenced -// by the RBBI rules. 
-// - compute a set of non-overlapping character ranges -// with all characters within a range belonging to the same -// set of input uniocde sets. -// - Derive a set of non-overlapping UnicodeSet (like things) -// that will correspond to columns in the state table for -// the RBBI execution engine. All characters within one -// of these sets belong to the same set of the original -// UnicodeSets from the user's rules. -// - construct the trie table that maps input characters -// to the index of the matching non-overlapping set of set from -// the previous step. -// - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/uniset.h" -#include "utrie2.h" -#include "uvector.h" -#include "uassert.h" -#include "cmemory.h" -#include "cstring.h" - -#include "rbbisetb.h" -#include "rbbinode.h" - -U_NAMESPACE_BEGIN - -//------------------------------------------------------------------------ -// -// Constructor -// -//------------------------------------------------------------------------ -RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb) -{ - fRB = rb; - fStatus = rb->fStatus; - fRangeList = 0; - fTrie = 0; - fTrieSize = 0; - fGroupCount = 0; - fSawBOF = FALSE; -} - - -//------------------------------------------------------------------------ -// -// Destructor -// -//------------------------------------------------------------------------ -RBBISetBuilder::~RBBISetBuilder() -{ - RangeDescriptor *nextRangeDesc; - - // Walk through & delete the linked list of RangeDescriptors - for (nextRangeDesc = fRangeList; nextRangeDesc!=NULL;) { - RangeDescriptor *r = nextRangeDesc; - nextRangeDesc = r->fNext; - delete r; - } - - utrie2_close(fTrie); -} - - - - -//------------------------------------------------------------------------ -// -// build Build the list of non-overlapping character ranges -// from the Unicode Sets. 
-// -//------------------------------------------------------------------------ -void RBBISetBuilder::buildRanges() { - RBBINode *usetNode; - RangeDescriptor *rlRange; - - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();} - - // - // Initialize the process by creating a single range encompassing all characters - // that is in no sets. - // - fRangeList = new RangeDescriptor(*fStatus); // will check for status here - if (fRangeList == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - return; - } - fRangeList->fStartChar = 0; - fRangeList->fEndChar = 0x10ffff; - - if (U_FAILURE(*fStatus)) { - return; - } - - // - // Find the set of non-overlapping ranges of characters - // - int ni; - for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules - usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni); - if (usetNode==NULL) { - break; - } - - UnicodeSet *inputSet = usetNode->fInputSet; - int32_t inputSetRangeCount = inputSet->getRangeCount(); - int inputSetRangeIndex = 0; - rlRange = fRangeList; - - for (;;) { - if (inputSetRangeIndex >= inputSetRangeCount) { - break; - } - UChar32 inputSetRangeBegin = inputSet->getRangeStart(inputSetRangeIndex); - UChar32 inputSetRangeEnd = inputSet->getRangeEnd(inputSetRangeIndex); - - // skip over ranges from the range list that are completely - // below the current range from the input unicode set. - while (rlRange->fEndChar < inputSetRangeBegin) { - rlRange = rlRange->fNext; - } - - // If the start of the range from the range list is before with - // the start of the range from the unicode set, split the range list range - // in two, with one part being before (wholly outside of) the unicode set - // and the other containing the rest. 
- // Then continue the loop; the post-split current range will then be skipped - // over - if (rlRange->fStartChar < inputSetRangeBegin) { - rlRange->split(inputSetRangeBegin, *fStatus); - if (U_FAILURE(*fStatus)) { - return; - } - continue; - } - - // Same thing at the end of the ranges... - // If the end of the range from the range list doesn't coincide with - // the end of the range from the unicode set, split the range list - // range in two. The first part of the split range will be - // wholly inside the Unicode set. - if (rlRange->fEndChar > inputSetRangeEnd) { - rlRange->split(inputSetRangeEnd+1, *fStatus); - if (U_FAILURE(*fStatus)) { - return; - } - } - - // The current rlRange is now entirely within the UnicodeSet range. - // Add this unicode set to the list of sets for this rlRange - if (rlRange->fIncludesSets->indexOf(usetNode) == -1) { - rlRange->fIncludesSets->addElement(usetNode, *fStatus); - if (U_FAILURE(*fStatus)) { - return; - } - } - - // Advance over ranges that we are finished with. - if (inputSetRangeEnd == rlRange->fEndChar) { - inputSetRangeIndex++; - } - rlRange = rlRange->fNext; - } - } - - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();} - - // - // Group the above ranges, with each group consisting of one or more - // ranges that are in exactly the same set of original UnicodeSets. - // The groups are numbered, and these group numbers are the set of - // input symbols recognized by the run-time state machine. - // - // Numbering: # 0 (state table column 0) is unused. - // # 1 is reserved - table column 1 is for end-of-input - // # 2 is reserved - table column 2 is for beginning-in-input - // # 3 is the first range list. 
- // - RangeDescriptor *rlSearchRange; - for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { - for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) { - if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) { - rlRange->fNum = rlSearchRange->fNum; - break; - } - } - if (rlRange->fNum == 0) { - fGroupCount ++; - rlRange->fNum = fGroupCount+2; - rlRange->setDictionaryFlag(); - addValToSets(rlRange->fIncludesSets, fGroupCount+2); - } - } - - // Handle input sets that contain the special string {eof}. - // Column 1 of the state table is reserved for EOF on input. - // Column 2 is reserved for before-the-start-input. - // (This column can be optimized away later if there are no rule - // references to {bof}.) - // Add this column value (1 or 2) to the equivalent expression - // subtree for each UnicodeSet that contains the string {eof} - // Because {bof} and {eof} are not a characters in the normal sense, - // they doesn't affect the computation of ranges or TRIE. - static const UChar eofUString[] = {0x65, 0x6f, 0x66, 0}; - static const UChar bofUString[] = {0x62, 0x6f, 0x66, 0}; - - UnicodeString eofString(eofUString); - UnicodeString bofString(bofUString); - for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules - usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni); - if (usetNode==NULL) { - break; - } - UnicodeSet *inputSet = usetNode->fInputSet; - if (inputSet->contains(eofString)) { - addValToSet(usetNode, 1); - } - if (inputSet->contains(bofString)) { - addValToSet(usetNode, 2); - fSawBOF = TRUE; - } - } - - - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();} - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();} -} - - -// -// Build the Trie table for mapping UChar32 values to the corresponding -// range group number. 
-// -void RBBISetBuilder::buildTrie() { - RangeDescriptor *rlRange; - - fTrie = utrie2_open(0, // Initial value for all code points. - 0, // Error value for out-of-range input. - fStatus); - - for (rlRange = fRangeList; rlRange!=0 && U_SUCCESS(*fStatus); rlRange=rlRange->fNext) { - utrie2_setRange32(fTrie, - rlRange->fStartChar, // Range start - rlRange->fEndChar, // Range end (inclusive) - rlRange->fNum, // value for range - TRUE, // Overwrite previously written values - fStatus); - } -} - - -void RBBISetBuilder::mergeCategories(IntPair categories) { - U_ASSERT(categories.first >= 1); - U_ASSERT(categories.second > categories.first); - for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) { - int32_t rangeNum = rd->fNum & ~DICT_BIT; - int32_t rangeDict = rd->fNum & DICT_BIT; - if (rangeNum == categories.second) { - rd->fNum = categories.first | rangeDict; - } else if (rangeNum > categories.second) { - rd->fNum--; - } - } - --fGroupCount; -} - - -//----------------------------------------------------------------------------------- -// -// getTrieSize() Return the size that will be required to serialize the Trie. -// -//----------------------------------------------------------------------------------- -int32_t RBBISetBuilder::getTrieSize() { - if (U_FAILURE(*fStatus)) { - return 0; - } - utrie2_freeze(fTrie, UTRIE2_16_VALUE_BITS, fStatus); - fTrieSize = utrie2_serialize(fTrie, - NULL, // Buffer - 0, // Capacity - fStatus); - if (*fStatus == U_BUFFER_OVERFLOW_ERROR) { - *fStatus = U_ZERO_ERROR; - } - // RBBIDebugPrintf("Trie table size is %d\n", trieSize); - return fTrieSize; -} - - -//----------------------------------------------------------------------------------- -// -// serializeTrie() Put the serialized trie at the specified address. -// Trust the caller to have given us enough memory. -// getTrieSize() MUST be called first. 
-// -//----------------------------------------------------------------------------------- -void RBBISetBuilder::serializeTrie(uint8_t *where) { - utrie2_serialize(fTrie, - where, // Buffer - fTrieSize, // Capacity - fStatus); -} - -//------------------------------------------------------------------------ -// -// addValToSets Add a runtime-mapped input value to each uset from a -// list of uset nodes. (val corresponds to a state table column.) -// For each of the original Unicode sets - which correspond -// directly to uset nodes - a logically equivalent expression -// is constructed in terms of the remapped runtime input -// symbol set. This function adds one runtime input symbol to -// a list of sets. -// -// The "logically equivalent expression" is the tree for an -// or-ing together of all of the symbols that go into the set. -// -//------------------------------------------------------------------------ -void RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) { - int32_t ix; - - for (ix=0; ixsize(); ix++) { - RBBINode *usetNode = (RBBINode *)sets->elementAt(ix); - addValToSet(usetNode, val); - } -} - -void RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) { - RBBINode *leafNode = new RBBINode(RBBINode::leafChar); - if (leafNode == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - return; - } - leafNode->fVal = (unsigned short)val; - if (usetNode->fLeftChild == NULL) { - usetNode->fLeftChild = leafNode; - leafNode->fParent = usetNode; - } else { - // There are already input symbols present for this set. - // Set up an OR node, with the previous stuff as the left child - // and the new value as the right child. 
- RBBINode *orNode = new RBBINode(RBBINode::opOr); - if (orNode == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - return; - } - orNode->fLeftChild = usetNode->fLeftChild; - orNode->fRightChild = leafNode; - orNode->fLeftChild->fParent = orNode; - orNode->fRightChild->fParent = orNode; - usetNode->fLeftChild = orNode; - orNode->fParent = usetNode; - } -} - - -//------------------------------------------------------------------------ -// -// getNumCharCategories -// -//------------------------------------------------------------------------ -int32_t RBBISetBuilder::getNumCharCategories() const { - return fGroupCount + 3; -} - - -//------------------------------------------------------------------------ -// -// sawBOF -// -//------------------------------------------------------------------------ -UBool RBBISetBuilder::sawBOF() const { - return fSawBOF; -} - - -//------------------------------------------------------------------------ -// -// getFirstChar Given a runtime RBBI character category, find -// the first UChar32 that is in the set of chars -// in the category. -//------------------------------------------------------------------------ -UChar32 RBBISetBuilder::getFirstChar(int32_t category) const { - RangeDescriptor *rlRange; - UChar32 retVal = (UChar32)-1; - for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { - if (rlRange->fNum == category) { - retVal = rlRange->fStartChar; - break; - } - } - return retVal; -} - - - -//------------------------------------------------------------------------ -// -// printRanges A debugging function. -// dump out all of the range definitions. 
-// -//------------------------------------------------------------------------ -#ifdef RBBI_DEBUG -void RBBISetBuilder::printRanges() { - RangeDescriptor *rlRange; - int i; - - RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n"); - for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { - RBBIDebugPrintf("%2i %4x-%4x ", rlRange->fNum, rlRange->fStartChar, rlRange->fEndChar); - - for (i=0; ifIncludesSets->size(); i++) { - RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i); - UnicodeString setName = UNICODE_STRING("anon", 4); - RBBINode *setRef = usetNode->fParent; - if (setRef != NULL) { - RBBINode *varRef = setRef->fParent; - if (varRef != NULL && varRef->fType == RBBINode::varRef) { - setName = varRef->fText; - } - } - RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" "); - } - RBBIDebugPrintf("\n"); - } -} -#endif - - -//------------------------------------------------------------------------ -// -// printRangeGroups A debugging function. -// dump out all of the range groups. 
-// -//------------------------------------------------------------------------ -#ifdef RBBI_DEBUG -void RBBISetBuilder::printRangeGroups() { - RangeDescriptor *rlRange; - RangeDescriptor *tRange; - int i; - int lastPrintedGroupNum = 0; - - RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n"); - for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { - int groupNum = rlRange->fNum & 0xbfff; - if (groupNum > lastPrintedGroupNum) { - lastPrintedGroupNum = groupNum; - RBBIDebugPrintf("%2i ", groupNum); - - if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" ");} - - for (i=0; ifIncludesSets->size(); i++) { - RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i); - UnicodeString setName = UNICODE_STRING("anon", 4); - RBBINode *setRef = usetNode->fParent; - if (setRef != NULL) { - RBBINode *varRef = setRef->fParent; - if (varRef != NULL && varRef->fType == RBBINode::varRef) { - setName = varRef->fText; - } - } - RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" "); - } - - i = 0; - for (tRange = rlRange; tRange != 0; tRange = tRange->fNext) { - if (tRange->fNum == rlRange->fNum) { - if (i++ % 5 == 0) { - RBBIDebugPrintf("\n "); - } - RBBIDebugPrintf(" %05x-%05x", tRange->fStartChar, tRange->fEndChar); - } - } - RBBIDebugPrintf("\n"); - } - } - RBBIDebugPrintf("\n"); -} -#endif - - -//------------------------------------------------------------------------ -// -// printSets A debugging function. -// dump out all of the set definitions. 
-// -//------------------------------------------------------------------------ -#ifdef RBBI_DEBUG -void RBBISetBuilder::printSets() { - int i; - - RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n"); - for (i=0; ; i++) { - RBBINode *usetNode; - RBBINode *setRef; - RBBINode *varRef; - UnicodeString setName; - - usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(i); - if (usetNode == NULL) { - break; - } - - RBBIDebugPrintf("%3d ", i); - setName = UNICODE_STRING("anonymous", 9); - setRef = usetNode->fParent; - if (setRef != NULL) { - varRef = setRef->fParent; - if (varRef != NULL && varRef->fType == RBBINode::varRef) { - setName = varRef->fText; - } - } - RBBI_DEBUG_printUnicodeString(setName); - RBBIDebugPrintf(" "); - RBBI_DEBUG_printUnicodeString(usetNode->fText); - RBBIDebugPrintf("\n"); - if (usetNode->fLeftChild != NULL) { - RBBINode::printTree(usetNode->fLeftChild, TRUE); - } - } - RBBIDebugPrintf("\n"); -} -#endif - - - -//------------------------------------------------------------------------------------- -// -// RangeDescriptor copy constructor -// -//------------------------------------------------------------------------------------- - -RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status) { - int i; - - this->fStartChar = other.fStartChar; - this->fEndChar = other.fEndChar; - this->fNum = other.fNum; - this->fNext = NULL; - UErrorCode oldstatus = status; - this->fIncludesSets = new UVector(status); - if (U_FAILURE(oldstatus)) { - status = oldstatus; - } - if (U_FAILURE(status)) { - return; - } - /* test for NULL */ - if (this->fIncludesSets == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - - for (i=0; isize(); i++) { - this->fIncludesSets->addElement(other.fIncludesSets->elementAt(i), status); - } -} - - -//------------------------------------------------------------------------------------- -// -// RangeDesriptor default constructor -// 
-//------------------------------------------------------------------------------------- -RangeDescriptor::RangeDescriptor(UErrorCode &status) { - this->fStartChar = 0; - this->fEndChar = 0; - this->fNum = 0; - this->fNext = NULL; - UErrorCode oldstatus = status; - this->fIncludesSets = new UVector(status); - if (U_FAILURE(oldstatus)) { - status = oldstatus; - } - if (U_FAILURE(status)) { - return; - } - /* test for NULL */ - if(this->fIncludesSets == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - -} - - -//------------------------------------------------------------------------------------- -// -// RangeDesriptor Destructor -// -//------------------------------------------------------------------------------------- -RangeDescriptor::~RangeDescriptor() { - delete fIncludesSets; - fIncludesSets = NULL; -} - -//------------------------------------------------------------------------------------- -// -// RangeDesriptor::split() -// -//------------------------------------------------------------------------------------- -void RangeDescriptor::split(UChar32 where, UErrorCode &status) { - U_ASSERT(where>fStartChar && where<=fEndChar); - RangeDescriptor *nr = new RangeDescriptor(*this, status); - if(nr == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - if (U_FAILURE(status)) { - delete nr; - return; - } - // RangeDescriptor copy constructor copies all fields. - // Only need to update those that are different after the split. - nr->fStartChar = where; - this->fEndChar = where-1; - nr->fNext = this->fNext; - this->fNext = nr; -} - - -//------------------------------------------------------------------------------------- -// -// RangeDescriptor::setDictionaryFlag -// -// Character Category Numbers that include characters from -// the original Unicode Set named "dictionary" have bit 14 -// set to 1. The RBBI runtime engine uses this to trigger -// use of the word dictionary. 
-// -// This function looks through the Unicode Sets that it -// (the range) includes, and sets the bit in fNum when -// "dictionary" is among them. -// -// TODO: a faster way would be to find the set node for -// "dictionary" just once, rather than looking it -// up by name every time. -// -//------------------------------------------------------------------------------------- -void RangeDescriptor::setDictionaryFlag() { - int i; - - static const char16_t *dictionary = u"dictionary"; - for (i=0; isize(); i++) { - RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i); - RBBINode *setRef = usetNode->fParent; - if (setRef != nullptr) { - RBBINode *varRef = setRef->fParent; - if (varRef && varRef->fType == RBBINode::varRef) { - const UnicodeString *setName = &varRef->fText; - if (setName->compare(dictionary, -1) == 0) { - fNum |= RBBISetBuilder::DICT_BIT; - break; - } - } - } - } -} - - - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/node/deps/icu-small/source/common/rbbisetb.h b/deps/node/deps/icu-small/source/common/rbbisetb.h deleted file mode 100644 index ed6a76b1..00000000 --- a/deps/node/deps/icu-small/source/common/rbbisetb.h +++ /dev/null @@ -1,147 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// -// rbbisetb.h -/* -********************************************************************** -* Copyright (c) 2001-2005, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -*/ - -#ifndef RBBISETB_H -#define RBBISETB_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/uobject.h" -#include "rbbirb.h" -#include "utrie2.h" -#include "uvector.h" - -U_NAMESPACE_BEGIN - -// -// RBBISetBuilder Derives the character categories used by the runtime RBBI engine -// from the Unicode Sets appearing in the source RBBI rules, and -// creates the TRIE table used to map from Unicode to the -// character categories. -// - - -// -// RangeDescriptor -// -// Each of the non-overlapping character ranges gets one of these descriptors. -// All of them are strung together in a linked list, which is kept in order -// (by character) -// -class RangeDescriptor : public UMemory { -public: - UChar32 fStartChar; // Start of range, unicode 32 bit value. - UChar32 fEndChar; // End of range, unicode 32 bit value. - int32_t fNum; // runtime-mapped input value for this range. - UVector *fIncludesSets; // vector of the the original - // Unicode sets that include this range. - // (Contains ptrs to uset nodes) - RangeDescriptor *fNext; // Next RangeDescriptor in the linked list. - - RangeDescriptor(UErrorCode &status); - RangeDescriptor(const RangeDescriptor &other, UErrorCode &status); - ~RangeDescriptor(); - void split(UChar32 where, UErrorCode &status); // Spit this range in two at "where", with - // where appearing in the second (higher) part. - void setDictionaryFlag(); // Check whether this range appears as part of - // the Unicode set named "dictionary" - -private: - RangeDescriptor(const RangeDescriptor &other); // forbid copying of this class - RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying of this class -}; - - -// -// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules. 
-// -// Starting with the rules parse tree from the scanner, -// -// - Enumerate the set of UnicodeSets that are referenced -// by the RBBI rules. -// - compute a derived set of non-overlapping UnicodeSets -// that will correspond to columns in the state table for -// the RBBI execution engine. -// - construct the trie table that maps input characters -// to set numbers in the non-overlapping set of sets. -// - - -class RBBISetBuilder : public UMemory { -public: - RBBISetBuilder(RBBIRuleBuilder *rb); - ~RBBISetBuilder(); - - void buildRanges(); - void buildTrie(); - void addValToSets(UVector *sets, uint32_t val); - void addValToSet (RBBINode *usetNode, uint32_t val); - int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the - // runtime state machine, which are the same as - // columns in the DFA state table - int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie. - void serializeTrie(uint8_t *where); // write out the serialized Trie. - UChar32 getFirstChar(int32_t val) const; - UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo - // character were encountered. - /** - * Merge two character categories that have been identified as having equivalent behavior. - * The ranges belonging to the second category (table column) will be added to the first. - * @param categories the pair of categories to be merged. - */ - void mergeCategories(IntPair categories); - - static constexpr int32_t DICT_BIT = 0x4000; - -#ifdef RBBI_DEBUG - void printSets(); - void printRanges(); - void printRangeGroups(); -#else - #define printSets() - #define printRanges() - #define printRangeGroups() -#endif - -private: - void numberSets(); - - RBBIRuleBuilder *fRB; // The RBBI Rule Compiler that owns us. 
- UErrorCode *fStatus; - - RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors - - UTrie2 *fTrie; // The mapping TRIE that is the end result of processing - uint32_t fTrieSize; // the Unicode Sets. - - // Groups correspond to character categories - - // groups of ranges that are in the same original UnicodeSets. - // fGroupCount is the index of the last used group. - // fGroupCount+1 is also the number of columns in the RBBI state table being compiled. - // State table column 0 is not used. Column 1 is for end-of-input. - // column 2 is for group 0. Funny counting. - int32_t fGroupCount; - - UBool fSawBOF; - - RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class - RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class -}; - - - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/rbbistbl.cpp b/deps/node/deps/icu-small/source/common/rbbistbl.cpp deleted file mode 100644 index d9099229..00000000 --- a/deps/node/deps/icu-small/source/common/rbbistbl.cpp +++ /dev/null @@ -1,270 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// -// file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class -// -/* -*************************************************************************** -* Copyright (C) 2002-2014 International Business Machines Corporation -* and others. All rights reserved. 
-*************************************************************************** -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/unistr.h" -#include "unicode/uniset.h" -#include "unicode/uchar.h" -#include "unicode/parsepos.h" - -#include "cstr.h" -#include "rbbinode.h" -#include "rbbirb.h" -#include "umutex.h" - - -// -// RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents -// when the hash table is deleted. -// -U_CDECL_BEGIN -static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) { - icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p; - delete px; -} -U_CDECL_END - - - -U_NAMESPACE_BEGIN - -RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status) - :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff)) -{ - fHashTable = NULL; - fCachedSetLookup = NULL; - - fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status); - // uhash_open checks status - if (U_FAILURE(status)) { - return; - } - uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter); -} - - - -RBBISymbolTable::~RBBISymbolTable() -{ - uhash_close(fHashTable); -} - - -// -// RBBISymbolTable::lookup This function from the abstract symbol table inteface -// looks up a variable name and returns a UnicodeString -// containing the substitution text. -// -// The variable name does NOT include the leading $. 
-// -const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const -{ - RBBISymbolTableEntry *el; - RBBINode *varRefNode; - RBBINode *exprNode; - RBBINode *usetNode; - const UnicodeString *retString; - RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const - - el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s); - if (el == NULL) { - return NULL; - } - - varRefNode = el->val; - exprNode = varRefNode->fLeftChild; // Root node of expression for variable - if (exprNode->fType == RBBINode::setRef) { - // The $variable refers to a single UnicodeSet - // return the ffffString, which will subsequently be interpreted as a - // stand-in character for the set by RBBISymbolTable::lookupMatcher() - usetNode = exprNode->fLeftChild; - This->fCachedSetLookup = usetNode->fInputSet; - retString = &ffffString; - } - else - { - // The variable refers to something other than just a set. - // return the original source string for the expression - retString = &exprNode->fText; - This->fCachedSetLookup = NULL; - } - return retString; -} - - - -// -// RBBISymbolTable::lookupMatcher This function from the abstract symbol table -// interface maps a single stand-in character to a -// pointer to a Unicode Set. The Unicode Set code uses this -// mechanism to get all references to the same $variable -// name to refer to a single common Unicode Set instance. -// -// This implementation cheats a little, and does not maintain a map of stand-in chars -// to sets. Instead, it takes advantage of the fact that the UnicodeSet -// constructor will always call this function right after calling lookup(), -// and we just need to remember what set to return between these two calls. 
-const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const -{ - UnicodeSet *retVal = NULL; - RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const - if (ch == 0xffff) { - retVal = fCachedSetLookup; - This->fCachedSetLookup = 0; - } - return retVal; -} - -// -// RBBISymbolTable::parseReference This function from the abstract symbol table interface -// looks for a $variable name in the source text. -// It does not look it up, only scans for it. -// It is used by the UnicodeSet parser. -// -// This implementation is lifted pretty much verbatim -// from the rules based transliterator implementation. -// I didn't see an obvious way of sharing it. -// -UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text, - ParsePosition& pos, int32_t limit) const -{ - int32_t start = pos.getIndex(); - int32_t i = start; - UnicodeString result; - while (i < limit) { - UChar c = text.charAt(i); - if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { - break; - } - ++i; - } - if (i == start) { // No valid name chars - return result; // Indicate failure with empty string - } - pos.setIndex(i); - text.extractBetween(start, i, result); - return result; -} - - - -// -// RBBISymbolTable::lookupNode Given a key (a variable name), return the -// corresponding RBBI Node. If there is no entry -// in the table for this name, return NULL. -// -RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{ - - RBBINode *retNode = NULL; - RBBISymbolTableEntry *el; - - el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); - if (el != NULL) { - retNode = el->val; - } - return retNode; -} - - -// -// RBBISymbolTable::addEntry Add a new entry to the symbol table. -// Indicate an error if the name already exists - -// this will only occur in the case of duplicate -// variable assignments. 
-// -void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) { - RBBISymbolTableEntry *e; - /* test for buffer overflows */ - if (U_FAILURE(err)) { - return; - } - e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); - if (e != NULL) { - err = U_BRK_VARIABLE_REDFINITION; - return; - } - - e = new RBBISymbolTableEntry; - if (e == NULL) { - err = U_MEMORY_ALLOCATION_ERROR; - return; - } - e->key = key; - e->val = val; - uhash_put( fHashTable, &e->key, e, &err); -} - - -RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {} - -RBBISymbolTableEntry::~RBBISymbolTableEntry() { - // The "val" of a symbol table entry is a variable reference node. - // The l. child of the val is the rhs expression from the assignment. - // Unlike other node types, children of variable reference nodes are not - // automatically recursively deleted. We do it manually here. - delete val->fLeftChild; - val->fLeftChild = NULL; - - delete val; - - // Note: the key UnicodeString is destructed by virtue of being in the object by value. -} - - -// -// RBBISymbolTable::print Debugging function, dump out the symbol table contents. 
-// -#ifdef RBBI_DEBUG -void RBBISymbolTable::rbbiSymtablePrint() const { - RBBIDebugPrintf("Variable Definitions Symbol Table\n" - "Name Node serial String Val\n" - "-------------------------------------------------------------------\n"); - - int32_t pos = UHASH_FIRST; - const UHashElement *e = NULL; - for (;;) { - e = uhash_nextElement(fHashTable, &pos); - if (e == NULL ) { - break; - } - RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; - - RBBIDebugPrintf("%-19s %8p %7d ", CStr(s->key)(), (void *)s->val, s->val->fSerialNum); - RBBIDebugPrintf(" %s\n", CStr(s->val->fLeftChild->fText)()); - } - - RBBIDebugPrintf("\nParsed Variable Definitions\n"); - pos = -1; - for (;;) { - e = uhash_nextElement(fHashTable, &pos); - if (e == NULL ) { - break; - } - RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; - RBBIDebugPrintf("%s\n", CStr(s->key)()); - RBBINode::printTree(s->val, TRUE); - RBBINode::printTree(s->val->fLeftChild, FALSE); - RBBIDebugPrintf("\n"); - } -} -#endif - - - - - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/node/deps/icu-small/source/common/rbbitblb.cpp b/deps/node/deps/icu-small/source/common/rbbitblb.cpp deleted file mode 100644 index e6ae2dc6..00000000 --- a/deps/node/deps/icu-small/source/common/rbbitblb.cpp +++ /dev/null @@ -1,1684 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -*/ -// -// rbbitblb.cpp -// - - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/unistr.h" -#include "rbbitblb.h" -#include "rbbirb.h" -#include "rbbisetb.h" -#include "rbbidata.h" -#include "cstring.h" -#include "uassert.h" -#include "uvectr32.h" -#include "cmemory.h" - -U_NAMESPACE_BEGIN - -RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) : - fRB(rb), - fTree(*rootNode), - fStatus(&status), - fDStates(nullptr), - fSafeTable(nullptr) { - if (U_FAILURE(status)) { - return; - } - // fDStates is UVector - fDStates = new UVector(status); - if (U_SUCCESS(status) && fDStates == nullptr ) { - status = U_MEMORY_ALLOCATION_ERROR; - } -} - - - -RBBITableBuilder::~RBBITableBuilder() { - int i; - for (i=0; isize(); i++) { - delete (RBBIStateDescriptor *)fDStates->elementAt(i); - } - delete fDStates; - delete fSafeTable; -} - - -//----------------------------------------------------------------------------- -// -// RBBITableBuilder::buildForwardTable - This is the main function for building -// the DFA state transition table from the RBBI rules parse tree. -// -//----------------------------------------------------------------------------- -void RBBITableBuilder::buildForwardTable() { - - if (U_FAILURE(*fStatus)) { - return; - } - - // If there were no rules, just return. This situation can easily arise - // for the reverse rules. - if (fTree==NULL) { - return; - } - - // - // Walk through the tree, replacing any references to $variables with a copy of the - // parse tree for the substition expression. 
- // - fTree = fTree->flattenVariables(); -#ifdef RBBI_DEBUG - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) { - RBBIDebugPuts("\nParse tree after flattening variable references."); - RBBINode::printTree(fTree, TRUE); - } -#endif - - // - // If the rules contained any references to {bof} - // add a {bof} to the - // tree. Means that all matches must start out with the - // {bof} fake character. - // - if (fRB->fSetBuilder->sawBOF()) { - RBBINode *bofTop = new RBBINode(RBBINode::opCat); - RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar); - // Delete and exit if memory allocation failed. - if (bofTop == NULL || bofLeaf == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - delete bofTop; - delete bofLeaf; - return; - } - bofTop->fLeftChild = bofLeaf; - bofTop->fRightChild = fTree; - bofLeaf->fParent = bofTop; - bofLeaf->fVal = 2; // Reserved value for {bof}. - fTree = bofTop; - } - - // - // Add a unique right-end marker to the expression. - // Appears as a cat-node, left child being the original tree, - // right child being the end marker. - // - RBBINode *cn = new RBBINode(RBBINode::opCat); - // Exit if memory allocation failed. - if (cn == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - return; - } - cn->fLeftChild = fTree; - fTree->fParent = cn; - cn->fRightChild = new RBBINode(RBBINode::endMark); - // Delete and exit if memory allocation failed. - if (cn->fRightChild == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - delete cn; - return; - } - cn->fRightChild->fParent = cn; - fTree = cn; - - // - // Replace all references to UnicodeSets with the tree for the equivalent - // expression. - // - fTree->flattenSets(); -#ifdef RBBI_DEBUG - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) { - RBBIDebugPuts("\nParse tree after flattening Unicode Set references."); - RBBINode::printTree(fTree, TRUE); - } -#endif - - - // - // calculate the functions nullable, firstpos, lastpos and followpos on - // nodes in the parse tree. 
- // See the alogrithm description in Aho. - // Understanding how this works by looking at the code alone will be - // nearly impossible. - // - calcNullable(fTree); - calcFirstPos(fTree); - calcLastPos(fTree); - calcFollowPos(fTree); - if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) { - RBBIDebugPuts("\n"); - printPosSets(fTree); - } - - // - // For "chained" rules, modify the followPos sets - // - if (fRB->fChainRules) { - calcChainedFollowPos(fTree); - } - - // - // BOF (start of input) test fixup. - // - if (fRB->fSetBuilder->sawBOF()) { - bofFixup(); - } - - // - // Build the DFA state transition tables. - // - buildStateTable(); - flagAcceptingStates(); - flagLookAheadStates(); - flagTaggedStates(); - - // - // Update the global table of rule status {tag} values - // The rule builder has a global vector of status values that are common - // for all tables. Merge the ones from this table into the global set. - // - mergeRuleStatusVals(); -} - - - -//----------------------------------------------------------------------------- -// -// calcNullable. Impossible to explain succinctly. See Aho, section 3.9 -// -//----------------------------------------------------------------------------- -void RBBITableBuilder::calcNullable(RBBINode *n) { - if (n == NULL) { - return; - } - if (n->fType == RBBINode::setRef || - n->fType == RBBINode::endMark ) { - // These are non-empty leaf node types. - n->fNullable = FALSE; - return; - } - - if (n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) { - // Lookahead marker node. It's a leaf, so no recursion on children. - // It's nullable because it does not match any literal text from the input stream. - n->fNullable = TRUE; - return; - } - - - // The node is not a leaf. - // Calculate nullable on its children. 
- calcNullable(n->fLeftChild); - calcNullable(n->fRightChild); - - // Apply functions from table 3.40 in Aho - if (n->fType == RBBINode::opOr) { - n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable; - } - else if (n->fType == RBBINode::opCat) { - n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable; - } - else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion) { - n->fNullable = TRUE; - } - else { - n->fNullable = FALSE; - } -} - - - - -//----------------------------------------------------------------------------- -// -// calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9 -// -//----------------------------------------------------------------------------- -void RBBITableBuilder::calcFirstPos(RBBINode *n) { - if (n == NULL) { - return; - } - if (n->fType == RBBINode::leafChar || - n->fType == RBBINode::endMark || - n->fType == RBBINode::lookAhead || - n->fType == RBBINode::tag) { - // These are non-empty leaf node types. - // Note: In order to maintain the sort invariant on the set, - // this function should only be called on a node whose set is - // empty to start with. - n->fFirstPosSet->addElement(n, *fStatus); - return; - } - - // The node is not a leaf. - // Calculate firstPos on its children. 
- calcFirstPos(n->fLeftChild); - calcFirstPos(n->fRightChild); - - // Apply functions from table 3.40 in Aho - if (n->fType == RBBINode::opOr) { - setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); - setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet); - } - else if (n->fType == RBBINode::opCat) { - setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); - if (n->fLeftChild->fNullable) { - setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet); - } - } - else if (n->fType == RBBINode::opStar || - n->fType == RBBINode::opQuestion || - n->fType == RBBINode::opPlus) { - setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); - } -} - - - -//----------------------------------------------------------------------------- -// -// calcLastPos. Impossible to explain succinctly. See Aho, section 3.9 -// -//----------------------------------------------------------------------------- -void RBBITableBuilder::calcLastPos(RBBINode *n) { - if (n == NULL) { - return; - } - if (n->fType == RBBINode::leafChar || - n->fType == RBBINode::endMark || - n->fType == RBBINode::lookAhead || - n->fType == RBBINode::tag) { - // These are non-empty leaf node types. - // Note: In order to maintain the sort invariant on the set, - // this function should only be called on a node whose set is - // empty to start with. - n->fLastPosSet->addElement(n, *fStatus); - return; - } - - // The node is not a leaf. - // Calculate lastPos on its children. 
- calcLastPos(n->fLeftChild); - calcLastPos(n->fRightChild); - - // Apply functions from table 3.40 in Aho - if (n->fType == RBBINode::opOr) { - setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); - setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); - } - else if (n->fType == RBBINode::opCat) { - setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); - if (n->fRightChild->fNullable) { - setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); - } - } - else if (n->fType == RBBINode::opStar || - n->fType == RBBINode::opQuestion || - n->fType == RBBINode::opPlus) { - setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); - } -} - - - -//----------------------------------------------------------------------------- -// -// calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9 -// -//----------------------------------------------------------------------------- -void RBBITableBuilder::calcFollowPos(RBBINode *n) { - if (n == NULL || - n->fType == RBBINode::leafChar || - n->fType == RBBINode::endMark) { - return; - } - - calcFollowPos(n->fLeftChild); - calcFollowPos(n->fRightChild); - - // Aho rule #1 - if (n->fType == RBBINode::opCat) { - RBBINode *i; // is 'i' in Aho's description - uint32_t ix; - - UVector *LastPosOfLeftChild = n->fLeftChild->fLastPosSet; - - for (ix=0; ix<(uint32_t)LastPosOfLeftChild->size(); ix++) { - i = (RBBINode *)LastPosOfLeftChild->elementAt(ix); - setAdd(i->fFollowPos, n->fRightChild->fFirstPosSet); - } - } - - // Aho rule #2 - if (n->fType == RBBINode::opStar || - n->fType == RBBINode::opPlus) { - RBBINode *i; // again, n and i are the names from Aho's description. 
- uint32_t ix; - - for (ix=0; ix<(uint32_t)n->fLastPosSet->size(); ix++) { - i = (RBBINode *)n->fLastPosSet->elementAt(ix); - setAdd(i->fFollowPos, n->fFirstPosSet); - } - } - - - -} - -//----------------------------------------------------------------------------- -// -// addRuleRootNodes Recursively walk a parse tree, adding all nodes flagged -// as roots of a rule to a destination vector. -// -//----------------------------------------------------------------------------- -void RBBITableBuilder::addRuleRootNodes(UVector *dest, RBBINode *node) { - if (node == NULL || U_FAILURE(*fStatus)) { - return; - } - if (node->fRuleRoot) { - dest->addElement(node, *fStatus); - // Note: rules cannot nest. If we found a rule start node, - // no child node can also be a start node. - return; - } - addRuleRootNodes(dest, node->fLeftChild); - addRuleRootNodes(dest, node->fRightChild); -} - -//----------------------------------------------------------------------------- -// -// calcChainedFollowPos. Modify the previously calculated followPos sets -// to implement rule chaining. NOT described by Aho -// -//----------------------------------------------------------------------------- -void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree) { - - UVector endMarkerNodes(*fStatus); - UVector leafNodes(*fStatus); - int32_t i; - - if (U_FAILURE(*fStatus)) { - return; - } - - // get a list of all endmarker nodes. - tree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus); - - // get a list all leaf nodes - tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus); - if (U_FAILURE(*fStatus)) { - return; - } - - // Collect all leaf nodes that can start matches for rules - // with inbound chaining enabled, which is the union of the - // firstPosition sets from each of the rule root nodes. 
- - UVector ruleRootNodes(*fStatus); - addRuleRootNodes(&ruleRootNodes, tree); - - UVector matchStartNodes(*fStatus); - for (int j=0; j(ruleRootNodes.elementAt(j)); - if (node->fChainIn) { - setAdd(&matchStartNodes, node->fFirstPosSet); - } - } - if (U_FAILURE(*fStatus)) { - return; - } - - int32_t endNodeIx; - int32_t startNodeIx; - - for (endNodeIx=0; endNodeIxfFollowPos->contains(endMarkerNodes.elementAt(i))) { - endNode = tNode; - break; - } - } - if (endNode == NULL) { - // node wasn't an end node. Try again with the next. - continue; - } - - // We've got a node that can end a match. - - // Line Break Specific hack: If this node's val correspond to the $CM char class, - // don't chain from it. - // TODO: Add rule syntax for this behavior, get specifics out of here and - // into the rule file. - if (fRB->fLBCMNoChain) { - UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal); - if (c != -1) { - // c == -1 occurs with sets containing only the {eof} marker string. - ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_LINE_BREAK); - if (cLBProp == U_LB_COMBINING_MARK) { - continue; - } - } - } - - - // Now iterate over the nodes that can start a match, looking for ones - // with the same char class as our ending node. - RBBINode *startNode; - for (startNodeIx = 0; startNodeIxfType != RBBINode::leafChar) { - continue; - } - - if (endNode->fVal == startNode->fVal) { - // The end val (character class) of one possible match is the - // same as the start of another. - - // Add all nodes from the followPos of the start node to the - // followPos set of the end node, which will have the effect of - // letting matches transition from a match state at endNode - // to the second char of a match starting with startNode. - setAdd(endNode->fFollowPos, startNode->fFollowPos); - } - } - } -} - - -//----------------------------------------------------------------------------- -// -// bofFixup. 
Fixup for state tables that include {bof} beginning of input testing. -// Do an swizzle similar to chaining, modifying the followPos set of -// the bofNode to include the followPos nodes from other {bot} nodes -// scattered through the tree. -// -// This function has much in common with calcChainedFollowPos(). -// -//----------------------------------------------------------------------------- -void RBBITableBuilder::bofFixup() { - - if (U_FAILURE(*fStatus)) { - return; - } - - // The parse tree looks like this ... - // fTree root ---> - // / \ . - // <#end node> - // / \ . - // rest - // of tree - // - // We will be adding things to the followPos set of the - // - RBBINode *bofNode = fTree->fLeftChild->fLeftChild; - U_ASSERT(bofNode->fType == RBBINode::leafChar); - U_ASSERT(bofNode->fVal == 2); - - // Get all nodes that can be the start a match of the user-written rules - // (excluding the fake bofNode) - // We want the nodes that can start a match in the - // part labeled "rest of tree" - // - UVector *matchStartNodes = fTree->fLeftChild->fRightChild->fFirstPosSet; - - RBBINode *startNode; - int startNodeIx; - for (startNodeIx = 0; startNodeIxsize(); startNodeIx++) { - startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx); - if (startNode->fType != RBBINode::leafChar) { - continue; - } - - if (startNode->fVal == bofNode->fVal) { - // We found a leaf node corresponding to a {bof} that was - // explicitly written into a rule. - // Add everything from the followPos set of this node to the - // followPos set of the fake bofNode at the start of the tree. - // - setAdd(bofNode->fFollowPos, startNode->fFollowPos); - } - } -} - -//----------------------------------------------------------------------------- -// -// buildStateTable() Determine the set of runtime DFA states and the -// transition tables for these states, by the algorithm -// of fig. 3.44 in Aho. -// -// Most of the comments are quotes of Aho's psuedo-code. 
-// -//----------------------------------------------------------------------------- -void RBBITableBuilder::buildStateTable() { - if (U_FAILURE(*fStatus)) { - return; - } - RBBIStateDescriptor *failState; - // Set it to NULL to avoid uninitialized warning - RBBIStateDescriptor *initialState = NULL; - // - // Add a dummy state 0 - the stop state. Not from Aho. - int lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1; - failState = new RBBIStateDescriptor(lastInputSymbol, fStatus); - if (failState == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - goto ExitBuildSTdeleteall; - } - failState->fPositions = new UVector(*fStatus); - if (failState->fPositions == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - } - if (failState->fPositions == NULL || U_FAILURE(*fStatus)) { - goto ExitBuildSTdeleteall; - } - fDStates->addElement(failState, *fStatus); - if (U_FAILURE(*fStatus)) { - goto ExitBuildSTdeleteall; - } - - // initially, the only unmarked state in Dstates is firstpos(root), - // where toot is the root of the syntax tree for (r)#; - initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus); - if (initialState == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - } - if (U_FAILURE(*fStatus)) { - goto ExitBuildSTdeleteall; - } - initialState->fPositions = new UVector(*fStatus); - if (initialState->fPositions == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - } - if (U_FAILURE(*fStatus)) { - goto ExitBuildSTdeleteall; - } - setAdd(initialState->fPositions, fTree->fFirstPosSet); - fDStates->addElement(initialState, *fStatus); - if (U_FAILURE(*fStatus)) { - goto ExitBuildSTdeleteall; - } - - // while there is an unmarked state T in Dstates do begin - for (;;) { - RBBIStateDescriptor *T = NULL; - int32_t tx; - for (tx=1; txsize(); tx++) { - RBBIStateDescriptor *temp; - temp = (RBBIStateDescriptor *)fDStates->elementAt(tx); - if (temp->fMarked == FALSE) { - T = temp; - break; - } - } - if (T == NULL) { - break; - } - - // mark T; - T->fMarked = TRUE; 
- - // for each input symbol a do begin - int32_t a; - for (a = 1; a<=lastInputSymbol; a++) { - // let U be the set of positions that are in followpos(p) - // for some position p in T - // such that the symbol at position p is a; - UVector *U = NULL; - RBBINode *p; - int32_t px; - for (px=0; pxfPositions->size(); px++) { - p = (RBBINode *)T->fPositions->elementAt(px); - if ((p->fType == RBBINode::leafChar) && (p->fVal == a)) { - if (U == NULL) { - U = new UVector(*fStatus); - if (U == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - goto ExitBuildSTdeleteall; - } - } - setAdd(U, p->fFollowPos); - } - } - - // if U is not empty and not in DStates then - int32_t ux = 0; - UBool UinDstates = FALSE; - if (U != NULL) { - U_ASSERT(U->size() > 0); - int ix; - for (ix=0; ixsize(); ix++) { - RBBIStateDescriptor *temp2; - temp2 = (RBBIStateDescriptor *)fDStates->elementAt(ix); - if (setEquals(U, temp2->fPositions)) { - delete U; - U = temp2->fPositions; - ux = ix; - UinDstates = TRUE; - break; - } - } - - // Add U as an unmarked state to Dstates - if (!UinDstates) - { - RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus); - if (newState == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - } - if (U_FAILURE(*fStatus)) { - goto ExitBuildSTdeleteall; - } - newState->fPositions = U; - fDStates->addElement(newState, *fStatus); - if (U_FAILURE(*fStatus)) { - return; - } - ux = fDStates->size()-1; - } - - // Dtran[T, a] := U; - T->fDtran->setElementAt(ux, a); - } - } - } - return; - // delete local pointers only if error occured. -ExitBuildSTdeleteall: - delete initialState; - delete failState; -} - - - -//----------------------------------------------------------------------------- -// -// flagAcceptingStates Identify accepting states. -// First get a list of all of the end marker nodes. -// Then, for each state s, -// if s contains one of the end marker nodes in its list of tree positions then -// s is an accepting state. 
-// -//----------------------------------------------------------------------------- -void RBBITableBuilder::flagAcceptingStates() { - if (U_FAILURE(*fStatus)) { - return; - } - UVector endMarkerNodes(*fStatus); - RBBINode *endMarker; - int32_t i; - int32_t n; - - if (U_FAILURE(*fStatus)) { - return; - } - - fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus); - if (U_FAILURE(*fStatus)) { - return; - } - - for (i=0; isize(); n++) { - RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); - if (sd->fPositions->indexOf(endMarker) >= 0) { - // Any non-zero value for fAccepting means this is an accepting node. - // The value is what will be returned to the user as the break status. - // If no other value was specified, force it to -1. - - if (sd->fAccepting==0) { - // State hasn't been marked as accepting yet. Do it now. - sd->fAccepting = endMarker->fVal; - if (sd->fAccepting == 0) { - sd->fAccepting = -1; - } - } - if (sd->fAccepting==-1 && endMarker->fVal != 0) { - // Both lookahead and non-lookahead accepting for this state. - // Favor the look-ahead. Expedient for line break. - // TODO: need a more elegant resolution for conflicting rules. - sd->fAccepting = endMarker->fVal; - } - // implicit else: - // if sd->fAccepting already had a value other than 0 or -1, leave it be. - - // If the end marker node is from a look-ahead rule, set - // the fLookAhead field for this state also. - if (endMarker->fLookAheadEnd) { - // TODO: don't change value if already set? - // TODO: allow for more than one active look-ahead rule in engine. - // Make value here an index to a side array in engine? - sd->fLookAhead = sd->fAccepting; - } - } - } - } -} - - -//----------------------------------------------------------------------------- -// -// flagLookAheadStates Very similar to flagAcceptingStates, above. 
-// -//----------------------------------------------------------------------------- -void RBBITableBuilder::flagLookAheadStates() { - if (U_FAILURE(*fStatus)) { - return; - } - UVector lookAheadNodes(*fStatus); - RBBINode *lookAheadNode; - int32_t i; - int32_t n; - - fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus); - if (U_FAILURE(*fStatus)) { - return; - } - for (i=0; isize(); n++) { - RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); - if (sd->fPositions->indexOf(lookAheadNode) >= 0) { - sd->fLookAhead = lookAheadNode->fVal; - } - } - } -} - - - - -//----------------------------------------------------------------------------- -// -// flagTaggedStates -// -//----------------------------------------------------------------------------- -void RBBITableBuilder::flagTaggedStates() { - if (U_FAILURE(*fStatus)) { - return; - } - UVector tagNodes(*fStatus); - RBBINode *tagNode; - int32_t i; - int32_t n; - - if (U_FAILURE(*fStatus)) { - return; - } - fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus); - if (U_FAILURE(*fStatus)) { - return; - } - for (i=0; isize(); n++) { // For each state s (row in the state table) - RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); - if (sd->fPositions->indexOf(tagNode) >= 0) { // if s include the tag node t - sortedAdd(&sd->fTagVals, tagNode->fVal); - } - } - } -} - - - - -//----------------------------------------------------------------------------- -// -// mergeRuleStatusVals -// -// Update the global table of rule status {tag} values -// The rule builder has a global vector of status values that are common -// for all tables. Merge the ones from this table into the global set. -// -//----------------------------------------------------------------------------- -void RBBITableBuilder::mergeRuleStatusVals() { - // - // The basic outline of what happens here is this... 
- // - // for each state in this state table - // if the status tag list for this state is in the global statuses list - // record where and - // continue with the next state - // else - // add the tag list for this state to the global list. - // - int i; - int n; - - // Pre-set a single tag of {0} into the table. - // We will need this as a default, for rule sets with no explicit tagging. - if (fRB->fRuleStatusVals->size() == 0) { - fRB->fRuleStatusVals->addElement(1, *fStatus); // Num of statuses in group - fRB->fRuleStatusVals->addElement((int32_t)0, *fStatus); // and our single status of zero - } - - // For each state - for (n=0; nsize(); n++) { - RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); - UVector *thisStatesTagValues = sd->fTagVals; - if (thisStatesTagValues == NULL) { - // No tag values are explicitly associated with this state. - // Set the default tag value. - sd->fTagsIdx = 0; - continue; - } - - // There are tag(s) associated with this state. - // fTagsIdx will be the index into the global tag list for this state's tag values. - // Initial value of -1 flags that we haven't got it set yet. - sd->fTagsIdx = -1; - int32_t thisTagGroupStart = 0; // indexes into the global rule status vals list - int32_t nextTagGroupStart = 0; - - // Loop runs once per group of tags in the global list - while (nextTagGroupStart < fRB->fRuleStatusVals->size()) { - thisTagGroupStart = nextTagGroupStart; - nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupStart) + 1; - if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(thisTagGroupStart)) { - // The number of tags for this state is different from - // the number of tags in this group from the global list. - // Continue with the next group from the global list. - continue; - } - // The lengths match, go ahead and compare the actual tag values - // between this state and the group from the global list. 
- for (i=0; isize(); i++) { - if (thisStatesTagValues->elementAti(i) != - fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i) ) { - // Mismatch. - break; - } - } - - if (i == thisStatesTagValues->size()) { - // We found a set of tag values in the global list that match - // those for this state. Use them. - sd->fTagsIdx = thisTagGroupStart; - break; - } - } - - if (sd->fTagsIdx == -1) { - // No suitable entry in the global tag list already. Add one - sd->fTagsIdx = fRB->fRuleStatusVals->size(); - fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStatus); - for (i=0; isize(); i++) { - fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti(i), *fStatus); - } - } - } -} - - - - - - - -//----------------------------------------------------------------------------- -// -// sortedAdd Add a value to a vector of sorted values (ints). -// Do not replicate entries; if the value is already there, do not -// add a second one. -// Lazily create the vector if it does not already exist. -// -//----------------------------------------------------------------------------- -void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) { - int32_t i; - - if (*vector == NULL) { - *vector = new UVector(*fStatus); - } - if (*vector == NULL || U_FAILURE(*fStatus)) { - return; - } - UVector *vec = *vector; - int32_t vSize = vec->size(); - for (i=0; ielementAti(i); - if (valAtI == val) { - // The value is already in the vector. Don't add it again. - return; - } - if (valAtI > val) { - break; - } - } - vec->insertElementAt(val, i, *fStatus); -} - - - -//----------------------------------------------------------------------------- -// -// setAdd Set operation on UVector -// dest = dest union source -// Elements may only appear once and must be sorted. 
-// -//----------------------------------------------------------------------------- -void RBBITableBuilder::setAdd(UVector *dest, UVector *source) { - int32_t destOriginalSize = dest->size(); - int32_t sourceSize = source->size(); - int32_t di = 0; - MaybeStackArray destArray, sourceArray; // Handle small cases without malloc - void **destPtr, **sourcePtr; - void **destLim, **sourceLim; - - if (destOriginalSize > destArray.getCapacity()) { - if (destArray.resize(destOriginalSize) == NULL) { - return; - } - } - destPtr = destArray.getAlias(); - destLim = destPtr + destOriginalSize; // destArray.getArrayLimit()? - - if (sourceSize > sourceArray.getCapacity()) { - if (sourceArray.resize(sourceSize) == NULL) { - return; - } - } - sourcePtr = sourceArray.getAlias(); - sourceLim = sourcePtr + sourceSize; // sourceArray.getArrayLimit()? - - // Avoid multiple "get element" calls by getting the contents into arrays - (void) dest->toArray(destPtr); - (void) source->toArray(sourcePtr); - - dest->setSize(sourceSize+destOriginalSize, *fStatus); - - while (sourcePtr < sourceLim && destPtr < destLim) { - if (*destPtr == *sourcePtr) { - dest->setElementAt(*sourcePtr++, di++); - destPtr++; - } - // This check is required for machines with segmented memory, like i5/OS. - // Direct pointer comparison is not recommended. - else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) { - dest->setElementAt(*destPtr++, di++); - } - else { /* *sourcePtr < *destPtr */ - dest->setElementAt(*sourcePtr++, di++); - } - } - - // At most one of these two cleanup loops will execute - while (destPtr < destLim) { - dest->setElementAt(*destPtr++, di++); - } - while (sourcePtr < sourceLim) { - dest->setElementAt(*sourcePtr++, di++); - } - - dest->setSize(di, *fStatus); -} - - - -//----------------------------------------------------------------------------- -// -// setEqual Set operation on UVector. -// Compare for equality. -// Elements must be sorted. 
-// -//----------------------------------------------------------------------------- -UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) { - return a->equals(*b); -} - - -//----------------------------------------------------------------------------- -// -// printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos -// for each node in the tree. -// -//----------------------------------------------------------------------------- -#ifdef RBBI_DEBUG -void RBBITableBuilder::printPosSets(RBBINode *n) { - if (n==NULL) { - return; - } - printf("\n"); - RBBINode::printNodeHeader(); - RBBINode::printNode(n); - RBBIDebugPrintf(" Nullable: %s\n", n->fNullable?"TRUE":"FALSE"); - - RBBIDebugPrintf(" firstpos: "); - printSet(n->fFirstPosSet); - - RBBIDebugPrintf(" lastpos: "); - printSet(n->fLastPosSet); - - RBBIDebugPrintf(" followpos: "); - printSet(n->fFollowPos); - - printPosSets(n->fLeftChild); - printPosSets(n->fRightChild); -} -#endif - -// -// findDuplCharClassFrom() -// -bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) { - int32_t numStates = fDStates->size(); - int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); - - for (; categories->first < numCols-1; categories->first++) { - for (categories->second=categories->first+1; categories->second < numCols; categories->second++) { - // Initialized to different values to prevent returning true if numStates = 0 (implies no duplicates). 
- uint16_t table_base = 0; - uint16_t table_dupl = 1; - for (int32_t state=0; stateelementAt(state); - table_base = (uint16_t)sd->fDtran->elementAti(categories->first); - table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second); - if (table_base != table_dupl) { - break; - } - } - if (table_base == table_dupl) { - return true; - } - } - } - return false; -} - - -// -// removeColumn() -// -void RBBITableBuilder::removeColumn(int32_t column) { - int32_t numStates = fDStates->size(); - for (int32_t state=0; stateelementAt(state); - U_ASSERT(column < sd->fDtran->size()); - sd->fDtran->removeElementAt(column); - } -} - -/* - * findDuplicateState - */ -bool RBBITableBuilder::findDuplicateState(IntPair *states) { - int32_t numStates = fDStates->size(); - int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); - - for (; states->firstfirst++) { - RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(states->first); - for (states->second=states->first+1; states->secondsecond++) { - RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(states->second); - if (firstSD->fAccepting != duplSD->fAccepting || - firstSD->fLookAhead != duplSD->fLookAhead || - firstSD->fTagsIdx != duplSD->fTagsIdx) { - continue; - } - bool rowsMatch = true; - for (int32_t col=0; col < numCols; ++col) { - int32_t firstVal = firstSD->fDtran->elementAti(col); - int32_t duplVal = duplSD->fDtran->elementAti(col); - if (!((firstVal == duplVal) || - ((firstVal == states->first || firstVal == states->second) && - (duplVal == states->first || duplVal == states->second)))) { - rowsMatch = false; - break; - } - } - if (rowsMatch) { - return true; - } - } - } - return false; -} - - -bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) { - int32_t numStates = fSafeTable->size(); - - for (; states->firstfirst++) { - UnicodeString *firstRow = static_cast(fSafeTable->elementAt(states->first)); - for (states->second=states->first+1; states->secondsecond++) 
{ - UnicodeString *duplRow = static_cast(fSafeTable->elementAt(states->second)); - bool rowsMatch = true; - int32_t numCols = firstRow->length(); - for (int32_t col=0; col < numCols; ++col) { - int32_t firstVal = firstRow->charAt(col); - int32_t duplVal = duplRow->charAt(col); - if (!((firstVal == duplVal) || - ((firstVal == states->first || firstVal == states->second) && - (duplVal == states->first || duplVal == states->second)))) { - rowsMatch = false; - break; - } - } - if (rowsMatch) { - return true; - } - } - } - return false; -} - - -void RBBITableBuilder::removeState(IntPair duplStates) { - const int32_t keepState = duplStates.first; - const int32_t duplState = duplStates.second; - U_ASSERT(keepState < duplState); - U_ASSERT(duplState < fDStates->size()); - - RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState); - fDStates->removeElementAt(duplState); - delete duplSD; - - int32_t numStates = fDStates->size(); - int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); - for (int32_t state=0; stateelementAt(state); - for (int32_t col=0; colfDtran->elementAti(col); - int32_t newVal = existingVal; - if (existingVal == duplState) { - newVal = keepState; - } else if (existingVal > duplState) { - newVal = existingVal - 1; - } - sd->fDtran->setElementAt(newVal, col); - } - if (sd->fAccepting == duplState) { - sd->fAccepting = keepState; - } else if (sd->fAccepting > duplState) { - sd->fAccepting--; - } - if (sd->fLookAhead == duplState) { - sd->fLookAhead = keepState; - } else if (sd->fLookAhead > duplState) { - sd->fLookAhead--; - } - } -} - -void RBBITableBuilder::removeSafeState(IntPair duplStates) { - const int32_t keepState = duplStates.first; - const int32_t duplState = duplStates.second; - U_ASSERT(keepState < duplState); - U_ASSERT(duplState < fSafeTable->size()); - - fSafeTable->removeElementAt(duplState); // Note that fSafeTable has a deleter function - // and will auto-delete the removed element. 
- int32_t numStates = fSafeTable->size(); - for (int32_t state=0; stateelementAt(state); - int32_t numCols = sd->length(); - for (int32_t col=0; colcharAt(col); - int32_t newVal = existingVal; - if (existingVal == duplState) { - newVal = keepState; - } else if (existingVal > duplState) { - newVal = existingVal - 1; - } - sd->setCharAt(col, static_cast(newVal)); - } - } -} - - -/* - * RemoveDuplicateStates - */ -int32_t RBBITableBuilder::removeDuplicateStates() { - IntPair dupls = {3, 0}; - int32_t numStatesRemoved = 0; - - while (findDuplicateState(&dupls)) { - // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second); - removeState(dupls); - ++numStatesRemoved; - } - return numStatesRemoved; -} - - -//----------------------------------------------------------------------------- -// -// getTableSize() Calculate the size of the runtime form of this -// state transition table. -// -//----------------------------------------------------------------------------- -int32_t RBBITableBuilder::getTableSize() const { - int32_t size = 0; - int32_t numRows; - int32_t numCols; - int32_t rowSize; - - if (fTree == NULL) { - return 0; - } - - size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table. - - numRows = fDStates->size(); - numCols = fRB->fSetBuilder->getNumCharCategories(); - - rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols; - size += numRows * rowSize; - return size; -} - - -//----------------------------------------------------------------------------- -// -// exportTable() export the state transition table in the format required -// by the runtime engine. getTableSize() bytes of memory -// must be available at the output address "where". 
-// -//----------------------------------------------------------------------------- -void RBBITableBuilder::exportTable(void *where) { - RBBIStateTable *table = (RBBIStateTable *)where; - uint32_t state; - int col; - - if (U_FAILURE(*fStatus) || fTree == NULL) { - return; - } - - int32_t catCount = fRB->fSetBuilder->getNumCharCategories(); - if (catCount > 0x7fff || - fDStates->size() > 0x7fff) { - *fStatus = U_BRK_INTERNAL_ERROR; - return; - } - - table->fRowLen = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount; - table->fNumStates = fDStates->size(); - table->fFlags = 0; - if (fRB->fLookAheadHardBreak) { - table->fFlags |= RBBI_LOOKAHEAD_HARD_BREAK; - } - if (fRB->fSetBuilder->sawBOF()) { - table->fFlags |= RBBI_BOF_REQUIRED; - } - table->fReserved = 0; - - for (state=0; statefNumStates; state++) { - RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); - RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen); - U_ASSERT (-32768 < sd->fAccepting && sd->fAccepting <= 32767); - U_ASSERT (-32768 < sd->fLookAhead && sd->fLookAhead <= 32767); - row->fAccepting = (int16_t)sd->fAccepting; - row->fLookAhead = (int16_t)sd->fLookAhead; - row->fTagIdx = (int16_t)sd->fTagsIdx; - for (col=0; colfNextState[col] = (uint16_t)sd->fDtran->elementAti(col); - } - } -} - - -/** - * Synthesize a safe state table from the main state table. - */ -void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) { - // The safe table creation has three steps: - - // 1. Identifiy pairs of character classes that are "safe." Safe means that boundaries - // following the pair do not depend on context or state before the pair. To test - // whether a pair is safe, run it through the main forward state table, starting - // from each state. If the the final state is the same, no matter what the starting state, - // the pair is safe. - // - // 2. Build a state table that recognizes the safe pairs. 
It's similar to their - // forward table, with a column for each input character [class], and a row for - // each state. Row 1 is the start state, and row 0 is the stop state. Initially - // create an additional state for each input character category; being in - // one of these states means that the character has been seen, and is potentially - // the first of a pair. In each of these rows, the entry for the second character - // of a safe pair is set to the stop state (0), indicating that a match was found. - // All other table entries are set to the state corresponding the current input - // character, allowing that charcter to be the of a start following pair. - // - // Because the safe rules are to be run in reverse, moving backwards in the text, - // the first and second pair categories are swapped when building the table. - // - // 3. Compress the table. There are typically many rows (states) that are - // equivalent - that have zeroes (match completed) in the same columns - - // and can be folded together. - - // Each safe pair is stored as two UChars in the safePair string. - UnicodeString safePairs; - - int32_t numCharClasses = fRB->fSetBuilder->getNumCharCategories(); - int32_t numStates = fDStates->size(); - - for (int32_t c1=0; c1(fDStates->elementAt(startState)); - int32_t s2 = startStateD->fDtran->elementAti(c1); - RBBIStateDescriptor *s2StateD = static_cast(fDStates->elementAt(s2)); - endState = s2StateD->fDtran->elementAti(c2); - if (wantedEndState < 0) { - wantedEndState = endState; - } else { - if (wantedEndState != endState) { - break; - } - } - } - if (wantedEndState == endState) { - safePairs.append((char16_t)c1); - safePairs.append((char16_t)c2); - // printf("(%d, %d) ", c1, c2); - } - } - // printf("\n"); - } - - // Populate the initial safe table. - // The table as a whole is UVector - // Each row is represented by a UnicodeString, being used as a Vector. - // Row 0 is the stop state. - // Row 1 is the start sate. 
- // Row 2 and beyond are other states, initially one per char class, but - // after initial construction, many of the states will be combined, compacting the table. - // The String holds the nextState data only. The four leading fields of a row, fAccepting, - // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building. - - U_ASSERT(fSafeTable == nullptr); - fSafeTable = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status); - for (int32_t row=0; rowaddElement(new UnicodeString(numCharClasses, 0, numCharClasses+4), status); - } - - // From the start state, each input char class transitions to the state for that input. - UnicodeString &startState = *static_cast(fSafeTable->elementAt(1)); - for (int32_t charClass=0; charClass < numCharClasses; ++charClass) { - // Note: +2 for the start & stop state. - startState.setCharAt(charClass, static_cast(charClass+2)); - } - - // Initially make every other state table row look like the start state row, - for (int32_t row=2; row(fSafeTable->elementAt(row)); - rowState = startState; // UnicodeString assignment, copies contents. - } - - // Run through the safe pairs, set the next state to zero when pair has been seen. - // Zero being the stop state, meaning we found a safe point. - for (int32_t pairIdx=0; pairIdx(fSafeTable->elementAt(c2 + 2)); - rowState.setCharAt(c1, 0); - } - - // Remove duplicate or redundant rows from the table. - IntPair states = {1, 0}; - while (findDuplicateSafeState(&states)) { - // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second); - removeSafeState(states); - } -} - - -//----------------------------------------------------------------------------- -// -// getSafeTableSize() Calculate the size of the runtime form of this -// safe state table. 
-// -//----------------------------------------------------------------------------- -int32_t RBBITableBuilder::getSafeTableSize() const { - int32_t size = 0; - int32_t numRows; - int32_t numCols; - int32_t rowSize; - - if (fSafeTable == nullptr) { - return 0; - } - - size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table. - - numRows = fSafeTable->size(); - numCols = fRB->fSetBuilder->getNumCharCategories(); - - rowSize = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t)*numCols; - size += numRows * rowSize; - return size; -} - - -//----------------------------------------------------------------------------- -// -// exportSafeTable() export the state transition table in the format required -// by the runtime engine. getTableSize() bytes of memory -// must be available at the output address "where". -// -//----------------------------------------------------------------------------- -void RBBITableBuilder::exportSafeTable(void *where) { - RBBIStateTable *table = (RBBIStateTable *)where; - uint32_t state; - int col; - - if (U_FAILURE(*fStatus) || fSafeTable == nullptr) { - return; - } - - int32_t catCount = fRB->fSetBuilder->getNumCharCategories(); - if (catCount > 0x7fff || - fSafeTable->size() > 0x7fff) { - *fStatus = U_BRK_INTERNAL_ERROR; - return; - } - - table->fRowLen = offsetof(RBBIStateTableRow, fNextState) + sizeof(uint16_t) * catCount; - table->fNumStates = fSafeTable->size(); - table->fFlags = 0; - table->fReserved = 0; - - for (state=0; statefNumStates; state++) { - UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(state); - RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen); - row->fAccepting = 0; - row->fLookAhead = 0; - row->fTagIdx = 0; - row->fReserved = 0; - for (col=0; colfNextState[col] = rowString->charAt(col); - } - } -} - - - - -//----------------------------------------------------------------------------- -// -// printSet Debug function. 
Print the contents of a UVector -// -//----------------------------------------------------------------------------- -#ifdef RBBI_DEBUG -void RBBITableBuilder::printSet(UVector *s) { - int32_t i; - for (i=0; isize(); i++) { - const RBBINode *v = static_cast(s->elementAt(i)); - RBBIDebugPrintf("%5d", v==NULL? -1 : v->fSerialNum); - } - RBBIDebugPrintf("\n"); -} -#endif - - -//----------------------------------------------------------------------------- -// -// printStates Debug Function. Dump the fully constructed state transition table. -// -//----------------------------------------------------------------------------- -#ifdef RBBI_DEBUG -void RBBITableBuilder::printStates() { - int c; // input "character" - int n; // state number - - RBBIDebugPrintf("state | i n p u t s y m b o l s \n"); - RBBIDebugPrintf(" | Acc LA Tag"); - for (c=0; cfSetBuilder->getNumCharCategories(); c++) { - RBBIDebugPrintf(" %2d", c); - } - RBBIDebugPrintf("\n"); - RBBIDebugPrintf(" |---------------"); - for (c=0; cfSetBuilder->getNumCharCategories(); c++) { - RBBIDebugPrintf("---"); - } - RBBIDebugPrintf("\n"); - - for (n=0; nsize(); n++) { - RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); - RBBIDebugPrintf(" %3d | " , n); - RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx); - for (c=0; cfSetBuilder->getNumCharCategories(); c++) { - RBBIDebugPrintf(" %2d", sd->fDtran->elementAti(c)); - } - RBBIDebugPrintf("\n"); - } - RBBIDebugPrintf("\n\n"); -} -#endif - - -//----------------------------------------------------------------------------- -// -// printSafeTable Debug Function. Dump the fully constructed safe table. 
-// -//----------------------------------------------------------------------------- -#ifdef RBBI_DEBUG -void RBBITableBuilder::printReverseTable() { - int c; // input "character" - int n; // state number - - RBBIDebugPrintf(" Safe Reverse Table \n"); - if (fSafeTable == nullptr) { - RBBIDebugPrintf(" --- nullptr ---\n"); - return; - } - RBBIDebugPrintf("state | i n p u t s y m b o l s \n"); - RBBIDebugPrintf(" | Acc LA Tag"); - for (c=0; cfSetBuilder->getNumCharCategories(); c++) { - RBBIDebugPrintf(" %2d", c); - } - RBBIDebugPrintf("\n"); - RBBIDebugPrintf(" |---------------"); - for (c=0; cfSetBuilder->getNumCharCategories(); c++) { - RBBIDebugPrintf("---"); - } - RBBIDebugPrintf("\n"); - - for (n=0; nsize(); n++) { - UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(n); - RBBIDebugPrintf(" %3d | " , n); - RBBIDebugPrintf("%3d %3d %5d ", 0, 0, 0); // Accepting, LookAhead, Tags - for (c=0; cfSetBuilder->getNumCharCategories(); c++) { - RBBIDebugPrintf(" %2d", rowString->charAt(c)); - } - RBBIDebugPrintf("\n"); - } - RBBIDebugPrintf("\n\n"); -} -#endif - - - -//----------------------------------------------------------------------------- -// -// printRuleStatusTable Debug Function. Dump the common rule status table -// -//----------------------------------------------------------------------------- -#ifdef RBBI_DEBUG -void RBBITableBuilder::printRuleStatusTable() { - int32_t thisRecord = 0; - int32_t nextRecord = 0; - int i; - UVector *tbl = fRB->fRuleStatusVals; - - RBBIDebugPrintf("index | tags \n"); - RBBIDebugPrintf("-------------------\n"); - - while (nextRecord < tbl->size()) { - thisRecord = nextRecord; - nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1; - RBBIDebugPrintf("%4d ", thisRecord); - for (i=thisRecord+1; ielementAti(i)); - } - RBBIDebugPrintf("\n"); - } - RBBIDebugPrintf("\n\n"); -} -#endif - - -//----------------------------------------------------------------------------- -// -// RBBIStateDescriptor Methods. 
This is a very struct-like class -// Most access is directly to the fields. -// -//----------------------------------------------------------------------------- - -RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) { - fMarked = FALSE; - fAccepting = 0; - fLookAhead = 0; - fTagsIdx = 0; - fTagVals = NULL; - fPositions = NULL; - fDtran = NULL; - - fDtran = new UVector32(lastInputSymbol+1, *fStatus); - if (U_FAILURE(*fStatus)) { - return; - } - if (fDtran == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - return; - } - fDtran->setSize(lastInputSymbol+1); // fDtran needs to be pre-sized. - // It is indexed by input symbols, and will - // hold the next state number for each - // symbol. -} - - -RBBIStateDescriptor::~RBBIStateDescriptor() { - delete fPositions; - delete fDtran; - delete fTagVals; - fPositions = NULL; - fDtran = NULL; - fTagVals = NULL; -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/node/deps/icu-small/source/common/rbbitblb.h b/deps/node/deps/icu-small/source/common/rbbitblb.h deleted file mode 100644 index bc6077bb..00000000 --- a/deps/node/deps/icu-small/source/common/rbbitblb.h +++ /dev/null @@ -1,216 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// -// rbbitblb.h -// - -/* -********************************************************************** -* Copyright (c) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ - -#ifndef RBBITBLB_H -#define RBBITBLB_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/uobject.h" -#include "unicode/rbbi.h" -#include "rbbirb.h" -#include "rbbinode.h" - - -U_NAMESPACE_BEGIN - -class RBBIRuleScanner; -class RBBIRuleBuilder; -class UVector32; - -// -// class RBBITableBuilder is part of the RBBI rule compiler. 
-// It builds the state transition table used by the RBBI runtime -// from the expression syntax tree generated by the rule scanner. -// -// This class is part of the RBBI implementation only. -// There is no user-visible public API here. -// - -class RBBITableBuilder : public UMemory { -public: - RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status); - ~RBBITableBuilder(); - - void buildForwardTable(); - - /** Return the runtime size in bytes of the built state table. */ - int32_t getTableSize() const; - - /** Fill in the runtime state table. Sufficient memory must exist at the specified location. - */ - void exportTable(void *where); - - /** - * Find duplicate (redundant) character classes. Begin looking with categories.first. - * Duplicate, if found are returned in the categories parameter. - * This is an iterator-like function, used to identify character classes - * (state table columns) that can be eliminated. - * @param categories in/out parameter, specifies where to start looking for duplicates, - * and returns the first pair of duplicates found, if any. - * @return true if duplicate char classes were found, false otherwise. - */ - bool findDuplCharClassFrom(IntPair *categories); - - /** Remove a column from the state table. Used when two character categories - * have been found equivalent, and merged together, to eliminate the uneeded table column. - */ - void removeColumn(int32_t column); - - /** - * Check for, and remove dupicate states (table rows). - * @return the number of states removed. - */ - int32_t removeDuplicateStates(); - - /** Build the safe reverse table from the already-constructed forward table. */ - void buildSafeReverseTable(UErrorCode &status); - - /** Return the runtime size in bytes of the built safe reverse state table. */ - int32_t getSafeTableSize() const; - - /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location. 
- */ - void exportSafeTable(void *where); - - -private: - void calcNullable(RBBINode *n); - void calcFirstPos(RBBINode *n); - void calcLastPos(RBBINode *n); - void calcFollowPos(RBBINode *n); - void calcChainedFollowPos(RBBINode *n); - void bofFixup(); - void buildStateTable(); - void flagAcceptingStates(); - void flagLookAheadStates(); - void flagTaggedStates(); - void mergeRuleStatusVals(); - - /** - * Merge redundant state table columns, eliminating character classes with identical behavior. - * Done after the state tables are generated, just before converting to their run-time format. - */ - int32_t mergeColumns(); - - void addRuleRootNodes(UVector *dest, RBBINode *node); - - /** - * Find duplicate (redundant) states, beginning at the specified pair, - * within this state table. This is an iterator-like function, used to - * identify states (state table rows) that can be eliminated. - * @param states in/out parameter, specifies where to start looking for duplicates, - * and returns the first pair of duplicates found, if any. - * @return true if duplicate states were found, false otherwise. - */ - bool findDuplicateState(IntPair *states); - - /** Remove a duplicate state. - * @param duplStates The duplicate states. The first is kept, the second is removed. - * All references to the second in the state table are retargeted - * to the first. - */ - void removeState(IntPair duplStates); - - /** Find the next duplicate state in the safe reverse table. An iterator function. - * @param states in/out parameter, specifies where to start looking for duplicates, - * and returns the first pair of duplicates found, if any. - * @return true if a duplicate pair of states was found. - */ - bool findDuplicateSafeState(IntPair *states); - - /** Remove a duplicate state from the safe table. - * @param duplStates The duplicate states. The first is kept, the second is removed. - * All references to the second in the state table are retargeted - * to the first. 
- */ - void removeSafeState(IntPair duplStates); - - // Set functions for UVector. - // TODO: make a USet subclass of UVector - - void setAdd(UVector *dest, UVector *source); - UBool setEquals(UVector *a, UVector *b); - - void sortedAdd(UVector **dest, int32_t val); - -public: -#ifdef RBBI_DEBUG - void printSet(UVector *s); - void printPosSets(RBBINode *n /* = NULL*/); - void printStates(); - void printRuleStatusTable(); - void printReverseTable(); -#else - #define printSet(s) - #define printPosSets(n) - #define printStates() - #define printRuleStatusTable() - #define printReverseTable() -#endif - -private: - RBBIRuleBuilder *fRB; - RBBINode *&fTree; // The root node of the parse tree to build a - // table for. - UErrorCode *fStatus; - - /** State Descriptors, UVector */ - UVector *fDStates; // D states (Aho's terminology) - // Index is state number - // Contents are RBBIStateDescriptor pointers. - - /** Synthesized safe table, UVector of UnicodeString, one string per table row. */ - UVector *fSafeTable; - - - RBBITableBuilder(const RBBITableBuilder &other); // forbid copying of this class - RBBITableBuilder &operator=(const RBBITableBuilder &other); // forbid copying of this class -}; - -// -// RBBIStateDescriptor - The DFA is constructed as a set of these descriptors, -// one for each state. -class RBBIStateDescriptor : public UMemory { -public: - UBool fMarked; - int32_t fAccepting; - int32_t fLookAhead; - UVector *fTagVals; - int32_t fTagsIdx; - UVector *fPositions; // Set of parse tree positions associated - // with this state. Unordered (it's a set). - // UVector contents are RBBINode * - - UVector32 *fDtran; // Transitions out of this state. 
- // indexed by input character - // contents is int index of dest state - // in RBBITableBuilder.fDStates - - RBBIStateDescriptor(int maxInputSymbol, UErrorCode *fStatus); - ~RBBIStateDescriptor(); - -private: - RBBIStateDescriptor(const RBBIStateDescriptor &other); // forbid copying of this class - RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other); // forbid copying of this class -}; - - - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/resbund.cpp b/deps/node/deps/icu-small/source/common/resbund.cpp deleted file mode 100644 index 29c3463e..00000000 --- a/deps/node/deps/icu-small/source/common/resbund.cpp +++ /dev/null @@ -1,398 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1997-2013, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File resbund.cpp -* -* Modification History: -* -* Date Name Description -* 02/05/97 aliu Fixed bug in chopLocale. Added scanForLocaleInFile -* based on code taken from scanForLocale. Added -* constructor which attempts to read resource bundle -* from a specific file, without searching other files. -* 02/11/97 aliu Added UErrorCode return values to constructors. Fixed -* infinite loops in scanForFile and scanForLocale. -* Modified getRawResourceData to not delete storage in -* localeData and resourceData which it doesn't own. -* Added Mac compatibility #ifdefs for tellp() and -* ios::nocreate. -* 03/04/97 aliu Modified to use ExpandingDataSink objects instead of -* the highly inefficient ostrstream objects. -* 03/13/97 aliu Rewrote to load in entire resource bundle and store -* it as a Hashtable of ResourceBundleData objects. 
-* Added state table to govern parsing of files. -* Modified to load locale index out of new file distinct -* from default.txt. -* 03/25/97 aliu Modified to support 2-d arrays, needed for timezone data. -* Added support for custom file suffixes. Again, needed -* to support timezone data. Improved error handling to -* detect duplicate tags and subtags. -* 04/07/97 aliu Fixed bug in getHashtableForLocale(). Fixed handling -* of failing UErrorCode values on entry to API methods. -* Fixed bugs in getArrayItem() for negative indices. -* 04/29/97 aliu Update to use new Hashtable deletion protocol. -* 05/06/97 aliu Flattened kTransitionTable for HP compiler. -* Fixed usage of CharString. -* 06/11/99 stephen Removed parsing of .txt files. -* Reworked to use new binary format. -* Cleaned up. -* 06/14/99 stephen Removed methods taking a filename suffix. -* 06/22/99 stephen Added missing T_FileStream_close in parse() -* 11/09/99 weiv Added getLocale(), rewritten constructForLocale() -* March 2000 weiv complete overhaul. -****************************************************************************** -*/ - -#include "unicode/utypes.h" -#include "unicode/resbund.h" - -#include "mutex.h" -#include "uassert.h" -#include "umutex.h" - -#include "uresimp.h" - -U_NAMESPACE_BEGIN - -/*----------------------------------------------------------------------------- - * Implementation Notes - * - * Resource bundles are read in once, and thereafter cached. - * ResourceBundle statically keeps track of which files have been - * read, so we are guaranteed that each file is read at most once. - * Resource bundles can be loaded from different data directories and - * will be treated as distinct, even if they are for the same locale. - * - * Resource bundles are lightweight objects, which have pointers to - * one or more shared Hashtable objects containing all the data. - * Copying would be cheap, but there is no copy constructor, since - * there wasn't one in the original API. 
- * - * The ResourceBundle parsing mechanism is implemented as a transition - * network, for easy maintenance and modification. The network is - * implemented as a matrix (instead of in code) to make this even - * easier. The matrix contains Transition objects. Each Transition - * object describes a destination node and an action to take before - * moving to the destination node. The source node is encoded by the - * index of the object in the array that contains it. The pieces - * needed to understand the transition network are the enums for node - * IDs and actions, the parse() method, which walks through the - * network and implements the actions, and the network itself. The - * network guarantees certain conditions, for example, that a new - * resource will not be closed until one has been opened first; or - * that data will not be stored into a TaggedList until a TaggedList - * has been created. Nonetheless, the code in parse() does some - * consistency checks as it runs the network, and fails with an - * U_INTERNAL_PROGRAM_ERROR if one of these checks fails. If the input - * data has a bad format, an U_INVALID_FORMAT_ERROR is returned. If you - * see an U_INTERNAL_PROGRAM_ERROR the transition matrix has a bug in - * it. - * - * Old functionality of multiple locales in a single file is still - * supported. For this reason, LOCALE names override FILE names. If - * data for en_US is located in the en.txt file, once it is loaded, - * the code will not care where it came from (other than remembering - * which directory it came from). However, if there is an en_US - * resource in en_US.txt, that will take precedence. There is no - * limit to the number or type of resources that can be stored in a - * file, however, files are only searched in a specific way. If - * en_US_CA is requested, then first en_US_CA.txt is searched, then - * en_US.txt, then en.txt, then default.txt. So it only makes sense - * to put certain locales in certain files. 
In this example, it would - * be logical to put en_US_CA, en_US, and en into the en.txt file, - * since they would be found there if asked for. The extreme example - * is to place all locale resources into default.txt, which should - * also work. - * - * Inheritance is implemented. For example, xx_YY_zz inherits as - * follows: xx_YY_zz, xx_YY, xx, default. Inheritance is implemented - * as an array of hashtables. There will be from 1 to 4 hashtables in - * the array. - * - * Fallback files are implemented. The fallback pattern is Language - * Country Variant (LCV) -> LC -> L. Fallback is first done for the - * requested locale. Then it is done for the default locale, as - * returned by Locale::getDefault(). Then the special file - * default.txt is searched for the default locale. The overall FILE - * fallback path is LCV -> LC -> L -> dLCV -> dLC -> dL -> default. - * - * Note that although file name searching includes the default locale, - * once a ResourceBundle object is constructed, the inheritance path - * no longer includes the default locale. The path is LCV -> LC -> L - * -> default. - * - * File parsing is lazy. Nothing is parsed unless it is called for by - * someone. So when a ResourceBundle for xx_YY_zz is constructed, - * only that locale is parsed (along with anything else in the same - * file). Later, if the FooBar tag is asked for, and if it isn't - * found in xx_YY_zz, then xx_YY.txt will be parsed and checked, and - * so forth, until the chain is exhausted or the tag is found. - * - * Thread-safety is implemented around caches, both the cache that - * stores all the resouce data, and the cache that stores flags - * indicating whether or not a file has been visited. These caches - * delete their storage at static cleanup time, when the process - * quits. - * - * ResourceBundle supports TableCollation as a special case. 
This - * involves having special ResourceBundle objects which DO own their - * data, since we don't want large collation rule strings in the - * ResourceBundle cache (these are already cached in the - * TableCollation cache). TableCollation files (.ctx files) have the - * same format as normal resource data files, with a different - * interpretation, from the standpoint of ResourceBundle. .ctx files - * are loaded into otherwise ordinary ResourceBundle objects. They - * don't inherit (that's implemented by TableCollation) and they own - * their data (as mentioned above). However, they still support - * possible multiple locales in a single .ctx file. (This is in - * practice a bad idea, since you only want the one locale you're - * looking for, and only one tag will be present - * ("CollationElements"), so you don't need an inheritance chain of - * multiple locales.) Up to 4 locale resources will be loaded from a - * .ctx file; everything after the first 4 is ignored (parsed and - * deleted). (Normal .txt files have no limit.) Instead of being - * loaded into the cache, and then looked up as needed, the locale - * resources are read straight into the ResourceBundle object. - * - * The Index, which used to reside in default.txt, has been moved to a - * new file, index.txt. This file contains a slightly modified format - * with the addition of the "InstalledLocales" tag; it looks like: - * - * Index { - * InstalledLocales { - * ar - * .. 
- * zh_TW - * } - * } - */ -//----------------------------------------------------------------------------- - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ResourceBundle) - -ResourceBundle::ResourceBundle(UErrorCode &err) - :UObject(), fLocale(NULL) -{ - fResource = ures_open(0, Locale::getDefault().getName(), &err); -} - -ResourceBundle::ResourceBundle(const ResourceBundle &other) - :UObject(other), fLocale(NULL) -{ - UErrorCode status = U_ZERO_ERROR; - - if (other.fResource) { - fResource = ures_copyResb(0, other.fResource, &status); - } else { - /* Copying a bad resource bundle */ - fResource = NULL; - } -} - -ResourceBundle::ResourceBundle(UResourceBundle *res, UErrorCode& err) - :UObject(), fLocale(NULL) -{ - if (res) { - fResource = ures_copyResb(0, res, &err); - } else { - /* Copying a bad resource bundle */ - fResource = NULL; - } -} - -ResourceBundle::ResourceBundle(const char* path, const Locale& locale, UErrorCode& err) - :UObject(), fLocale(NULL) -{ - fResource = ures_open(path, locale.getName(), &err); -} - - -ResourceBundle& ResourceBundle::operator=(const ResourceBundle& other) -{ - if(this == &other) { - return *this; - } - if(fResource != 0) { - ures_close(fResource); - fResource = NULL; - } - if (fLocale != NULL) { - delete fLocale; - fLocale = NULL; - } - UErrorCode status = U_ZERO_ERROR; - if (other.fResource) { - fResource = ures_copyResb(0, other.fResource, &status); - } else { - /* Copying a bad resource bundle */ - fResource = NULL; - } - return *this; -} - -ResourceBundle::~ResourceBundle() -{ - if(fResource != 0) { - ures_close(fResource); - } - if(fLocale != NULL) { - delete(fLocale); - } -} - -ResourceBundle * -ResourceBundle::clone() const { - return new ResourceBundle(*this); -} - -UnicodeString ResourceBundle::getString(UErrorCode& status) const { - int32_t len = 0; - const UChar *r = ures_getString(fResource, &len, &status); - return UnicodeString(TRUE, r, len); -} - -const uint8_t *ResourceBundle::getBinary(int32_t& len, UErrorCode& status) 
const { - return ures_getBinary(fResource, &len, &status); -} - -const int32_t *ResourceBundle::getIntVector(int32_t& len, UErrorCode& status) const { - return ures_getIntVector(fResource, &len, &status); -} - -uint32_t ResourceBundle::getUInt(UErrorCode& status) const { - return ures_getUInt(fResource, &status); -} - -int32_t ResourceBundle::getInt(UErrorCode& status) const { - return ures_getInt(fResource, &status); -} - -const char *ResourceBundle::getName(void) const { - return ures_getName(fResource); -} - -const char *ResourceBundle::getKey(void) const { - return ures_getKey(fResource); -} - -UResType ResourceBundle::getType(void) const { - return ures_getType(fResource); -} - -int32_t ResourceBundle::getSize(void) const { - return ures_getSize(fResource); -} - -UBool ResourceBundle::hasNext(void) const { - return ures_hasNext(fResource); -} - -void ResourceBundle::resetIterator(void) { - ures_resetIterator(fResource); -} - -ResourceBundle ResourceBundle::getNext(UErrorCode& status) { - UResourceBundle r; - - ures_initStackObject(&r); - ures_getNextResource(fResource, &r, &status); - ResourceBundle res(&r, status); - if (U_SUCCESS(status)) { - ures_close(&r); - } - return res; -} - -UnicodeString ResourceBundle::getNextString(UErrorCode& status) { - int32_t len = 0; - const UChar* r = ures_getNextString(fResource, &len, 0, &status); - return UnicodeString(TRUE, r, len); -} - -UnicodeString ResourceBundle::getNextString(const char ** key, UErrorCode& status) { - int32_t len = 0; - const UChar* r = ures_getNextString(fResource, &len, key, &status); - return UnicodeString(TRUE, r, len); -} - -ResourceBundle ResourceBundle::get(int32_t indexR, UErrorCode& status) const { - UResourceBundle r; - - ures_initStackObject(&r); - ures_getByIndex(fResource, indexR, &r, &status); - ResourceBundle res(&r, status); - if (U_SUCCESS(status)) { - ures_close(&r); - } - return res; -} - -UnicodeString ResourceBundle::getStringEx(int32_t indexS, UErrorCode& status) const { - 
int32_t len = 0; - const UChar* r = ures_getStringByIndex(fResource, indexS, &len, &status); - return UnicodeString(TRUE, r, len); -} - -ResourceBundle ResourceBundle::get(const char* key, UErrorCode& status) const { - UResourceBundle r; - - ures_initStackObject(&r); - ures_getByKey(fResource, key, &r, &status); - ResourceBundle res(&r, status); - if (U_SUCCESS(status)) { - ures_close(&r); - } - return res; -} - -ResourceBundle ResourceBundle::getWithFallback(const char* key, UErrorCode& status){ - UResourceBundle r; - ures_initStackObject(&r); - ures_getByKeyWithFallback(fResource, key, &r, &status); - ResourceBundle res(&r, status); - if(U_SUCCESS(status)){ - ures_close(&r); - } - return res; -} -UnicodeString ResourceBundle::getStringEx(const char* key, UErrorCode& status) const { - int32_t len = 0; - const UChar* r = ures_getStringByKey(fResource, key, &len, &status); - return UnicodeString(TRUE, r, len); -} - -const char* -ResourceBundle::getVersionNumber() const -{ - return ures_getVersionNumberInternal(fResource); -} - -void ResourceBundle::getVersion(UVersionInfo versionInfo) const { - ures_getVersion(fResource, versionInfo); -} - -static UMutex gLocaleLock = U_MUTEX_INITIALIZER; -const Locale &ResourceBundle::getLocale(void) const { - Mutex lock(&gLocaleLock); - if (fLocale != NULL) { - return *fLocale; - } - UErrorCode status = U_ZERO_ERROR; - const char *localeName = ures_getLocaleInternal(fResource, &status); - ResourceBundle *ncThis = const_cast(this); - ncThis->fLocale = new Locale(localeName); - return ncThis->fLocale != NULL ? 
*ncThis->fLocale : Locale::getDefault(); -} - -const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const -{ - return ures_getLocaleByType(fResource, type, &status); -} - -U_NAMESPACE_END -//eof diff --git a/deps/node/deps/icu-small/source/common/resbund_cnv.cpp b/deps/node/deps/icu-small/source/common/resbund_cnv.cpp deleted file mode 100644 index ae854fe7..00000000 --- a/deps/node/deps/icu-small/source/common/resbund_cnv.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1997-2006, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: resbund_cnv.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004aug25 -* created by: Markus W. 
Scherer -* -* Character conversion functions moved here from resbund.cpp -*/ - -#include "unicode/utypes.h" -#include "unicode/resbund.h" -#include "uinvchar.h" - -U_NAMESPACE_BEGIN - -ResourceBundle::ResourceBundle( const UnicodeString& path, - const Locale& locale, - UErrorCode& error) - :UObject(), fLocale(NULL) -{ - constructForLocale(path, locale, error); -} - -ResourceBundle::ResourceBundle( const UnicodeString& path, - UErrorCode& error) - :UObject(), fLocale(NULL) -{ - constructForLocale(path, Locale::getDefault(), error); -} - -void -ResourceBundle::constructForLocale(const UnicodeString& path, - const Locale& locale, - UErrorCode& error) -{ - if (path.isEmpty()) { - fResource = ures_open(NULL, locale.getName(), &error); - } - else { - UnicodeString nullTerminatedPath(path); - nullTerminatedPath.append((UChar)0); - fResource = ures_openU(nullTerminatedPath.getBuffer(), locale.getName(), &error); - } -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/resource.cpp b/deps/node/deps/icu-small/source/common/resource.cpp deleted file mode 100644 index 3d41a160..00000000 --- a/deps/node/deps/icu-small/source/common/resource.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2015-2016, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* resource.cpp -* -* created on: 2015nov04 -* created by: Markus W. 
Scherer -*/ - -#include "resource.h" - -U_NAMESPACE_BEGIN - -ResourceValue::~ResourceValue() {} - -ResourceSink::~ResourceSink() {} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/resource.h b/deps/node/deps/icu-small/source/common/resource.h deleted file mode 100644 index 3dbff785..00000000 --- a/deps/node/deps/icu-small/source/common/resource.h +++ /dev/null @@ -1,279 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2015-2016, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* resource.h -* -* created on: 2015nov04 -* created by: Markus W. Scherer -*/ - -#ifndef __URESOURCE_H__ -#define __URESOURCE_H__ - -/** - * \file - * \brief ICU resource bundle key and value types. - */ - -// Note: Ported from ICU4J class UResource and its nested classes, -// but the C++ classes are separate, not nested. - -// We use the Resource prefix for C++ classes, as usual. -// The UResource prefix would be used for C types. - -#include "unicode/utypes.h" -#include "unicode/unistr.h" -#include "unicode/ures.h" - -struct ResourceData; - -U_NAMESPACE_BEGIN - -class ResourceValue; - -// Note: In C++, we use const char * pointers for keys, -// rather than an abstraction like Java UResource.Key. - -/** - * Interface for iterating over a resource bundle array resource. - */ -class U_COMMON_API ResourceArray { -public: - /** Constructs an empty array object. */ - ResourceArray() : items16(NULL), items32(NULL), length(0) {} - - /** Only for implementation use. @internal */ - ResourceArray(const uint16_t *i16, const uint32_t *i32, int32_t len) : - items16(i16), items32(i32), length(len) {} - - /** - * @return The number of items in the array resource. 
- */ - int32_t getSize() const { return length; } - /** - * @param i Array item index. - * @param value Output-only, receives the value of the i'th item. - * @return TRUE if i is non-negative and less than getSize(). - */ - UBool getValue(int32_t i, ResourceValue &value) const; - - /** Only for implementation use. @internal */ - uint32_t internalGetResource(const ResourceData *pResData, int32_t i) const; - -private: - const uint16_t *items16; - const uint32_t *items32; - int32_t length; -}; - -/** - * Interface for iterating over a resource bundle table resource. - */ -class U_COMMON_API ResourceTable { -public: - /** Constructs an empty table object. */ - ResourceTable() : keys16(NULL), keys32(NULL), items16(NULL), items32(NULL), length(0) {} - - /** Only for implementation use. @internal */ - ResourceTable(const uint16_t *k16, const int32_t *k32, - const uint16_t *i16, const uint32_t *i32, int32_t len) : - keys16(k16), keys32(k32), items16(i16), items32(i32), length(len) {} - - /** - * @return The number of items in the array resource. - */ - int32_t getSize() const { return length; } - /** - * @param i Array item index. - * @param key Output-only, receives the key of the i'th item. - * @param value Output-only, receives the value of the i'th item. - * @return TRUE if i is non-negative and less than getSize(). - */ - UBool getKeyAndValue(int32_t i, const char *&key, ResourceValue &value) const; - -private: - const uint16_t *keys16; - const int32_t *keys32; - const uint16_t *items16; - const uint32_t *items32; - int32_t length; -}; - -/** - * Represents a resource bundle item's value. - * Avoids object creations as much as possible. - * Mutable, not thread-safe. - */ -class U_COMMON_API ResourceValue : public UObject { -public: - virtual ~ResourceValue(); - - /** - * @return ICU resource type, for example, URES_STRING - */ - virtual UResType getType() const = 0; - - /** - * Sets U_RESOURCE_TYPE_MISMATCH if this is not a string resource. 
- * - * @see ures_getString() - */ - virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const = 0; - - inline UnicodeString getUnicodeString(UErrorCode &errorCode) const { - int32_t len = 0; - const UChar *r = getString(len, errorCode); - return UnicodeString(TRUE, r, len); - } - - /** - * Sets U_RESOURCE_TYPE_MISMATCH if this is not an alias resource. - */ - virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const = 0; - - inline UnicodeString getAliasUnicodeString(UErrorCode &errorCode) const { - int32_t len = 0; - const UChar *r = getAliasString(len, errorCode); - return UnicodeString(TRUE, r, len); - } - - /** - * Sets U_RESOURCE_TYPE_MISMATCH if this is not an integer resource. - * - * @see ures_getInt() - */ - virtual int32_t getInt(UErrorCode &errorCode) const = 0; - - /** - * Sets U_RESOURCE_TYPE_MISMATCH if this is not an integer resource. - * - * @see ures_getUInt() - */ - virtual uint32_t getUInt(UErrorCode &errorCode) const = 0; - - /** - * Sets U_RESOURCE_TYPE_MISMATCH if this is not an intvector resource. - * - * @see ures_getIntVector() - */ - virtual const int32_t *getIntVector(int32_t &length, UErrorCode &errorCode) const = 0; - - /** - * Sets U_RESOURCE_TYPE_MISMATCH if this is not a binary-blob resource. - * - * @see ures_getBinary() - */ - virtual const uint8_t *getBinary(int32_t &length, UErrorCode &errorCode) const = 0; - - /** - * Sets U_RESOURCE_TYPE_MISMATCH if this is not an array resource - */ - virtual ResourceArray getArray(UErrorCode &errorCode) const = 0; - - /** - * Sets U_RESOURCE_TYPE_MISMATCH if this is not a table resource - */ - virtual ResourceTable getTable(UErrorCode &errorCode) const = 0; - - /** - * Is this a no-fallback/no-inheritance marker string? - * Such a marker is used for - * CLDR no-fallback data values of (three empty-set symbols)=={2205, 2205, 2205} - * when enumerating tables with fallback from the specific resource bundle to root. 
- * - * @return TRUE if this is a no-inheritance marker string - */ - virtual UBool isNoInheritanceMarker() const = 0; - - /** - * Sets the dest strings from the string values in this array resource. - * - * @return the number of strings in this array resource. - * If greater than capacity, then an overflow error is set. - * - * Sets U_RESOURCE_TYPE_MISMATCH if this is not an array resource - * or if any of the array items is not a string - */ - virtual int32_t getStringArray(UnicodeString *dest, int32_t capacity, - UErrorCode &errorCode) const = 0; - - /** - * Same as - *

-     * if (getType() == URES_STRING) {
-     *     return new String[] { getString(); }
-     * } else {
-     *     return getStringArray();
-     * }
-     * 
- * - * Sets U_RESOURCE_TYPE_MISMATCH if this is - * neither a string resource nor an array resource containing strings - * @see getString() - * @see getStringArray() - */ - virtual int32_t getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity, - UErrorCode &errorCode) const = 0; - - /** - * Same as - *
-     * if (getType() == URES_STRING) {
-     *     return getString();
-     * } else {
-     *     return getStringArray()[0];
-     * }
-     * 
- * - * Sets U_RESOURCE_TYPE_MISMATCH if this is - * neither a string resource nor an array resource containing strings - * @see getString() - * @see getStringArray() - */ - virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const = 0; - -protected: - ResourceValue() {} - -private: - ResourceValue(const ResourceValue &); // no copy constructor - ResourceValue &operator=(const ResourceValue &); // no assignment operator -}; - -/** - * Sink for ICU resource bundle contents. - */ -class U_COMMON_API ResourceSink : public UObject { -public: - ResourceSink() {} - virtual ~ResourceSink(); - - /** - * Called once for each bundle (child-parent-...-root). - * The value is normally an array or table resource, - * and implementations of this method normally iterate over the - * tree of resource items stored there. - * - * @param key The key string of the enumeration-start resource. - * Empty if the enumeration starts at the top level of the bundle. - * @param value Call getArray() or getTable() as appropriate. - * Then reuse for output values from Array and Table getters. - * @param noFallback true if the bundle has no parent; - * that is, its top-level table has the nofallback attribute, - * or it is the root bundle of a locale tree. - */ - virtual void put(const char *key, ResourceValue &value, UBool noFallback, - UErrorCode &errorCode) = 0; - -private: - ResourceSink(const ResourceSink &); // no copy constructor - ResourceSink &operator=(const ResourceSink &); // no assignment operator -}; - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/ruleiter.cpp b/deps/node/deps/icu-small/source/common/ruleiter.cpp deleted file mode 100644 index 41eea23c..00000000 --- a/deps/node/deps/icu-small/source/common/ruleiter.cpp +++ /dev/null @@ -1,162 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2003-2011, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Author: Alan Liu -* Created: September 24 2003 -* Since: ICU 2.8 -********************************************************************** -*/ -#include "ruleiter.h" -#include "unicode/parsepos.h" -#include "unicode/symtable.h" -#include "unicode/unistr.h" -#include "unicode/utf16.h" -#include "patternprops.h" - -/* \U87654321 or \ud800\udc00 */ -#define MAX_U_NOTATION_LEN 12 - -U_NAMESPACE_BEGIN - -RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, - ParsePosition& thePos) : - text(theText), - pos(thePos), - sym(theSym), - buf(0), - bufPos(0) -{} - -UBool RuleCharacterIterator::atEnd() const { - return buf == 0 && pos.getIndex() == text.length(); -} - -UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { - if (U_FAILURE(ec)) return DONE; - - UChar32 c = DONE; - isEscaped = FALSE; - - for (;;) { - c = _current(); - _advance(U16_LENGTH(c)); - - if (c == SymbolTable::SYMBOL_REF && buf == 0 && - (options & PARSE_VARIABLES) != 0 && sym != 0) { - UnicodeString name = sym->parseReference(text, pos, text.length()); - // If name is empty there was an isolated SYMBOL_REF; - // return it. Caller must be prepared for this. 
- if (name.length() == 0) { - break; - } - bufPos = 0; - buf = sym->lookup(name); - if (buf == 0) { - ec = U_UNDEFINED_VARIABLE; - return DONE; - } - // Handle empty variable value - if (buf->length() == 0) { - buf = 0; - } - continue; - } - - if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { - continue; - } - - if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { - UnicodeString tempEscape; - int32_t offset = 0; - c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); - jumpahead(offset); - isEscaped = TRUE; - if (c < 0) { - ec = U_MALFORMED_UNICODE_ESCAPE; - return DONE; - } - } - - break; - } - - return c; -} - -void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { - p.buf = buf; - p.pos = pos.getIndex(); - p.bufPos = bufPos; -} - -void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { - buf = p.buf; - pos.setIndex(p.pos); - bufPos = p.bufPos; -} - -void RuleCharacterIterator::skipIgnored(int32_t options) { - if ((options & SKIP_WHITESPACE) != 0) { - for (;;) { - UChar32 a = _current(); - if (!PatternProps::isWhiteSpace(a)) break; - _advance(U16_LENGTH(a)); - } - } -} - -UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { - if (maxLookAhead < 0) { - maxLookAhead = 0x7FFFFFFF; - } - if (buf != 0) { - buf->extract(bufPos, maxLookAhead, result); - } else { - text.extract(pos.getIndex(), maxLookAhead, result); - } - return result; -} - -void RuleCharacterIterator::jumpahead(int32_t count) { - _advance(count); -} - -/* -UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { - int32_t b = pos.getIndex(); - text.extract(0, b, result); - return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index -} -*/ - -UChar32 RuleCharacterIterator::_current() const { - if (buf != 0) { - return buf->char32At(bufPos); - } else { - int i = pos.getIndex(); - return (i < text.length()) ? 
text.char32At(i) : (UChar32)DONE; - } -} - -void RuleCharacterIterator::_advance(int32_t count) { - if (buf != 0) { - bufPos += count; - if (bufPos == buf->length()) { - buf = 0; - } - } else { - pos.setIndex(pos.getIndex() + count); - if (pos.getIndex() > text.length()) { - pos.setIndex(text.length()); - } - } -} - -U_NAMESPACE_END - -//eof diff --git a/deps/node/deps/icu-small/source/common/ruleiter.h b/deps/node/deps/icu-small/source/common/ruleiter.h deleted file mode 100644 index b6edc657..00000000 --- a/deps/node/deps/icu-small/source/common/ruleiter.h +++ /dev/null @@ -1,233 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2003-2011, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Author: Alan Liu -* Created: September 24 2003 -* Since: ICU 2.8 -********************************************************************** -*/ -#ifndef _RULEITER_H_ -#define _RULEITER_H_ - -#include "unicode/uobject.h" - -U_NAMESPACE_BEGIN - -class UnicodeString; -class ParsePosition; -class SymbolTable; - -/** - * An iterator that returns 32-bit code points. This class is deliberately - * not related to any of the ICU character iterator classes - * in order to minimize complexity. - * @author Alan Liu - * @since ICU 2.8 - */ -class RuleCharacterIterator : public UMemory { - - // TODO: Ideas for later. (Do not implement if not needed, lest the - // code coverage numbers go down due to unused methods.) - // 1. Add a copy constructor, operator==() method. - // 2. Rather than return DONE, throw an exception if the end - // is reached -- this is an alternate usage model, probably not useful. - -private: - /** - * Text being iterated. - */ - const UnicodeString& text; - - /** - * Position of iterator. 
- */ - ParsePosition& pos; - - /** - * Symbol table used to parse and dereference variables. May be 0. - */ - const SymbolTable* sym; - - /** - * Current variable expansion, or 0 if none. - */ - const UnicodeString* buf; - - /** - * Position within buf. Meaningless if buf == 0. - */ - int32_t bufPos; - -public: - /** - * Value returned when there are no more characters to iterate. - */ - enum { DONE = -1 }; - - /** - * Bitmask option to enable parsing of variable names. If (options & - * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to - * its value. Variables are parsed using the SymbolTable API. - */ - enum { PARSE_VARIABLES = 1 }; - - /** - * Bitmask option to enable parsing of escape sequences. If (options & - * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded - * to its value. Escapes are parsed using Utility.unescapeAt(). - */ - enum { PARSE_ESCAPES = 2 }; - - /** - * Bitmask option to enable skipping of whitespace. If (options & - * SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently - * skipped, as if they were not present in the input. - */ - enum { SKIP_WHITESPACE = 4 }; - - /** - * Constructs an iterator over the given text, starting at the given - * position. - * @param text the text to be iterated - * @param sym the symbol table, or null if there is none. If sym is null, - * then variables will not be deferenced, even if the PARSE_VARIABLES - * option is set. - * @param pos upon input, the index of the next character to return. If a - * variable has been dereferenced, then pos will not increment as - * characters of the variable value are iterated. - */ - RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym, - ParsePosition& pos); - - /** - * Returns true if this iterator has no more characters to return. 
- */ - UBool atEnd() const; - - /** - * Returns the next character using the given options, or DONE if there - * are no more characters, and advance the position to the next - * character. - * @param options one or more of the following options, bitwise-OR-ed - * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE. - * @param isEscaped output parameter set to TRUE if the character - * was escaped - * @param ec input-output error code. An error will only be set by - * this routing if options includes PARSE_VARIABLES and an unknown - * variable name is seen, or if options includes PARSE_ESCAPES and - * an invalid escape sequence is seen. - * @return the current 32-bit code point, or DONE - */ - UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec); - - /** - * Returns true if this iterator is currently within a variable expansion. - */ - inline UBool inVariable() const; - - /** - * An opaque object representing the position of a RuleCharacterIterator. - */ - struct Pos : public UMemory { - private: - const UnicodeString* buf; - int32_t pos; - int32_t bufPos; - friend class RuleCharacterIterator; - }; - - /** - * Sets an object which, when later passed to setPos(), will - * restore this iterator's position. Usage idiom: - * - * RuleCharacterIterator iterator = ...; - * RuleCharacterIterator::Pos pos; - * iterator.getPos(pos); - * for (;;) { - * iterator.getPos(pos); - * int c = iterator.next(...); - * ... - * } - * iterator.setPos(pos); - * - * @param p a position object to be set to this iterator's - * current position. - */ - void getPos(Pos& p) const; - - /** - * Restores this iterator to the position it had when getPos() - * set the given object. - * @param p a position object previously set by getPos() - */ - void setPos(const Pos& p); - - /** - * Skips ahead past any ignored characters, as indicated by the given - * options. This is useful in conjunction with the lookahead() method. - * - * Currently, this only has an effect for SKIP_WHITESPACE. 
- * @param options one or more of the following options, bitwise-OR-ed - * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE. - */ - void skipIgnored(int32_t options); - - /** - * Returns a string containing the remainder of the characters to be - * returned by this iterator, without any option processing. If the - * iterator is currently within a variable expansion, this will only - * extend to the end of the variable expansion. This method is provided - * so that iterators may interoperate with string-based APIs. The typical - * sequence of calls is to call skipIgnored(), then call lookahead(), then - * parse the string returned by lookahead(), then call jumpahead() to - * resynchronize the iterator. - * @param result a string to receive the characters to be returned - * by future calls to next() - * @param maxLookAhead The maximum to copy into the result. - * @return a reference to result - */ - UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const; - - /** - * Advances the position by the given number of 16-bit code units. - * This is useful in conjunction with the lookahead() method. - * @param count the number of 16-bit code units to jump over - */ - void jumpahead(int32_t count); - - /** - * Returns a string representation of this object, consisting of the - * characters being iterated, with a '|' marking the current position. - * Position within an expanded variable is not indicated. - * @param result output parameter to receive a string - * representation of this object - */ -// UnicodeString& toString(UnicodeString& result) const; - -private: - /** - * Returns the current 32-bit code point without parsing escapes, parsing - * variables, or skipping whitespace. - * @return the current 32-bit code point - */ - UChar32 _current() const; - - /** - * Advances the position by the given amount. 
- * @param count the number of 16-bit code units to advance past - */ - void _advance(int32_t count); -}; - -inline UBool RuleCharacterIterator::inVariable() const { - return buf != 0; -} - -U_NAMESPACE_END - -#endif // _RULEITER_H_ -//eof diff --git a/deps/node/deps/icu-small/source/common/schriter.cpp b/deps/node/deps/icu-small/source/common/schriter.cpp deleted file mode 100644 index f852800a..00000000 --- a/deps/node/deps/icu-small/source/common/schriter.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1998-2012, International Business Machines Corporation and -* others. All Rights Reserved. -****************************************************************************** -* -* File schriter.cpp -* -* Modification History: -* -* Date Name Description -* 05/05/99 stephen Cleaned up. -****************************************************************************** -*/ - -#include "utypeinfo.h" // for 'typeid' to work - -#include "unicode/chariter.h" -#include "unicode/schriter.h" - -U_NAMESPACE_BEGIN - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringCharacterIterator) - -StringCharacterIterator::StringCharacterIterator() - : UCharCharacterIterator(), - text() -{ - // NEVER DEFAULT CONSTRUCT! 
-} - -StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr) - : UCharCharacterIterator(textStr.getBuffer(), textStr.length()), - text(textStr) -{ - // we had set the input parameter's array, now we need to set our copy's array - UCharCharacterIterator::text = this->text.getBuffer(); -} - -StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr, - int32_t textPos) - : UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textPos), - text(textStr) -{ - // we had set the input parameter's array, now we need to set our copy's array - UCharCharacterIterator::text = this->text.getBuffer(); -} - -StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr, - int32_t textBegin, - int32_t textEnd, - int32_t textPos) - : UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textBegin, textEnd, textPos), - text(textStr) -{ - // we had set the input parameter's array, now we need to set our copy's array - UCharCharacterIterator::text = this->text.getBuffer(); -} - -StringCharacterIterator::StringCharacterIterator(const StringCharacterIterator& that) - : UCharCharacterIterator(that), - text(that.text) -{ - // we had set the input parameter's array, now we need to set our copy's array - UCharCharacterIterator::text = this->text.getBuffer(); -} - -StringCharacterIterator::~StringCharacterIterator() { -} - -StringCharacterIterator& -StringCharacterIterator::operator=(const StringCharacterIterator& that) { - UCharCharacterIterator::operator=(that); - text = that.text; - // we had set the input parameter's array, now we need to set our copy's array - UCharCharacterIterator::text = this->text.getBuffer(); - return *this; -} - -UBool -StringCharacterIterator::operator==(const ForwardCharacterIterator& that) const { - if (this == &that) { - return TRUE; - } - - // do not call UCharCharacterIterator::operator==() - // because that checks for array pointer equality - // while we compare UnicodeString objects 
- - if (typeid(*this) != typeid(that)) { - return FALSE; - } - - StringCharacterIterator& realThat = (StringCharacterIterator&)that; - - return text == realThat.text - && pos == realThat.pos - && begin == realThat.begin - && end == realThat.end; -} - -CharacterIterator* -StringCharacterIterator::clone() const { - return new StringCharacterIterator(*this); -} - -void -StringCharacterIterator::setText(const UnicodeString& newText) { - text = newText; - UCharCharacterIterator::setText(text.getBuffer(), text.length()); -} - -void -StringCharacterIterator::getText(UnicodeString& result) { - result = text; -} -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/serv.cpp b/deps/node/deps/icu-small/source/common/serv.cpp deleted file mode 100644 index 2fb35bd1..00000000 --- a/deps/node/deps/icu-small/source/common/serv.cpp +++ /dev/null @@ -1,982 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** -******************************************************************************* -* Copyright (C) 2001-2014, International Business Machines Corporation. -* All Rights Reserved. 
-******************************************************************************* -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_SERVICE - -#include "serv.h" -#include "umutex.h" - -#undef SERVICE_REFCOUNT - -// in case we use the refcount stuff - -U_NAMESPACE_BEGIN - -/* -****************************************************************** -*/ - -const UChar ICUServiceKey::PREFIX_DELIMITER = 0x002F; /* '/' */ - -ICUServiceKey::ICUServiceKey(const UnicodeString& id) -: _id(id) { -} - -ICUServiceKey::~ICUServiceKey() -{ -} - -const UnicodeString& -ICUServiceKey::getID() const -{ - return _id; -} - -UnicodeString& -ICUServiceKey::canonicalID(UnicodeString& result) const -{ - return result.append(_id); -} - -UnicodeString& -ICUServiceKey::currentID(UnicodeString& result) const -{ - return canonicalID(result); -} - -UnicodeString& -ICUServiceKey::currentDescriptor(UnicodeString& result) const -{ - prefix(result); - result.append(PREFIX_DELIMITER); - return currentID(result); -} - -UBool -ICUServiceKey::fallback() -{ - return FALSE; -} - -UBool -ICUServiceKey::isFallbackOf(const UnicodeString& id) const -{ - return id == _id; -} - -UnicodeString& -ICUServiceKey::prefix(UnicodeString& result) const -{ - return result; -} - -UnicodeString& -ICUServiceKey::parsePrefix(UnicodeString& result) -{ - int32_t n = result.indexOf(PREFIX_DELIMITER); - if (n < 0) { - n = 0; - } - result.remove(n); - return result; -} - -UnicodeString& -ICUServiceKey::parseSuffix(UnicodeString& result) -{ - int32_t n = result.indexOf(PREFIX_DELIMITER); - if (n >= 0) { - result.remove(0, n+1); - } - return result; -} - -#ifdef SERVICE_DEBUG -UnicodeString& -ICUServiceKey::debug(UnicodeString& result) const -{ - debugClass(result); - result.append((UnicodeString)" id: "); - result.append(_id); - return result; -} - -UnicodeString& -ICUServiceKey::debugClass(UnicodeString& result) const -{ - return result.append((UnicodeString)"ICUServiceKey"); -} -#endif - 
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ICUServiceKey) - -/* -****************************************************************** -*/ - -ICUServiceFactory::~ICUServiceFactory() {} - -SimpleFactory::SimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible) -: _instance(instanceToAdopt), _id(id), _visible(visible) -{ -} - -SimpleFactory::~SimpleFactory() -{ - delete _instance; -} - -UObject* -SimpleFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const -{ - if (U_SUCCESS(status)) { - UnicodeString temp; - if (_id == key.currentID(temp)) { - return service->cloneInstance(_instance); - } - } - return NULL; -} - -void -SimpleFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const -{ - if (_visible) { - result.put(_id, (void*)this, status); // cast away const - } else { - result.remove(_id); - } -} - -UnicodeString& -SimpleFactory::getDisplayName(const UnicodeString& id, const Locale& /* locale */, UnicodeString& result) const -{ - if (_visible && _id == id) { - result = _id; - } else { - result.setToBogus(); - } - return result; -} - -#ifdef SERVICE_DEBUG -UnicodeString& -SimpleFactory::debug(UnicodeString& toAppendTo) const -{ - debugClass(toAppendTo); - toAppendTo.append((UnicodeString)" id: "); - toAppendTo.append(_id); - toAppendTo.append((UnicodeString)", visible: "); - toAppendTo.append(_visible ? 
(UnicodeString)"T" : (UnicodeString)"F"); - return toAppendTo; -} - -UnicodeString& -SimpleFactory::debugClass(UnicodeString& toAppendTo) const -{ - return toAppendTo.append((UnicodeString)"SimpleFactory"); -} -#endif - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleFactory) - -/* -****************************************************************** -*/ - -ServiceListener::~ServiceListener() {} - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ServiceListener) - -/* -****************************************************************** -*/ - -// Record the actual id for this service in the cache, so we can return it -// even if we succeed later with a different id. -class CacheEntry : public UMemory { -private: - int32_t refcount; - -public: - UnicodeString actualDescriptor; - UObject* service; - - /** - * Releases a reference to the shared resource. - */ - ~CacheEntry() { - delete service; - } - - CacheEntry(const UnicodeString& _actualDescriptor, UObject* _service) - : refcount(1), actualDescriptor(_actualDescriptor), service(_service) { - } - - /** - * Instantiation creates an initial reference, so don't call this - * unless you're creating a new pointer to this. Management of - * that pointer will have to know how to deal with refcounts. - * Return true if the resource has not already been released. - */ - CacheEntry* ref() { - ++refcount; - return this; - } - - /** - * Destructions removes a reference, so don't call this unless - * you're removing pointer to this somewhere. Management of that - * pointer will have to know how to deal with refcounts. Once - * the refcount drops to zero, the resource is released. Return - * false if the resouce has been released. - */ - CacheEntry* unref() { - if ((--refcount) == 0) { - delete this; - return NULL; - } - return this; - } - - /** - * Return TRUE if there is at least one reference to this and the - * resource has not been released. 
- */ - UBool isShared() const { - return refcount > 1; - } -}; - -// UObjectDeleter for serviceCache -U_CDECL_BEGIN -static void U_CALLCONV -cacheDeleter(void* obj) { - U_NAMESPACE_USE ((CacheEntry*)obj)->unref(); -} - -/** -* Deleter for UObjects -*/ -static void U_CALLCONV -deleteUObject(void *obj) { - U_NAMESPACE_USE delete (UObject*) obj; -} -U_CDECL_END - -/* -****************************************************************** -*/ - -class DNCache : public UMemory { -public: - Hashtable cache; - const Locale locale; - - DNCache(const Locale& _locale) - : cache(), locale(_locale) - { - // cache.setKeyDeleter(uprv_deleteUObject); - } -}; - - -/* -****************************************************************** -*/ - -StringPair* -StringPair::create(const UnicodeString& displayName, - const UnicodeString& id, - UErrorCode& status) -{ - if (U_SUCCESS(status)) { - StringPair* sp = new StringPair(displayName, id); - if (sp == NULL || sp->isBogus()) { - status = U_MEMORY_ALLOCATION_ERROR; - delete sp; - return NULL; - } - return sp; - } - return NULL; -} - -UBool -StringPair::isBogus() const { - return displayName.isBogus() || id.isBogus(); -} - -StringPair::StringPair(const UnicodeString& _displayName, - const UnicodeString& _id) -: displayName(_displayName) -, id(_id) -{ -} - -U_CDECL_BEGIN -static void U_CALLCONV -userv_deleteStringPair(void *obj) { - U_NAMESPACE_USE delete (StringPair*) obj; -} -U_CDECL_END - -/* -****************************************************************** -*/ - -static UMutex lock = U_MUTEX_INITIALIZER; - -ICUService::ICUService() -: name() -, timestamp(0) -, factories(NULL) -, serviceCache(NULL) -, idCache(NULL) -, dnCache(NULL) -{ -} - -ICUService::ICUService(const UnicodeString& newName) -: name(newName) -, timestamp(0) -, factories(NULL) -, serviceCache(NULL) -, idCache(NULL) -, dnCache(NULL) -{ -} - -ICUService::~ICUService() -{ - { - Mutex mutex(&lock); - clearCaches(); - delete factories; - factories = NULL; - } -} - -UObject* 
-ICUService::get(const UnicodeString& descriptor, UErrorCode& status) const -{ - return get(descriptor, NULL, status); -} - -UObject* -ICUService::get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const -{ - UObject* result = NULL; - ICUServiceKey* key = createKey(&descriptor, status); - if (key) { - result = getKey(*key, actualReturn, status); - delete key; - } - return result; -} - -UObject* -ICUService::getKey(ICUServiceKey& key, UErrorCode& status) const -{ - return getKey(key, NULL, status); -} - -// this is a vector that subclasses of ICUService can override to further customize the result object -// before returning it. All other public get functions should call this one. - -UObject* -ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const -{ - return getKey(key, actualReturn, NULL, status); -} - -// make it possible to call reentrantly on systems that don't have reentrant mutexes. -// we can use this simple approach since we know the situation where we're calling -// reentrantly even without knowing the thread. 
-class XMutex : public UMemory { -public: - inline XMutex(UMutex *mutex, UBool reentering) - : fMutex(mutex) - , fActive(!reentering) - { - if (fActive) umtx_lock(fMutex); - } - inline ~XMutex() { - if (fActive) umtx_unlock(fMutex); - } - -private: - UMutex *fMutex; - UBool fActive; -}; - -struct UVectorDeleter { - UVector* _obj; - UVectorDeleter() : _obj(NULL) {} - ~UVectorDeleter() { delete _obj; } -}; - -// called only by factories, treat as private -UObject* -ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const -{ - if (U_FAILURE(status)) { - return NULL; - } - - if (isDefault()) { - return handleDefault(key, actualReturn, status); - } - - ICUService* ncthis = (ICUService*)this; // cast away semantic const - - CacheEntry* result = NULL; - { - // The factory list can't be modified until we're done, - // otherwise we might update the cache with an invalid result. - // The cache has to stay in synch with the factory list. - // ICU doesn't have monitors so we can't use rw locks, so - // we single-thread everything using this service, for now. - - // if factory is not null, we're calling from within the mutex, - // and since some unix machines don't have reentrant mutexes we - // need to make sure not to try to lock it again. 
- XMutex mutex(&lock, factory != NULL); - - if (serviceCache == NULL) { - ncthis->serviceCache = new Hashtable(status); - if (ncthis->serviceCache == NULL) { - return NULL; - } - if (U_FAILURE(status)) { - delete serviceCache; - return NULL; - } - serviceCache->setValueDeleter(cacheDeleter); - } - - UnicodeString currentDescriptor; - UVectorDeleter cacheDescriptorList; - UBool putInCache = FALSE; - - int32_t startIndex = 0; - int32_t limit = factories->size(); - UBool cacheResult = TRUE; - - if (factory != NULL) { - for (int32_t i = 0; i < limit; ++i) { - if (factory == (const ICUServiceFactory*)factories->elementAt(i)) { - startIndex = i + 1; - break; - } - } - if (startIndex == 0) { - // throw new InternalError("Factory " + factory + "not registered with service: " + this); - status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - cacheResult = FALSE; - } - - do { - currentDescriptor.remove(); - key.currentDescriptor(currentDescriptor); - result = (CacheEntry*)serviceCache->get(currentDescriptor); - if (result != NULL) { - break; - } - - // first test of cache failed, so we'll have to update - // the cache if we eventually succeed-- that is, if we're - // going to update the cache at all. - putInCache = TRUE; - - int32_t index = startIndex; - while (index < limit) { - ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(index++); - UObject* service = f->create(key, this, status); - if (U_FAILURE(status)) { - delete service; - return NULL; - } - if (service != NULL) { - result = new CacheEntry(currentDescriptor, service); - if (result == NULL) { - delete service; - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - goto outerEnd; - } - } - - // prepare to load the cache with all additional ids that - // will resolve to result, assuming we'll succeed. We - // don't want to keep querying on an id that's going to - // fallback to the one that succeeded, we want to hit the - // cache the first time next goaround. 
- if (cacheDescriptorList._obj == NULL) { - cacheDescriptorList._obj = new UVector(uprv_deleteUObject, NULL, 5, status); - if (U_FAILURE(status)) { - return NULL; - } - } - UnicodeString* idToCache = new UnicodeString(currentDescriptor); - if (idToCache == NULL || idToCache->isBogus()) { - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - cacheDescriptorList._obj->addElement(idToCache, status); - if (U_FAILURE(status)) { - return NULL; - } - } while (key.fallback()); -outerEnd: - - if (result != NULL) { - if (putInCache && cacheResult) { - serviceCache->put(result->actualDescriptor, result, status); - if (U_FAILURE(status)) { - return NULL; - } - - if (cacheDescriptorList._obj != NULL) { - for (int32_t i = cacheDescriptorList._obj->size(); --i >= 0;) { - UnicodeString* desc = (UnicodeString*)cacheDescriptorList._obj->elementAt(i); - - serviceCache->put(*desc, result, status); - if (U_FAILURE(status)) { - return NULL; - } - - result->ref(); - cacheDescriptorList._obj->removeElementAt(i); - } - } - } - - if (actualReturn != NULL) { - // strip null prefix - if (result->actualDescriptor.indexOf((UChar)0x2f) == 0) { // U+002f=slash (/) - actualReturn->remove(); - actualReturn->append(result->actualDescriptor, - 1, - result->actualDescriptor.length() - 1); - } else { - *actualReturn = result->actualDescriptor; - } - - if (actualReturn->isBogus()) { - status = U_MEMORY_ALLOCATION_ERROR; - delete result; - return NULL; - } - } - - UObject* service = cloneInstance(result->service); - if (putInCache && !cacheResult) { - delete result; - } - return service; - } - } - - return handleDefault(key, actualReturn, status); -} - -UObject* -ICUService::handleDefault(const ICUServiceKey& /* key */, UnicodeString* /* actualIDReturn */, UErrorCode& /* status */) const -{ - return NULL; -} - -UVector& -ICUService::getVisibleIDs(UVector& result, UErrorCode& status) const { - return getVisibleIDs(result, NULL, status); -} - -UVector& -ICUService::getVisibleIDs(UVector& result, 
const UnicodeString* matchID, UErrorCode& status) const -{ - result.removeAllElements(); - - if (U_FAILURE(status)) { - return result; - } - - { - Mutex mutex(&lock); - const Hashtable* map = getVisibleIDMap(status); - if (map != NULL) { - ICUServiceKey* fallbackKey = createKey(matchID, status); - - for (int32_t pos = UHASH_FIRST;;) { - const UHashElement* e = map->nextElement(pos); - if (e == NULL) { - break; - } - - const UnicodeString* id = (const UnicodeString*)e->key.pointer; - if (fallbackKey != NULL) { - if (!fallbackKey->isFallbackOf(*id)) { - continue; - } - } - - UnicodeString* idClone = new UnicodeString(*id); - if (idClone == NULL || idClone->isBogus()) { - delete idClone; - status = U_MEMORY_ALLOCATION_ERROR; - break; - } - result.addElement(idClone, status); - if (U_FAILURE(status)) { - delete idClone; - break; - } - } - delete fallbackKey; - } - } - if (U_FAILURE(status)) { - result.removeAllElements(); - } - return result; -} - -const Hashtable* -ICUService::getVisibleIDMap(UErrorCode& status) const { - if (U_FAILURE(status)) return NULL; - - // must only be called when lock is already held - - ICUService* ncthis = (ICUService*)this; // cast away semantic const - if (idCache == NULL) { - ncthis->idCache = new Hashtable(status); - if (idCache == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } else if (factories != NULL) { - for (int32_t pos = factories->size(); --pos >= 0;) { - ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(pos); - f->updateVisibleIDs(*idCache, status); - } - if (U_FAILURE(status)) { - delete idCache; - ncthis->idCache = NULL; - } - } - } - - return idCache; -} - - -UnicodeString& -ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result) const -{ - return getDisplayName(id, result, Locale::getDefault()); -} - -UnicodeString& -ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result, const Locale& locale) const -{ - { - UErrorCode status = U_ZERO_ERROR; - Mutex mutex(&lock); - 
const Hashtable* map = getVisibleIDMap(status); - if (map != NULL) { - ICUServiceFactory* f = (ICUServiceFactory*)map->get(id); - if (f != NULL) { - f->getDisplayName(id, locale, result); - return result; - } - - // fallback - status = U_ZERO_ERROR; - ICUServiceKey* fallbackKey = createKey(&id, status); - while (fallbackKey != NULL && fallbackKey->fallback()) { - UnicodeString us; - fallbackKey->currentID(us); - f = (ICUServiceFactory*)map->get(us); - if (f != NULL) { - f->getDisplayName(id, locale, result); - delete fallbackKey; - return result; - } - } - delete fallbackKey; - } - } - result.setToBogus(); - return result; -} - -UVector& -ICUService::getDisplayNames(UVector& result, UErrorCode& status) const -{ - return getDisplayNames(result, Locale::getDefault(), NULL, status); -} - - -UVector& -ICUService::getDisplayNames(UVector& result, const Locale& locale, UErrorCode& status) const -{ - return getDisplayNames(result, locale, NULL, status); -} - -UVector& -ICUService::getDisplayNames(UVector& result, - const Locale& locale, - const UnicodeString* matchID, - UErrorCode& status) const -{ - result.removeAllElements(); - result.setDeleter(userv_deleteStringPair); - if (U_SUCCESS(status)) { - ICUService* ncthis = (ICUService*)this; // cast away semantic const - Mutex mutex(&lock); - - if (dnCache != NULL && dnCache->locale != locale) { - delete dnCache; - ncthis->dnCache = NULL; - } - - if (dnCache == NULL) { - const Hashtable* m = getVisibleIDMap(status); - if (U_FAILURE(status)) { - return result; - } - ncthis->dnCache = new DNCache(locale); - if (dnCache == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return result; - } - - int32_t pos = UHASH_FIRST; - const UHashElement* entry = NULL; - while ((entry = m->nextElement(pos)) != NULL) { - const UnicodeString* id = (const UnicodeString*)entry->key.pointer; - ICUServiceFactory* f = (ICUServiceFactory*)entry->value.pointer; - UnicodeString dname; - f->getDisplayName(*id, locale, dname); - if (dname.isBogus()) { - 
status = U_MEMORY_ALLOCATION_ERROR; - } else { - dnCache->cache.put(dname, (void*)id, status); // share pointer with visibleIDMap - if (U_SUCCESS(status)) { - continue; - } - } - delete dnCache; - ncthis->dnCache = NULL; - return result; - } - } - } - - ICUServiceKey* matchKey = createKey(matchID, status); - /* To ensure that all elements in the hashtable are iterated, set pos to -1. - * nextElement(pos) will skip the position at pos and begin the iteration - * at the next position, which in this case will be 0. - */ - int32_t pos = UHASH_FIRST; - const UHashElement *entry = NULL; - while ((entry = dnCache->cache.nextElement(pos)) != NULL) { - const UnicodeString* id = (const UnicodeString*)entry->value.pointer; - if (matchKey != NULL && !matchKey->isFallbackOf(*id)) { - continue; - } - const UnicodeString* dn = (const UnicodeString*)entry->key.pointer; - StringPair* sp = StringPair::create(*id, *dn, status); - result.addElement(sp, status); - if (U_FAILURE(status)) { - result.removeAllElements(); - break; - } - } - delete matchKey; - - return result; -} - -URegistryKey -ICUService::registerInstance(UObject* objToAdopt, const UnicodeString& id, UErrorCode& status) -{ - return registerInstance(objToAdopt, id, TRUE, status); -} - -URegistryKey -ICUService::registerInstance(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status) -{ - ICUServiceKey* key = createKey(&id, status); - if (key != NULL) { - UnicodeString canonicalID; - key->canonicalID(canonicalID); - delete key; - - ICUServiceFactory* f = createSimpleFactory(objToAdopt, canonicalID, visible, status); - if (f != NULL) { - return registerFactory(f, status); - } - } - delete objToAdopt; - return NULL; -} - -ICUServiceFactory* -ICUService::createSimpleFactory(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status) -{ - if (U_SUCCESS(status)) { - if ((objToAdopt != NULL) && (!id.isBogus())) { - return new SimpleFactory(objToAdopt, id, visible); - } - status = 
U_ILLEGAL_ARGUMENT_ERROR; - } - return NULL; -} - -URegistryKey -ICUService::registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status) -{ - if (U_SUCCESS(status) && factoryToAdopt != NULL) { - Mutex mutex(&lock); - - if (factories == NULL) { - factories = new UVector(deleteUObject, NULL, status); - if (U_FAILURE(status)) { - delete factories; - return NULL; - } - } - factories->insertElementAt(factoryToAdopt, 0, status); - if (U_SUCCESS(status)) { - clearCaches(); - } else { - delete factoryToAdopt; - factoryToAdopt = NULL; - } - } - - if (factoryToAdopt != NULL) { - notifyChanged(); - } - - return (URegistryKey)factoryToAdopt; -} - -UBool -ICUService::unregister(URegistryKey rkey, UErrorCode& status) -{ - ICUServiceFactory *factory = (ICUServiceFactory*)rkey; - UBool result = FALSE; - if (factory != NULL && factories != NULL) { - Mutex mutex(&lock); - - if (factories->removeElement(factory)) { - clearCaches(); - result = TRUE; - } else { - status = U_ILLEGAL_ARGUMENT_ERROR; - delete factory; - } - } - if (result) { - notifyChanged(); - } - return result; -} - -void -ICUService::reset() -{ - { - Mutex mutex(&lock); - reInitializeFactories(); - clearCaches(); - } - notifyChanged(); -} - -void -ICUService::reInitializeFactories() -{ - if (factories != NULL) { - factories->removeAllElements(); - } -} - -UBool -ICUService::isDefault() const -{ - return countFactories() == 0; -} - -ICUServiceKey* -ICUService::createKey(const UnicodeString* id, UErrorCode& status) const -{ - return (U_FAILURE(status) || id == NULL) ? 
NULL : new ICUServiceKey(*id); -} - -void -ICUService::clearCaches() -{ - // callers synchronize before use - ++timestamp; - delete dnCache; - dnCache = NULL; - delete idCache; - idCache = NULL; - delete serviceCache; serviceCache = NULL; -} - -void -ICUService::clearServiceCache() -{ - // callers synchronize before use - delete serviceCache; serviceCache = NULL; -} - -UBool -ICUService::acceptsListener(const EventListener& l) const -{ - return dynamic_cast(&l) != NULL; -} - -void -ICUService::notifyListener(EventListener& l) const -{ - ((ServiceListener&)l).serviceChanged(*this); -} - -UnicodeString& -ICUService::getName(UnicodeString& result) const -{ - return result.append(name); -} - -int32_t -ICUService::countFactories() const -{ - return factories == NULL ? 0 : factories->size(); -} - -int32_t -ICUService::getTimestamp() const -{ - return timestamp; -} - -U_NAMESPACE_END - -/* UCONFIG_NO_SERVICE */ -#endif diff --git a/deps/node/deps/icu-small/source/common/serv.h b/deps/node/deps/icu-small/source/common/serv.h deleted file mode 100644 index 70695839..00000000 --- a/deps/node/deps/icu-small/source/common/serv.h +++ /dev/null @@ -1,995 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** - ******************************************************************************* - * Copyright (C) 2001-2011, International Business Machines Corporation. * - * All Rights Reserved. * - ******************************************************************************* - */ - -#ifndef ICUSERV_H -#define ICUSERV_H - -#include "unicode/utypes.h" - -#if UCONFIG_NO_SERVICE - -U_NAMESPACE_BEGIN - -/* - * Allow the declaration of APIs with pointers to ICUService - * even when service is removed from the build. 
- */ -class ICUService; - -U_NAMESPACE_END - -#else - -#include "unicode/unistr.h" -#include "unicode/locid.h" -#include "unicode/umisc.h" - -#include "hash.h" -#include "uvector.h" -#include "servnotf.h" - -class ICUServiceTest; - -U_NAMESPACE_BEGIN - -class ICUServiceKey; -class ICUServiceFactory; -class SimpleFactory; -class ServiceListener; -class ICUService; - -class DNCache; - -/******************************************************************* - * ICUServiceKey - */ - -/** - *

ICUServiceKeys are used to communicate with factories to - * generate an instance of the service. ICUServiceKeys define how - * ids are canonicalized, provide both a current id and a current - * descriptor to use in querying the cache and factories, and - * determine the fallback strategy.

- * - *

ICUServiceKeys provide both a currentDescriptor and a currentID. - * The descriptor contains an optional prefix, followed by '/' - * and the currentID. Factories that handle complex keys, - * for example number format factories that generate multiple - * kinds of formatters for the same locale, use the descriptor - * to provide a fully unique identifier for the service object, - * while using the currentID (in this case, the locale string), - * as the visible IDs that can be localized.

- * - *

The default implementation of ICUServiceKey has no fallbacks and - * has no custom descriptors.

- */ -class U_COMMON_API ICUServiceKey : public UObject { - private: - const UnicodeString _id; - - protected: - static const UChar PREFIX_DELIMITER; - - public: - - /** - *

Construct a key from an id.

- * - * @param id the ID from which to construct the key. - */ - ICUServiceKey(const UnicodeString& id); - - /** - *

Virtual destructor.

- */ - virtual ~ICUServiceKey(); - - /** - *

Return the original ID used to construct this key.

- * - * @return the ID used to construct this key. - */ - virtual const UnicodeString& getID() const; - - /** - *

Return the canonical version of the original ID. This implementation - * appends the original ID to result. Result is returned as a convenience.

- * - * @param result the output parameter to which the id will be appended. - * @return the modified result. - */ - virtual UnicodeString& canonicalID(UnicodeString& result) const; - - /** - *

Return the (canonical) current ID. This implementation appends - * the canonical ID to result. Result is returned as a convenience.

- * - * @param result the output parameter to which the current id will be appended. - * @return the modified result. - */ - virtual UnicodeString& currentID(UnicodeString& result) const; - - /** - *

Return the current descriptor. This implementation appends - * the current descriptor to result. Result is returned as a convenience.

- * - *

The current descriptor is used to fully - * identify an instance of the service in the cache. A - * factory may handle all descriptors for an ID, or just a - * particular descriptor. The factory can either parse the - * descriptor or use custom API on the key in order to - * instantiate the service.

- * - * @param result the output parameter to which the current id will be appended. - * @return the modified result. - */ - virtual UnicodeString& currentDescriptor(UnicodeString& result) const; - - /** - *

If the key has a fallback, modify the key and return true, - * otherwise return false. The current ID will change if there - * is a fallback. No currentIDs should be repeated, and fallback - * must eventually return false. This implementation has no fallbacks - * and always returns false.

- * - * @return TRUE if the ICUServiceKey changed to a valid fallback value. - */ - virtual UBool fallback(); - - /** - *

Return TRUE if a key created from id matches, or would eventually - * fallback to match, the canonical ID of this ICUServiceKey.

- * - * @param id the id to test. - * @return TRUE if this ICUServiceKey's canonical ID is a fallback of id. - */ - virtual UBool isFallbackOf(const UnicodeString& id) const; - - /** - *

Return the prefix. This implementation leaves result unchanged. - * Result is returned as a convenience.

- * - * @param result the output parameter to which the prefix will be appended. - * @return the modified result. - */ - virtual UnicodeString& prefix(UnicodeString& result) const; - - /** - *

A utility to parse the prefix out of a descriptor string. Only - * the (undelimited) prefix, if any, remains in result. Result is returned as a - * convenience.

- * - * @param result an input/output parameter that on entry is a descriptor, and - * on exit is the prefix of that descriptor. - * @return the modified result. - */ - static UnicodeString& parsePrefix(UnicodeString& result); - - /** - *

A utility to parse the suffix out of a descriptor string. Only - * the (undelimited) suffix, if any, remains in result. Result is returned as a - * convenience.

- * - * @param result an input/output parameter that on entry is a descriptor, and - * on exit is the suffix of that descriptor. - * @return the modified result. - */ - static UnicodeString& parseSuffix(UnicodeString& result); - -public: - /** - * UObject RTTI boilerplate. - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * UObject RTTI boilerplate. - */ - virtual UClassID getDynamicClassID() const; - -#ifdef SERVICE_DEBUG - public: - virtual UnicodeString& debug(UnicodeString& result) const; - virtual UnicodeString& debugClass(UnicodeString& result) const; -#endif - -}; - - /******************************************************************* - * ICUServiceFactory - */ - - /** - *

An implementing ICUServiceFactory generates the service objects maintained by the - * service. A factory generates a service object from a key, - * updates id->factory mappings, and returns the display name for - * a supported id.

- */ -class U_COMMON_API ICUServiceFactory : public UObject { - public: - virtual ~ICUServiceFactory(); - - /** - *

Create a service object from the key, if this factory - * supports the key. Otherwise, return NULL.

- * - *

If the factory supports the key, then it can call - * the service's getKey(ICUServiceKey, String[], ICUServiceFactory) method - * passing itself as the factory to get the object that - * the service would have created prior to the factory's - * registration with the service. This can change the - * key, so any information required from the key should - * be extracted before making such a callback.

- * - * @param key the service key. - * @param service the service with which this factory is registered. - * @param status the error code status. - * @return the service object, or NULL if the factory does not support the key. - */ - virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const = 0; - - /** - *

Update result to reflect the IDs (not descriptors) that this - * factory publicly handles. Result contains mappings from ID to - * factory. On entry it will contain all (visible) mappings from - * previously-registered factories.

- * - *

This function, together with getDisplayName, are used to - * support ICUService::getDisplayNames. The factory determines - * which IDs (of those it supports) it will make visible, and of - * those, which it will provide localized display names for. In - * most cases it will register mappings from all IDs it supports - * to itself.

- * - * @param result the mapping table to update. - * @param status the error code status. - */ - virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const = 0; - - /** - *

Return, in result, the display name of the id in the provided locale. - * This is an id, not a descriptor. If the id is - * not visible, sets result to bogus. If the - * incoming result is bogus, it remains bogus. Result is returned as a - * convenience. Results are not defined if id is not one supported by this - * factory.

- * - * @param id a visible id supported by this factory. - * @param locale the locale for which to generate the corresponding localized display name. - * @param result output parameter to hold the display name. - * @return result. - */ - virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const = 0; -}; - -/* - ****************************************************************** - */ - - /** - *

A default implementation of factory. This provides default - * implementations for subclasses, and implements a singleton - * factory that matches a single ID and returns a single - * (possibly deferred-initialized) instance. This implements - * updateVisibleIDs to add a mapping from its ID to itself - * if visible is true, or to remove any existing mapping - * for its ID if visible is false. No localization of display - * names is performed.

- */ -class U_COMMON_API SimpleFactory : public ICUServiceFactory { - protected: - UObject* _instance; - const UnicodeString _id; - const UBool _visible; - - public: - /** - *

Construct a SimpleFactory that maps a single ID to a single - * service instance. If visible is TRUE, the ID will be visible. - * The instance must not be NULL. The SimpleFactory will adopt - * the instance, which must not be changed subsequent to this call.

- * - * @param instanceToAdopt the service instance to adopt. - * @param id the ID to assign to this service instance. - * @param visible if TRUE, the ID will be visible. - */ - SimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible = TRUE); - - /** - *

Destructor.

- */ - virtual ~SimpleFactory(); - - /** - *

This implementation returns a clone of the service instance if the factory's ID is equal to - * the key's currentID. Service and prefix are ignored.

- * - * @param key the service key. - * @param service the service with which this factory is registered. - * @param status the error code status. - * @return the service object, or NULL if the factory does not support the key. - */ - virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const; - - /** - *

This implementation adds a mapping from ID -> this to result if visible is TRUE, - * otherwise it removes ID from result.

- * - * @param result the mapping table to update. - * @param status the error code status. - */ - virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const; - - /** - *

This implementation returns the factory ID if it equals id and visible is TRUE, - * otherwise it returns the empty string. (This implementation provides - * no localized id information.)

- * - * @param id a visible id supported by this factory. - * @param locale the locale for which to generate the corresponding localized display name. - * @param result output parameter to hold the display name. - * @return result. - */ - virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const; - -public: - /** - * UObject RTTI boilerplate. - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * UObject RTTI boilerplate. - */ - virtual UClassID getDynamicClassID() const; - -#ifdef SERVICE_DEBUG - public: - virtual UnicodeString& debug(UnicodeString& toAppendTo) const; - virtual UnicodeString& debugClass(UnicodeString& toAppendTo) const; -#endif - -}; - -/* - ****************************************************************** - */ - -/** - *

ServiceListener is the listener that ICUService provides by default. - * ICUService will notifiy this listener when factories are added to - * or removed from the service. Subclasses can provide - * different listener interfaces that extend EventListener, and modify - * acceptsListener and notifyListener as appropriate.

- */ -class U_COMMON_API ServiceListener : public EventListener { -public: - virtual ~ServiceListener(); - - /** - *

This method is called when the service changes. At the time of the - * call this listener is registered with the service. It must - * not modify the notifier in the context of this call.

- * - * @param service the service that changed. - */ - virtual void serviceChanged(const ICUService& service) const = 0; - -public: - /** - * UObject RTTI boilerplate. - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * UObject RTTI boilerplate. - */ - virtual UClassID getDynamicClassID() const; - -}; - -/* - ****************************************************************** - */ - -/** - *

A StringPair holds a displayName/ID pair. ICUService uses it - * as the array elements returned by getDisplayNames. - */ -class U_COMMON_API StringPair : public UMemory { -public: - /** - *

The display name of the pair.

- */ - const UnicodeString displayName; - - /** - *

The ID of the pair.

- */ - const UnicodeString id; - - /** - *

Creates a string pair from a displayName and an ID.

- * - * @param displayName the displayName. - * @param id the ID. - * @param status the error code status. - * @return a StringPair if the creation was successful, otherwise NULL. - */ - static StringPair* create(const UnicodeString& displayName, - const UnicodeString& id, - UErrorCode& status); - - /** - *

Return TRUE if either string of the pair is bogus.

- * @return TRUE if either string of the pair is bogus. - */ - UBool isBogus() const; - -private: - StringPair(const UnicodeString& displayName, const UnicodeString& id); -}; - -/******************************************************************* - * ICUService - */ - - /** - *

A Service provides access to service objects that implement a - * particular service, e.g. transliterators. Users provide a String - * id (for example, a locale string) to the service, and get back an - * object for that id. Service objects can be any kind of object. A - * new service object is returned for each query. The caller is - * responsible for deleting it.

- * - *

Services 'canonicalize' the query ID and use the canonical ID to - * query for the service. The service also defines a mechanism to - * 'fallback' the ID multiple times. Clients can optionally request - * the actual ID that was matched by a query when they use an ID to - * retrieve a service object.

- * - *

Service objects are instantiated by ICUServiceFactory objects - * registered with the service. The service queries each - * ICUServiceFactory in turn, from most recently registered to - * earliest registered, until one returns a service object. If none - * responds with a service object, a fallback ID is generated, and the - * process repeats until a service object is returned or until the ID - * has no further fallbacks.

- * - *

In ICU 2.4, UObject (the base class of service instances) does - * not define a polymorphic clone function. ICUService uses clones to - * manage ownership. Thus, for now, ICUService defines an abstract - * method, cloneInstance, that clients must implement to create clones - * of the service instances. This may change in future releases of - * ICU.

- * - *

ICUServiceFactories can be dynamically registered and - * unregistered with the service. When registered, an - * ICUServiceFactory is installed at the head of the factory list, and - * so gets 'first crack' at any keys or fallback keys. When - * unregistered, it is removed from the service and can no longer be - * located through it. Service objects generated by this factory and - * held by the client are unaffected.

- * - *

If a service has variants (e.g., the different variants of - * BreakIterator) an ICUServiceFactory can use the prefix of the - * ICUServiceKey to determine the variant of a service to generate. - * If it does not support all variants, it can request - * previously-registered factories to handle the ones it does not - * support.

- * - *

ICUService uses ICUServiceKeys to query factories and perform - * fallback. The ICUServiceKey defines the canonical form of the ID, - * and implements the fallback strategy. Custom ICUServiceKeys can be - * defined that parse complex IDs into components that - * ICUServiceFactories can more easily use. The ICUServiceKey can - * cache the results of this parsing to save repeated effort. - * ICUService provides convenience APIs that take UnicodeStrings and - * generate default ICUServiceKeys for use in querying.

- * - *

ICUService provides API to get the list of IDs publicly - * supported by the service (although queries aren't restricted to - * this list). This list contains only 'simple' IDs, and not fully - * unique IDs. ICUServiceFactories are associated with each simple ID - * and the responsible factory can also return a human-readable - * localized version of the simple ID, for use in user interfaces. - * ICUService can also provide an array of the all the localized - * visible IDs and their corresponding internal IDs.

- * - *

ICUService implements ICUNotifier, so that clients can register - * to receive notification when factories are added or removed from - * the service. ICUService provides a default EventListener - * subinterface, ServiceListener, which can be registered with the - * service. When the service changes, the ServiceListener's - * serviceChanged method is called with the service as the - * argument.

- * - *

The ICUService API is both rich and generic, and it is expected - * that most implementations will statically 'wrap' ICUService to - * present a more appropriate API-- for example, to declare the type - * of the objects returned from get, to limit the factories that can - * be registered with the service, or to define their own listener - * interface with a custom callback method. They might also customize - * ICUService by overriding it, for example, to customize the - * ICUServiceKey and fallback strategy. ICULocaleService is a - * subclass of ICUService that uses Locale names as IDs and uses - * ICUServiceKeys that implement the standard resource bundle fallback - * strategy. Most clients will wish to subclass it instead of - * ICUService.

- */ -class U_COMMON_API ICUService : public ICUNotifier { - protected: - /** - * Name useful for debugging. - */ - const UnicodeString name; - - private: - - /** - * Timestamp so iterators can be fail-fast. - */ - uint32_t timestamp; - - /** - * All the factories registered with this service. - */ - UVector* factories; - - /** - * The service cache. - */ - Hashtable* serviceCache; - - /** - * The ID cache. - */ - Hashtable* idCache; - - /** - * The name cache. - */ - DNCache* dnCache; - - /** - * Constructor. - */ - public: - /** - *

Construct a new ICUService.

- */ - ICUService(); - - /** - *

Construct with a name (useful for debugging).

- * - * @param name a name to use in debugging. - */ - ICUService(const UnicodeString& name); - - /** - *

Destructor.

- */ - virtual ~ICUService(); - - /** - *

Return the name of this service. This will be the empty string if none was assigned. - * Returns result as a convenience.

- * - * @param result an output parameter to contain the name of this service. - * @return the name of this service. - */ - UnicodeString& getName(UnicodeString& result) const; - - /** - *

Convenience override for get(ICUServiceKey&, UnicodeString*). This uses - * createKey to create a key for the provided descriptor.

- * - * @param descriptor the descriptor. - * @param status the error code status. - * @return the service instance, or NULL. - */ - UObject* get(const UnicodeString& descriptor, UErrorCode& status) const; - - /** - *

Convenience override for get(ICUServiceKey&, UnicodeString*). This uses - * createKey to create a key from the provided descriptor.

- * - * @param descriptor the descriptor. - * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL. - * @param status the error code status. - * @return the service instance, or NULL. - */ - UObject* get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const; - - /** - *

Convenience override for get(ICUServiceKey&, UnicodeString*).

- * - * @param key the key. - * @param status the error code status. - * @return the service instance, or NULL. - */ - UObject* getKey(ICUServiceKey& key, UErrorCode& status) const; - - /** - *

Given a key, return a service object, and, if actualReturn - * is not NULL, the descriptor with which it was found in the - * first element of actualReturn. If no service object matches - * this key, returns NULL and leaves actualReturn unchanged.

- * - *

This queries the cache using the key's descriptor, and if no - * object in the cache matches, tries the key on each - * registered factory, in order. If none generates a service - * object for the key, repeats the process with each fallback of - * the key, until either a factory returns a service object, or the key - * has no fallback. If no object is found, the result of handleDefault - * is returned.

- * - *

Subclasses can override this method to further customize the - * result before returning it. - * - * @param key the key. - * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL. - * @param status the error code status. - * @return the service instance, or NULL. - */ - virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const; - - /** - *

This version of getKey is only called by ICUServiceFactories within the scope - * of a previous getKey call, to determine what previously-registered factories would - * have returned. For details, see getKey(ICUServiceKey&, UErrorCode&). Subclasses - * should not call it directly, but call through one of the other get functions.

- * - * @param key the key. - * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL. - * @param factory the factory making the recursive call. - * @param status the error code status. - * @return the service instance, or NULL. - */ - UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const; - - /** - *

Convenience override for getVisibleIDs(String) that passes null - * as the fallback, thus returning all visible IDs.

- * - * @param result a vector to hold the returned IDs. - * @param status the error code status. - * @return the result vector. - */ - UVector& getVisibleIDs(UVector& result, UErrorCode& status) const; - - /** - *

Return a snapshot of the visible IDs for this service. This - * list will not change as ICUServiceFactories are added or removed, but the - * supported IDs will, so there is no guarantee that all and only - * the IDs in the returned list will be visible and supported by the - * service in subsequent calls.

- * - *

The IDs are returned as pointers to UnicodeStrings. The - * caller owns the IDs. Previous contents of result are discarded before - * new elements, if any, are added.

- * - *

matchID is passed to createKey to create a key. If the key - * is not NULL, its isFallbackOf method is used to filter out IDs - * that don't match the key or have it as a fallback.

- * - * @param result a vector to hold the returned IDs. - * @param matchID an ID used to filter the result, or NULL if all IDs are desired. - * @param status the error code status. - * @return the result vector. - */ - UVector& getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorCode& status) const; - - /** - *

Convenience override for getDisplayName(const UnicodeString&, const Locale&, UnicodeString&) that - * uses the current default locale.

- * - * @param id the ID for which to retrieve the localized displayName. - * @param result an output parameter to hold the display name. - * @return the modified result. - */ - UnicodeString& getDisplayName(const UnicodeString& id, UnicodeString& result) const; - - /** - *

Given a visible ID, return the display name in the requested locale. - * If there is no directly supported ID corresponding to this ID, result is - * set to bogus.

- * - * @param id the ID for which to retrieve the localized displayName. - * @param result an output parameter to hold the display name. - * @param locale the locale in which to localize the ID. - * @return the modified result. - */ - UnicodeString& getDisplayName(const UnicodeString& id, UnicodeString& result, const Locale& locale) const; - - /** - *

Convenience override of getDisplayNames(const Locale&, const UnicodeString*) that - * uses the current default Locale as the locale and NULL for - * the matchID.

- * - * @param result a vector to hold the returned displayName/id StringPairs. - * @param status the error code status. - * @return the modified result vector. - */ - UVector& getDisplayNames(UVector& result, UErrorCode& status) const; - - /** - *

Convenience override of getDisplayNames(const Locale&, const UnicodeString*) that - * uses NULL for the matchID.

- * - * @param result a vector to hold the returned displayName/id StringPairs. - * @param locale the locale in which to localize the ID. - * @param status the error code status. - * @return the modified result vector. - */ - UVector& getDisplayNames(UVector& result, const Locale& locale, UErrorCode& status) const; - - /** - *

Return a snapshot of the mapping from display names to visible - * IDs for this service. This set will not change as factories - * are added or removed, but the supported IDs will, so there is - * no guarantee that all and only the IDs in the returned map will - * be visible and supported by the service in subsequent calls, - * nor is there any guarantee that the current display names match - * those in the result.

- * - *

The names are returned as pointers to StringPairs, which - * contain both the displayName and the corresponding ID. The - * caller owns the StringPairs. Previous contents of result are - * discarded before new elements, if any, are added.

- * - *

matchID is passed to createKey to create a key. If the key - * is not NULL, its isFallbackOf method is used to filter out IDs - * that don't match the key or have it as a fallback.

- * - * @param result a vector to hold the returned displayName/id StringPairs. - * @param locale the locale in which to localize the ID. - * @param matchID an ID used to filter the result, or NULL if all IDs are desired. - * @param status the error code status. - * @return the result vector. */ - UVector& getDisplayNames(UVector& result, - const Locale& locale, - const UnicodeString* matchID, - UErrorCode& status) const; - - /** - *

A convenience override of registerInstance(UObject*, const UnicodeString&, UBool) - * that defaults visible to TRUE.

- * - * @param objToAdopt the object to register and adopt. - * @param id the ID to assign to this object. - * @param status the error code status. - * @return a registry key that can be passed to unregister to unregister - * (and discard) this instance. - */ - URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& id, UErrorCode& status); - - /** - *

Register a service instance with the provided ID. The ID will be - * canonicalized. The canonicalized ID will be returned by - * getVisibleIDs if visible is TRUE. The service instance will be adopted and - * must not be modified subsequent to this call.

- * - *

This issues a serviceChanged notification to registered listeners.

- * - *

This implementation wraps the object using - * createSimpleFactory, and calls registerFactory.

- * - * @param objToAdopt the object to register and adopt. - * @param id the ID to assign to this object. - * @param visible TRUE if getVisibleIDs is to return this ID. - * @param status the error code status. - * @return a registry key that can be passed to unregister() to unregister - * (and discard) this instance. - */ - virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status); - - /** - *

Register an ICUServiceFactory. Returns a registry key that - * can be used to unregister the factory. The factory - * must not be modified subsequent to this call. The service owns - * all registered factories. In case of an error, the factory is - * deleted.

- * - *

This issues a serviceChanged notification to registered listeners.

- * - *

The default implementation accepts all factories.

- * - * @param factoryToAdopt the factory to register and adopt. - * @param status the error code status. - * @return a registry key that can be passed to unregister to unregister - * (and discard) this factory. - */ - virtual URegistryKey registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status); - - /** - *

Unregister a factory using a registry key returned by - * registerInstance or registerFactory. After a successful call, - * the factory will be removed from the service factory list and - * deleted, and the key becomes invalid.

- * - *

This issues a serviceChanged notification to registered - * listeners.

- * - * @param rkey the registry key. - * @param status the error code status. - * @return TRUE if the call successfully unregistered the factory. - */ - virtual UBool unregister(URegistryKey rkey, UErrorCode& status); - - /** - *

Reset the service to the default factories. The factory - * lock is acquired and then reInitializeFactories is called.

- * - *

This issues a serviceChanged notification to registered listeners.

- */ - virtual void reset(void); - - /** - *

Return TRUE if the service is in its default state.

- * - *

The default implementation returns TRUE if there are no - * factories registered.

- */ - virtual UBool isDefault(void) const; - - /** - *

Create a key from an ID. If ID is NULL, returns NULL.

- * - *

The default implementation creates an ICUServiceKey instance. - * Subclasses can override to define more useful keys appropriate - * to the factories they accept.

- * - * @param a pointer to the ID for which to create a default ICUServiceKey. - * @param status the error code status. - * @return the ICUServiceKey corresponding to ID, or NULL. - */ - virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const; - - /** - *

Clone object so that caller can own the copy. In ICU2.4, UObject doesn't define - * clone, so we need an instance-aware method that knows how to do this. - * This is public so factories can call it, but should really be protected.

- * - * @param instance the service instance to clone. - * @return a clone of the passed-in instance, or NULL if cloning was unsuccessful. - */ - virtual UObject* cloneInstance(UObject* instance) const = 0; - - - /************************************************************************ - * Subclassing API - */ - - protected: - - /** - *

Create a factory that wraps a single service object. Called by registerInstance.

- * - *

The default implementation returns an instance of SimpleFactory.

- * - * @param instanceToAdopt the service instance to adopt. - * @param id the ID to assign to this service instance. - * @param visible if TRUE, the ID will be visible. - * @param status the error code status. - * @return an instance of ICUServiceFactory that maps this instance to the provided ID. - */ - virtual ICUServiceFactory* createSimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status); - - /** - *

Reinitialize the factory list to its default state. After this call, isDefault() - * must return TRUE.

- * - *

This issues a serviceChanged notification to registered listeners.

- * - *

The default implementation clears the factory list. - * Subclasses can override to provide other default initialization - * of the factory list. Subclasses must not call this method - * directly, since it must only be called while holding write - * access to the factory list.

- */ - virtual void reInitializeFactories(void); - - /** - *

Default handler for this service if no factory in the factory list - * handled the key passed to getKey.

- * - *

The default implementation returns NULL.

- * - * @param key the key. - * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL. - * @param status the error code status. - * @return the service instance, or NULL. - */ - virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const; - - /** - *

Clear caches maintained by this service.

- * - *

Subclasses can override if they implement additional caches - * that need to be cleared when the service changes. Subclasses - * should generally not call this method directly, as it must only - * be called while synchronized on the factory lock.

- */ - virtual void clearCaches(void); - - /** - *

Return true if the listener is accepted.

- * - *

The default implementation accepts the listener if it is - * a ServiceListener. Subclasses can override this to accept - * different listeners.

- * - * @param l the listener to test. - * @return TRUE if the service accepts the listener. - */ - virtual UBool acceptsListener(const EventListener& l) const; - - /** - *

Notify the listener of a service change.

- * - *

The default implementation assumes a ServiceListener. - * If acceptsListener has been overridden to accept different - * listeners, this should be overridden as well.

- * - * @param l the listener to notify. - */ - virtual void notifyListener(EventListener& l) const; - - /************************************************************************ - * Utilities for subclasses. - */ - - /** - *

Clear only the service cache.

- * - *

This can be called by subclasses when a change affects the service - * cache but not the ID caches, e.g., when the default locale changes - * the resolution of IDs also changes, requiring the cache to be - * flushed, but not the visible IDs themselves.

- */ - void clearServiceCache(void); - - /** - *

Return a map from visible IDs to factories. - * This must only be called when the mutex is held.

- * - * @param status the error code status. - * @return a Hashtable containing mappings from visible - * IDs to factories. - */ - const Hashtable* getVisibleIDMap(UErrorCode& status) const; - - /** - *

Allow subclasses to read the time stamp.

- * - * @return the timestamp. - */ - int32_t getTimestamp(void) const; - - /** - *

Return the number of registered factories.

- * - * @return the number of factories registered at the time of the call. - */ - int32_t countFactories(void) const; - -private: - - friend class ::ICUServiceTest; // give tests access to countFactories. -}; - -U_NAMESPACE_END - - /* UCONFIG_NO_SERVICE */ -#endif - - /* ICUSERV_H */ -#endif diff --git a/deps/node/deps/icu-small/source/common/servlk.cpp b/deps/node/deps/icu-small/source/common/servlk.cpp deleted file mode 100644 index 27b046f1..00000000 --- a/deps/node/deps/icu-small/source/common/servlk.cpp +++ /dev/null @@ -1,186 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** - ******************************************************************************* - * Copyright (C) 2001-2014, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - * - ******************************************************************************* - */ -#include "unicode/utypes.h" - -#if !UCONFIG_NO_SERVICE - -#include "unicode/resbund.h" -#include "uresimp.h" -#include "cmemory.h" -#include "servloc.h" -#include "ustrfmt.h" -#include "uhash.h" -#include "charstr.h" -#include "uassert.h" - -#define UNDERSCORE_CHAR ((UChar)0x005f) -#define AT_SIGN_CHAR ((UChar)64) -#define PERIOD_CHAR ((UChar)46) - -U_NAMESPACE_BEGIN - -LocaleKey* -LocaleKey::createWithCanonicalFallback(const UnicodeString* primaryID, - const UnicodeString* canonicalFallbackID, - UErrorCode& status) -{ - return LocaleKey::createWithCanonicalFallback(primaryID, canonicalFallbackID, KIND_ANY, status); -} - -LocaleKey* -LocaleKey::createWithCanonicalFallback(const UnicodeString* primaryID, - const UnicodeString* canonicalFallbackID, - int32_t kind, - UErrorCode& status) -{ - if (primaryID == NULL || U_FAILURE(status)) { - return NULL; - } - UnicodeString canonicalPrimaryID; - LocaleUtility::canonicalLocaleString(primaryID, 
canonicalPrimaryID); - return new LocaleKey(*primaryID, canonicalPrimaryID, canonicalFallbackID, kind); -} - -LocaleKey::LocaleKey(const UnicodeString& primaryID, - const UnicodeString& canonicalPrimaryID, - const UnicodeString* canonicalFallbackID, - int32_t kind) - : ICUServiceKey(primaryID) - , _kind(kind) - , _primaryID(canonicalPrimaryID) - , _fallbackID() - , _currentID() -{ - _fallbackID.setToBogus(); - if (_primaryID.length() != 0) { - if (canonicalFallbackID != NULL && _primaryID != *canonicalFallbackID) { - _fallbackID = *canonicalFallbackID; - } - } - - _currentID = _primaryID; -} - -LocaleKey::~LocaleKey() {} - -UnicodeString& -LocaleKey::prefix(UnicodeString& result) const { - if (_kind != KIND_ANY) { - UChar buffer[64]; - uprv_itou(buffer, 64, _kind, 10, 0); - UnicodeString temp(buffer); - result.append(temp); - } - return result; -} - -int32_t -LocaleKey::kind() const { - return _kind; -} - -UnicodeString& -LocaleKey::canonicalID(UnicodeString& result) const { - return result.append(_primaryID); -} - -UnicodeString& -LocaleKey::currentID(UnicodeString& result) const { - if (!_currentID.isBogus()) { - result.append(_currentID); - } - return result; -} - -UnicodeString& -LocaleKey::currentDescriptor(UnicodeString& result) const { - if (!_currentID.isBogus()) { - prefix(result).append(PREFIX_DELIMITER).append(_currentID); - } else { - result.setToBogus(); - } - return result; -} - -Locale& -LocaleKey::canonicalLocale(Locale& result) const { - return LocaleUtility::initLocaleFromName(_primaryID, result); -} - -Locale& -LocaleKey::currentLocale(Locale& result) const { - return LocaleUtility::initLocaleFromName(_currentID, result); -} - -UBool -LocaleKey::fallback() { - if (!_currentID.isBogus()) { - int x = _currentID.lastIndexOf(UNDERSCORE_CHAR); - if (x != -1) { - _currentID.remove(x); // truncate current or fallback, whichever we're pointing to - return TRUE; - } - - if (!_fallbackID.isBogus()) { - _currentID = _fallbackID; - _fallbackID.setToBogus(); 
- return TRUE; - } - - if (_currentID.length() > 0) { - _currentID.remove(0); // completely truncate - return TRUE; - } - - _currentID.setToBogus(); - } - - return FALSE; -} - -UBool -LocaleKey::isFallbackOf(const UnicodeString& id) const { - UnicodeString temp(id); - parseSuffix(temp); - return temp.indexOf(_primaryID) == 0 && - (temp.length() == _primaryID.length() || - temp.charAt(_primaryID.length()) == UNDERSCORE_CHAR); -} - -#ifdef SERVICE_DEBUG -UnicodeString& -LocaleKey::debug(UnicodeString& result) const -{ - ICUServiceKey::debug(result); - result.append((UnicodeString)" kind: "); - result.append(_kind); - result.append((UnicodeString)" primaryID: "); - result.append(_primaryID); - result.append((UnicodeString)" fallbackID: "); - result.append(_fallbackID); - result.append((UnicodeString)" currentID: "); - result.append(_currentID); - return result; -} - -UnicodeString& -LocaleKey::debugClass(UnicodeString& result) const -{ - return result.append((UnicodeString)"LocaleKey "); -} -#endif - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocaleKey) - -U_NAMESPACE_END - -/* !UCONFIG_NO_SERVICE */ -#endif diff --git a/deps/node/deps/icu-small/source/common/servlkf.cpp b/deps/node/deps/icu-small/source/common/servlkf.cpp deleted file mode 100644 index 6e46bd20..00000000 --- a/deps/node/deps/icu-small/source/common/servlkf.cpp +++ /dev/null @@ -1,150 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** - ******************************************************************************* - * Copyright (C) 2001-2014, International Business Machines Corporation and * - * others. All Rights Reserved. 
* - ******************************************************************************* - * - ******************************************************************************* - */ -#include "unicode/utypes.h" - -#if !UCONFIG_NO_SERVICE - -#include "unicode/resbund.h" -#include "uresimp.h" -#include "cmemory.h" -#include "servloc.h" -#include "ustrfmt.h" -#include "uhash.h" -#include "charstr.h" -#include "ucln_cmn.h" -#include "uassert.h" - -#define UNDERSCORE_CHAR ((UChar)0x005f) -#define AT_SIGN_CHAR ((UChar)64) -#define PERIOD_CHAR ((UChar)46) - - -U_NAMESPACE_BEGIN - -LocaleKeyFactory::LocaleKeyFactory(int32_t coverage) - : _name() - , _coverage(coverage) -{ -} - -LocaleKeyFactory::LocaleKeyFactory(int32_t coverage, const UnicodeString& name) - : _name(name) - , _coverage(coverage) -{ -} - -LocaleKeyFactory::~LocaleKeyFactory() { -} - -UObject* -LocaleKeyFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const { - if (handlesKey(key, status)) { - const LocaleKey& lkey = (const LocaleKey&)key; - int32_t kind = lkey.kind(); - Locale loc; - lkey.currentLocale(loc); - - return handleCreate(loc, kind, service, status); - } - return NULL; -} - -UBool -LocaleKeyFactory::handlesKey(const ICUServiceKey& key, UErrorCode& status) const { - const Hashtable* supported = getSupportedIDs(status); - if (supported) { - UnicodeString id; - key.currentID(id); - return supported->get(id) != NULL; - } - return FALSE; -} - -void -LocaleKeyFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const { - const Hashtable* supported = getSupportedIDs(status); - if (supported) { - UBool visible = (_coverage & 0x1) == 0; - const UHashElement* elem = NULL; - int32_t pos = UHASH_FIRST; - while ((elem = supported->nextElement(pos)) != NULL) { - const UnicodeString& id = *((const UnicodeString*)elem->key.pointer); - if (!visible) { - result.remove(id); - } else { - result.put(id, (void*)this, status); // this is dummy non-void marker used for set 
semantics - if (U_FAILURE(status)) { - break; - } - } - } - } -} - -UnicodeString& -LocaleKeyFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const { - if ((_coverage & 0x1) == 0) { - //UErrorCode status = U_ZERO_ERROR; - // assume if this is called on us, we support some fallback of this id - // if (isSupportedID(id, status)) { - Locale loc; - LocaleUtility::initLocaleFromName(id, loc); - return loc.getDisplayName(locale, result); - // } - } - result.setToBogus(); - return result; -} - -UObject* -LocaleKeyFactory::handleCreate(const Locale& /* loc */, - int32_t /* kind */, - const ICUService* /* service */, - UErrorCode& /* status */) const { - return NULL; -} - -//UBool -//LocaleKeyFactory::isSupportedID(const UnicodeString& id, UErrorCode& status) const { -// const Hashtable* ids = getSupportedIDs(status); -// return ids && ids->get(id); -//} - -const Hashtable* -LocaleKeyFactory::getSupportedIDs(UErrorCode& /* status */) const { - return NULL; -} - -#ifdef SERVICE_DEBUG -UnicodeString& -LocaleKeyFactory::debug(UnicodeString& result) const -{ - debugClass(result); - result.append((UnicodeString)", name: "); - result.append(_name); - result.append((UnicodeString)", coverage: "); - result.append(_coverage); - return result; -} - -UnicodeString& -LocaleKeyFactory::debugClass(UnicodeString& result) const -{ - return result.append((UnicodeString)"LocaleKeyFactory"); -} -#endif - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocaleKeyFactory) - -U_NAMESPACE_END - -/* !UCONFIG_NO_SERVICE */ -#endif diff --git a/deps/node/deps/icu-small/source/common/servloc.h b/deps/node/deps/icu-small/source/common/servloc.h deleted file mode 100644 index 50198942..00000000 --- a/deps/node/deps/icu-small/source/common/servloc.h +++ /dev/null @@ -1,550 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/** - ******************************************************************************* - * Copyright (C) 2001-2011, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - * - ******************************************************************************* - */ -#ifndef ICULSERV_H -#define ICULSERV_H - -#include "unicode/utypes.h" - -#if UCONFIG_NO_SERVICE - -U_NAMESPACE_BEGIN - -/* - * Allow the declaration of APIs with pointers to ICUService - * even when service is removed from the build. - */ -class ICULocaleService; - -U_NAMESPACE_END - -#else - -#include "unicode/unistr.h" -#include "unicode/locid.h" -#include "unicode/strenum.h" - -#include "hash.h" -#include "uvector.h" - -#include "serv.h" -#include "locutil.h" - -U_NAMESPACE_BEGIN - -class ICULocaleService; - -class LocaleKey; -class LocaleKeyFactory; -class SimpleLocaleKeyFactory; -class ServiceListener; - -/* - ****************************************************************** - */ - -/** - * A subclass of Key that implements a locale fallback mechanism. - * The first locale to search for is the locale provided by the - * client, and the fallback locale to search for is the current - * default locale. If a prefix is present, the currentDescriptor - * includes it before the locale proper, separated by "/". This - * is the default key instantiated by ICULocaleService.

- * - *

Canonicalization adjusts the locale string so that the - * section before the first understore is in lower case, and the rest - * is in upper case, with no trailing underscores.

- */ - -class U_COMMON_API LocaleKey : public ICUServiceKey { - private: - int32_t _kind; - UnicodeString _primaryID; - UnicodeString _fallbackID; - UnicodeString _currentID; - - public: - enum { - KIND_ANY = -1 - }; - - /** - * Create a LocaleKey with canonical primary and fallback IDs. - */ - static LocaleKey* createWithCanonicalFallback(const UnicodeString* primaryID, - const UnicodeString* canonicalFallbackID, - UErrorCode& status); - - /** - * Create a LocaleKey with canonical primary and fallback IDs. - */ - static LocaleKey* createWithCanonicalFallback(const UnicodeString* primaryID, - const UnicodeString* canonicalFallbackID, - int32_t kind, - UErrorCode& status); - - protected: - /** - * PrimaryID is the user's requested locale string, - * canonicalPrimaryID is this string in canonical form, - * fallbackID is the current default locale's string in - * canonical form. - */ - LocaleKey(const UnicodeString& primaryID, - const UnicodeString& canonicalPrimaryID, - const UnicodeString* canonicalFallbackID, - int32_t kind); - - public: - /** - * Append the prefix associated with the kind, or nothing if the kind is KIND_ANY. - */ - virtual UnicodeString& prefix(UnicodeString& result) const; - - /** - * Return the kind code associated with this key. - */ - virtual int32_t kind() const; - - /** - * Return the canonicalID. - */ - virtual UnicodeString& canonicalID(UnicodeString& result) const; - - /** - * Return the currentID. - */ - virtual UnicodeString& currentID(UnicodeString& result) const; - - /** - * Return the (canonical) current descriptor, or null if no current id. - */ - virtual UnicodeString& currentDescriptor(UnicodeString& result) const; - - /** - * Convenience method to return the locale corresponding to the (canonical) original ID. - */ - virtual Locale& canonicalLocale(Locale& result) const; - - /** - * Convenience method to return the locale corresponding to the (canonical) current ID. 
- */ - virtual Locale& currentLocale(Locale& result) const; - - /** - * If the key has a fallback, modify the key and return true, - * otherwise return false.

- * - *

First falls back through the primary ID, then through - * the fallbackID. The final fallback is the empty string, - * unless the primary id was the empty string, in which case - * there is no fallback. - */ - virtual UBool fallback(); - - /** - * Return true if a key created from id matches, or would eventually - * fallback to match, the canonical ID of this key. - */ - virtual UBool isFallbackOf(const UnicodeString& id) const; - - public: - /** - * UObject boilerplate. - */ - static UClassID U_EXPORT2 getStaticClassID(); - - virtual UClassID getDynamicClassID() const; - - /** - * Destructor. - */ - virtual ~LocaleKey(); - -#ifdef SERVICE_DEBUG - public: - virtual UnicodeString& debug(UnicodeString& result) const; - virtual UnicodeString& debugClass(UnicodeString& result) const; -#endif - -}; - -/* - ****************************************************************** - */ - -/** - * A subclass of ICUServiceFactory that uses LocaleKeys, and is able to - * 'cover' more specific locales with more general locales that it - * supports. - * - *

Coverage may be either of the values VISIBLE or INVISIBLE. - * - *

'Visible' indicates that the specific locale(s) supported by - * the factory are registered in getSupportedIDs, 'Invisible' - * indicates that they are not. - * - *

Localization of visible ids is handled - * by the handling factory, regardless of kind. - */ -class U_COMMON_API LocaleKeyFactory : public ICUServiceFactory { -protected: - const UnicodeString _name; - const int32_t _coverage; - -public: - enum { - /** - * Coverage value indicating that the factory makes - * its locales visible, and does not cover more specific - * locales. - */ - VISIBLE = 0, - - /** - * Coverage value indicating that the factory does not make - * its locales visible, and does not cover more specific - * locales. - */ - INVISIBLE = 1 - }; - - /** - * Destructor. - */ - virtual ~LocaleKeyFactory(); - -protected: - /** - * Constructor used by subclasses. - */ - LocaleKeyFactory(int32_t coverage); - - /** - * Constructor used by subclasses. - */ - LocaleKeyFactory(int32_t coverage, const UnicodeString& name); - - /** - * Implement superclass abstract method. This checks the currentID of - * the key against the supported IDs, and passes the canonicalLocale and - * kind off to handleCreate (which subclasses must implement). - */ -public: - virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const; - -protected: - virtual UBool handlesKey(const ICUServiceKey& key, UErrorCode& status) const; - -public: - /** - * Override of superclass method. This adjusts the result based - * on the coverage rule for this factory. - */ - virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const; - - /** - * Return a localized name for the locale represented by id. - */ - virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const; - -protected: - /** - * Utility method used by create(ICUServiceKey, ICUService). Subclasses can implement - * this instead of create. The default returns NULL. 
- */ - virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const; - - /** - * Return true if this id is one the factory supports (visible or - * otherwise). - */ - // virtual UBool isSupportedID(const UnicodeString& id, UErrorCode& status) const; - - /** - * Return the set of ids that this factory supports (visible or - * otherwise). This can be called often and might need to be - * cached if it is expensive to create. - */ - virtual const Hashtable* getSupportedIDs(UErrorCode& status) const; - -public: - /** - * UObject boilerplate. - */ - static UClassID U_EXPORT2 getStaticClassID(); - - virtual UClassID getDynamicClassID() const; - -#ifdef SERVICE_DEBUG - public: - virtual UnicodeString& debug(UnicodeString& result) const; - virtual UnicodeString& debugClass(UnicodeString& result) const; -#endif - -}; - -/* - ****************************************************************** - */ - -/** - * A LocaleKeyFactory that just returns a single object for a kind/locale. - */ - -class U_COMMON_API SimpleLocaleKeyFactory : public LocaleKeyFactory { - private: - UObject* _obj; - UnicodeString _id; - const int32_t _kind; - - public: - SimpleLocaleKeyFactory(UObject* objToAdopt, - const UnicodeString& locale, - int32_t kind, - int32_t coverage); - - SimpleLocaleKeyFactory(UObject* objToAdopt, - const Locale& locale, - int32_t kind, - int32_t coverage); - - /** - * Destructor. - */ - virtual ~SimpleLocaleKeyFactory(); - - /** - * Override of superclass method. Returns the service object if kind/locale match. Service is not used. - */ - virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const; - - /** - * Override of superclass method. This adjusts the result based - * on the coverage rule for this factory. - */ - virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const; - - protected: - /** - * Return true if this id is equal to the locale name. 
- */ - //virtual UBool isSupportedID(const UnicodeString& id, UErrorCode& status) const; - - -public: - /** - * UObject boilerplate. - */ - static UClassID U_EXPORT2 getStaticClassID(); - - virtual UClassID getDynamicClassID() const; - -#ifdef SERVICE_DEBUG - public: - virtual UnicodeString& debug(UnicodeString& result) const; - virtual UnicodeString& debugClass(UnicodeString& result) const; -#endif - -}; - -/* - ****************************************************************** - */ - -/** - * A LocaleKeyFactory that creates a service based on the ICU locale data. - * This is a base class for most ICU factories. Subclasses instantiate it - * with a constructor that takes a bundle name, which determines the supported - * IDs. Subclasses then override handleCreate to create the actual service - * object. The default implementation returns a resource bundle. - */ -class U_COMMON_API ICUResourceBundleFactory : public LocaleKeyFactory -{ - protected: - UnicodeString _bundleName; - - public: - /** - * Convenience constructor that uses the main ICU bundle name. - */ - ICUResourceBundleFactory(); - - /** - * A service factory based on ICU resource data in resources with - * the given name. This should be a 'path' that can be passed to - * ures_openAvailableLocales, such as U_ICUDATA or U_ICUDATA_COLL. - * The empty string is equivalent to U_ICUDATA. - */ - ICUResourceBundleFactory(const UnicodeString& bundleName); - - /** - * Destructor - */ - virtual ~ICUResourceBundleFactory(); - -protected: - /** - * Return the supported IDs. This is the set of all locale names in ICULocaleData. - */ - virtual const Hashtable* getSupportedIDs(UErrorCode& status) const; - - /** - * Create the service. The default implementation returns the resource bundle - * for the locale, ignoring kind, and service. - */ - virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const; - -public: - /** - * UObject boilerplate. 
- */ - static UClassID U_EXPORT2 getStaticClassID(); - virtual UClassID getDynamicClassID() const; - - -#ifdef SERVICE_DEBUG - public: - virtual UnicodeString& debug(UnicodeString& result) const; - virtual UnicodeString& debugClass(UnicodeString& result) const; -#endif - -}; - -/* - ****************************************************************** - */ - -class U_COMMON_API ICULocaleService : public ICUService -{ - private: - Locale fallbackLocale; - UnicodeString fallbackLocaleName; - - public: - /** - * Construct an ICULocaleService. - */ - ICULocaleService(); - - /** - * Construct an ICULocaleService with a name (useful for debugging). - */ - ICULocaleService(const UnicodeString& name); - - /** - * Destructor. - */ - virtual ~ICULocaleService(); - -#if 0 - // redeclare because of overload resolution rules? - // no, causes ambiguities since both UnicodeString and Locale have constructors that take a const char* - // need some compiler flag to remove warnings - UObject* get(const UnicodeString& descriptor, UErrorCode& status) const { - return ICUService::get(descriptor, status); - } - - UObject* get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const { - return ICUService::get(descriptor, actualReturn, status); - } -#endif - - /** - * Convenience override for callers using locales. This calls - * get(Locale, int, Locale[]) with KIND_ANY for kind and null for - * actualReturn. - */ - UObject* get(const Locale& locale, UErrorCode& status) const; - - /** - * Convenience override for callers using locales. This calls - * get(Locale, int, Locale[]) with a null actualReturn. - */ - UObject* get(const Locale& locale, int32_t kind, UErrorCode& status) const; - - /** - * Convenience override for callers using locales. This calls - * get(Locale, String, Locale[]) with a null kind. - */ - UObject* get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const; - - /** - * Convenience override for callers using locales. 
This uses - * createKey(Locale.toString(), kind) to create a key, calls getKey, and then - * if actualReturn is not null, returns the actualResult from - * getKey (stripping any prefix) into a Locale. - */ - UObject* get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const; - - /** - * Convenience override for callers using locales. This calls - * registerObject(Object, Locale, int32_t kind, int coverage) - * passing KIND_ANY for the kind, and VISIBLE for the coverage. - */ - virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status); - - /** - * Convenience function for callers using locales. This calls - * registerObject(Object, Locale, int kind, int coverage) - * passing VISIBLE for the coverage. - */ - virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status); - - /** - * Convenience function for callers using locales. This instantiates - * a SimpleLocaleKeyFactory, and registers the factory. - */ - virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status); - - - /** - * (Stop compiler from complaining about hidden overrides.) - * Since both UnicodeString and Locale have constructors that take const char*, adding a public - * method that takes UnicodeString causes ambiguity at call sites that use const char*. - * We really need a flag that is understood by all compilers that will suppress the warning about - * hidden overrides. - */ - virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status); - - /** - * Convenience method for callers using locales. This returns the standard - * service ID enumeration. - */ - virtual StringEnumeration* getAvailableLocales(void) const; - - protected: - - /** - * Return the name of the current fallback locale. 
If it has changed since this was - * last accessed, the service cache is cleared. - */ - const UnicodeString& validateFallbackLocale() const; - - /** - * Override superclass createKey method. - */ - virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const; - - /** - * Additional createKey that takes a kind. - */ - virtual ICUServiceKey* createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const; - - friend class ServiceEnumeration; -}; - -U_NAMESPACE_END - - /* UCONFIG_NO_SERVICE */ -#endif - - /* ICULSERV_H */ -#endif diff --git a/deps/node/deps/icu-small/source/common/servls.cpp b/deps/node/deps/icu-small/source/common/servls.cpp deleted file mode 100644 index 907fe7fe..00000000 --- a/deps/node/deps/icu-small/source/common/servls.cpp +++ /dev/null @@ -1,293 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** - ******************************************************************************* - * Copyright (C) 2001-2014, International Business Machines Corporation and * - * others. All Rights Reserved. 
* - ******************************************************************************* - * - ******************************************************************************* - */ -#include "unicode/utypes.h" - -#if !UCONFIG_NO_SERVICE - -#include "unicode/resbund.h" -#include "uresimp.h" -#include "cmemory.h" -#include "servloc.h" -#include "ustrfmt.h" -#include "charstr.h" -#include "uassert.h" - -#define UNDERSCORE_CHAR ((UChar)0x005f) -#define AT_SIGN_CHAR ((UChar)64) -#define PERIOD_CHAR ((UChar)46) - -U_NAMESPACE_BEGIN - -static UMutex llock = U_MUTEX_INITIALIZER; -ICULocaleService::ICULocaleService() - : fallbackLocale(Locale::getDefault()) -{ -} - -ICULocaleService::ICULocaleService(const UnicodeString& dname) - : ICUService(dname) - , fallbackLocale(Locale::getDefault()) -{ -} - -ICULocaleService::~ICULocaleService() -{ -} - -UObject* -ICULocaleService::get(const Locale& locale, UErrorCode& status) const -{ - return get(locale, LocaleKey::KIND_ANY, NULL, status); -} - -UObject* -ICULocaleService::get(const Locale& locale, int32_t kind, UErrorCode& status) const -{ - return get(locale, kind, NULL, status); -} - -UObject* -ICULocaleService::get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const -{ - return get(locale, LocaleKey::KIND_ANY, actualReturn, status); -} - -UObject* -ICULocaleService::get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const -{ - UObject* result = NULL; - if (U_FAILURE(status)) { - return result; - } - - UnicodeString locName(locale.getName(), -1, US_INV); - if (locName.isBogus()) { - status = U_MEMORY_ALLOCATION_ERROR; - } else { - ICUServiceKey* key = createKey(&locName, kind, status); - if (key) { - if (actualReturn == NULL) { - result = getKey(*key, status); - } else { - UnicodeString temp; - result = getKey(*key, &temp, status); - - if (result != NULL) { - key->parseSuffix(temp); - LocaleUtility::initLocaleFromName(temp, *actualReturn); - } - } - delete key; - } - } - return result; 
-} - - -URegistryKey -ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, - UBool visible, UErrorCode& status) -{ - Locale loc; - LocaleUtility::initLocaleFromName(locale, loc); - return registerInstance(objToAdopt, loc, LocaleKey::KIND_ANY, - visible ? LocaleKeyFactory::VISIBLE : LocaleKeyFactory::INVISIBLE, status); -} - -URegistryKey -ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status) -{ - return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, LocaleKeyFactory::VISIBLE, status); -} - -URegistryKey -ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status) -{ - return registerInstance(objToAdopt, locale, kind, LocaleKeyFactory::VISIBLE, status); -} - -URegistryKey -ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status) -{ - ICUServiceFactory * factory = new SimpleLocaleKeyFactory(objToAdopt, locale, kind, coverage); - if (factory != NULL) { - return registerFactory(factory, status); - } - delete objToAdopt; - return NULL; -} - -#if 0 -URegistryKey -ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, UErrorCode& status) -{ - return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, LocaleKeyFactory::VISIBLE, status); -} - -URegistryKey -ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status) -{ - return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, - visible ? 
LocaleKeyFactory::VISIBLE : LocaleKeyFactory::INVISIBLE, - status); -} - -URegistryKey -ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, int32_t kind, int32_t coverage, UErrorCode& status) -{ - ICUServiceFactory * factory = new SimpleLocaleKeyFactory(objToAdopt, locale, kind, coverage); - if (factory != NULL) { - return registerFactory(factory, status); - } - delete objToAdopt; - return NULL; -} -#endif - -class ServiceEnumeration : public StringEnumeration { -private: - const ICULocaleService* _service; - int32_t _timestamp; - UVector _ids; - int32_t _pos; - -private: - ServiceEnumeration(const ICULocaleService* service, UErrorCode &status) - : _service(service) - , _timestamp(service->getTimestamp()) - , _ids(uprv_deleteUObject, NULL, status) - , _pos(0) - { - _service->getVisibleIDs(_ids, status); - } - - ServiceEnumeration(const ServiceEnumeration &other, UErrorCode &status) - : _service(other._service) - , _timestamp(other._timestamp) - , _ids(uprv_deleteUObject, NULL, status) - , _pos(0) - { - if(U_SUCCESS(status)) { - int32_t i, length; - - length = other._ids.size(); - for(i = 0; i < length; ++i) { - _ids.addElement(((UnicodeString *)other._ids.elementAt(i))->clone(), status); - } - - if(U_SUCCESS(status)) { - _pos = other._pos; - } - } - } - -public: - static ServiceEnumeration* create(const ICULocaleService* service) { - UErrorCode status = U_ZERO_ERROR; - ServiceEnumeration* result = new ServiceEnumeration(service, status); - if (U_SUCCESS(status)) { - return result; - } - delete result; - return NULL; - } - - virtual ~ServiceEnumeration(); - - virtual StringEnumeration *clone() const { - UErrorCode status = U_ZERO_ERROR; - ServiceEnumeration *cl = new ServiceEnumeration(*this, status); - if(U_FAILURE(status)) { - delete cl; - cl = NULL; - } - return cl; - } - - UBool upToDate(UErrorCode& status) const { - if (U_SUCCESS(status)) { - if (_timestamp == _service->getTimestamp()) { - return TRUE; - } - status = 
U_ENUM_OUT_OF_SYNC_ERROR; - } - return FALSE; - } - - virtual int32_t count(UErrorCode& status) const { - return upToDate(status) ? _ids.size() : 0; - } - - virtual const UnicodeString* snext(UErrorCode& status) { - if (upToDate(status) && (_pos < _ids.size())) { - return (const UnicodeString*)_ids[_pos++]; - } - return NULL; - } - - virtual void reset(UErrorCode& status) { - if (status == U_ENUM_OUT_OF_SYNC_ERROR) { - status = U_ZERO_ERROR; - } - if (U_SUCCESS(status)) { - _timestamp = _service->getTimestamp(); - _pos = 0; - _service->getVisibleIDs(_ids, status); - } - } - -public: - static UClassID U_EXPORT2 getStaticClassID(void); - virtual UClassID getDynamicClassID(void) const; -}; - -ServiceEnumeration::~ServiceEnumeration() {} - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ServiceEnumeration) - -StringEnumeration* -ICULocaleService::getAvailableLocales(void) const -{ - return ServiceEnumeration::create(this); -} - -const UnicodeString& -ICULocaleService::validateFallbackLocale() const -{ - const Locale& loc = Locale::getDefault(); - ICULocaleService* ncThis = (ICULocaleService*)this; - { - Mutex mutex(&llock); - if (loc != fallbackLocale) { - ncThis->fallbackLocale = loc; - LocaleUtility::initNameFromLocale(loc, ncThis->fallbackLocaleName); - ncThis->clearServiceCache(); - } - } - return fallbackLocaleName; -} - -ICUServiceKey* -ICULocaleService::createKey(const UnicodeString* id, UErrorCode& status) const -{ - return LocaleKey::createWithCanonicalFallback(id, &validateFallbackLocale(), status); -} - -ICUServiceKey* -ICULocaleService::createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const -{ - return LocaleKey::createWithCanonicalFallback(id, &validateFallbackLocale(), kind, status); -} - -U_NAMESPACE_END - -/* !UCONFIG_NO_SERVICE */ -#endif diff --git a/deps/node/deps/icu-small/source/common/servnotf.cpp b/deps/node/deps/icu-small/source/common/servnotf.cpp deleted file mode 100644 index 5159452f..00000000 --- 
a/deps/node/deps/icu-small/source/common/servnotf.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** - ******************************************************************************* - * Copyright (C) 2001-2012, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_SERVICE - -#include "servnotf.h" -#ifdef NOTIFIER_DEBUG -#include -#endif - -U_NAMESPACE_BEGIN - -EventListener::~EventListener() {} -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EventListener) - -static UMutex notifyLock = U_MUTEX_INITIALIZER; - -ICUNotifier::ICUNotifier(void) -: listeners(NULL) -{ -} - -ICUNotifier::~ICUNotifier(void) { - { - Mutex lmx(¬ifyLock); - delete listeners; - listeners = NULL; - } -} - - -void -ICUNotifier::addListener(const EventListener* l, UErrorCode& status) -{ - if (U_SUCCESS(status)) { - if (l == NULL) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if (acceptsListener(*l)) { - Mutex lmx(¬ifyLock); - if (listeners == NULL) { - listeners = new UVector(5, status); - } else { - for (int i = 0, e = listeners->size(); i < e; ++i) { - const EventListener* el = (const EventListener*)(listeners->elementAt(i)); - if (l == el) { - return; - } - } - } - - listeners->addElement((void*)l, status); // cast away const - } -#ifdef NOTIFIER_DEBUG - else { - fprintf(stderr, "Listener invalid for this notifier."); - exit(1); - } -#endif - } -} - -void -ICUNotifier::removeListener(const EventListener *l, UErrorCode& status) -{ - if (U_SUCCESS(status)) { - if (l == NULL) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - { - Mutex lmx(¬ifyLock); - if (listeners != NULL) { - // identity equality check - for (int i = 0, e = listeners->size(); i < e; ++i) { - const EventListener* el = (const 
EventListener*)listeners->elementAt(i); - if (l == el) { - listeners->removeElementAt(i); - if (listeners->size() == 0) { - delete listeners; - listeners = NULL; - } - return; - } - } - } - } - } -} - -void -ICUNotifier::notifyChanged(void) -{ - if (listeners != NULL) { - Mutex lmx(¬ifyLock); - if (listeners != NULL) { - for (int i = 0, e = listeners->size(); i < e; ++i) { - EventListener* el = (EventListener*)listeners->elementAt(i); - notifyListener(*el); - } - } - } -} - -U_NAMESPACE_END - -/* UCONFIG_NO_SERVICE */ -#endif diff --git a/deps/node/deps/icu-small/source/common/servnotf.h b/deps/node/deps/icu-small/source/common/servnotf.h deleted file mode 100644 index cf92fc16..00000000 --- a/deps/node/deps/icu-small/source/common/servnotf.h +++ /dev/null @@ -1,125 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** - ******************************************************************************* - * Copyright (C) 2001-2014, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - */ -#ifndef ICUNOTIF_H -#define ICUNOTIF_H - -#include "unicode/utypes.h" - -#if UCONFIG_NO_SERVICE - -U_NAMESPACE_BEGIN - -/* - * Allow the declaration of APIs with pointers to BreakIterator - * even when break iteration is removed from the build. 
- */ -class ICUNotifier; - -U_NAMESPACE_END - -#else - -#include "unicode/uobject.h" -#include "unicode/unistr.h" - -#include "mutex.h" -#include "uvector.h" - -U_NAMESPACE_BEGIN - -class U_COMMON_API EventListener : public UObject { -public: - virtual ~EventListener(); - -public: - static UClassID U_EXPORT2 getStaticClassID(); - - virtual UClassID getDynamicClassID() const; - -public: -#ifdef SERVICE_DEBUG - virtual UnicodeString& debug(UnicodeString& result) const { - return debugClass(result); - } - - virtual UnicodeString& debugClass(UnicodeString& result) const { - return result.append((UnicodeString)"Key"); - } -#endif -}; - -/** - *

Abstract implementation of a notification facility. Clients add - * EventListeners with addListener and remove them with removeListener. - * Notifiers call notifyChanged when they wish to notify listeners. - * This queues the listener list on the notification thread, which - * eventually dequeues the list and calls notifyListener on each - * listener in the list.

- * - *

Subclasses override acceptsListener and notifyListener - * to add type-safe notification. AcceptsListener should return - * true if the listener is of the appropriate type; ICUNotifier - * itself will ensure the listener is non-null and that the - * identical listener is not already registered with the Notifier. - * NotifyListener should cast the listener to the appropriate - * type and call the appropriate method on the listener. - */ - -class U_COMMON_API ICUNotifier : public UMemory { -private: UVector* listeners; - -public: - ICUNotifier(void); - - virtual ~ICUNotifier(void); - - /** - * Add a listener to be notified when notifyChanged is called. - * The listener must not be null. AcceptsListener must return - * true for the listener. Attempts to concurrently - * register the identical listener more than once will be - * silently ignored. - */ - virtual void addListener(const EventListener* l, UErrorCode& status); - - /** - * Stop notifying this listener. The listener must - * not be null. Attemps to remove a listener that is - * not registered will be silently ignored. - */ - virtual void removeListener(const EventListener* l, UErrorCode& status); - - /** - * ICU doesn't spawn its own threads. All listeners are notified in - * the thread of the caller. Misbehaved listeners can therefore - * indefinitely block the calling thread. Callers should beware of - * deadlock situations. - */ - virtual void notifyChanged(void); - -protected: - /** - * Subclasses implement this to return TRUE if the listener is - * of the appropriate type. - */ - virtual UBool acceptsListener(const EventListener& l) const = 0; - - /** - * Subclasses implement this to notify the listener. 
- */ - virtual void notifyListener(EventListener& l) const = 0; -}; - -U_NAMESPACE_END - -/* UCONFIG_NO_SERVICE */ -#endif - -/* ICUNOTIF_H */ -#endif diff --git a/deps/node/deps/icu-small/source/common/servrbf.cpp b/deps/node/deps/icu-small/source/common/servrbf.cpp deleted file mode 100644 index 3f143afa..00000000 --- a/deps/node/deps/icu-small/source/common/servrbf.cpp +++ /dev/null @@ -1,94 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** - ******************************************************************************* - * Copyright (C) 2001-2014, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - * - ******************************************************************************* - */ -#include "unicode/utypes.h" - -#if !UCONFIG_NO_SERVICE - -#include "unicode/resbund.h" -#include "uresimp.h" -#include "cmemory.h" -#include "servloc.h" -#include "ustrfmt.h" -#include "uhash.h" -#include "charstr.h" -#include "ucln_cmn.h" -#include "uassert.h" - -#define UNDERSCORE_CHAR ((UChar)0x005f) -#define AT_SIGN_CHAR ((UChar)64) -#define PERIOD_CHAR ((UChar)46) - -U_NAMESPACE_BEGIN - -ICUResourceBundleFactory::ICUResourceBundleFactory() - : LocaleKeyFactory(VISIBLE) - , _bundleName() -{ -} - -ICUResourceBundleFactory::ICUResourceBundleFactory(const UnicodeString& bundleName) - : LocaleKeyFactory(VISIBLE) - , _bundleName(bundleName) -{ -} - -ICUResourceBundleFactory::~ICUResourceBundleFactory() {} - -const Hashtable* -ICUResourceBundleFactory::getSupportedIDs(UErrorCode& status) const -{ - if (U_SUCCESS(status)) { - return LocaleUtility::getAvailableLocaleNames(_bundleName); - } - return NULL; -} - -UObject* -ICUResourceBundleFactory::handleCreate(const Locale& loc, int32_t /* kind */, const ICUService* /* service */, UErrorCode& status) const -{ - if (U_SUCCESS(status)) { - // 
_bundleName is a package name - // and should only contain invariant characters - // ??? is it always true that the max length of the bundle name is 19? - // who made this change? -- dlf - char pkg[20]; - int32_t length; - length=_bundleName.extract(0, INT32_MAX, pkg, (int32_t)sizeof(pkg), US_INV); - if(length>=(int32_t)sizeof(pkg)) { - return NULL; - } - return new ResourceBundle(pkg, loc, status); - } - return NULL; -} - -#ifdef SERVICE_DEBUG -UnicodeString& -ICUResourceBundleFactory::debug(UnicodeString& result) const -{ - LocaleKeyFactory::debug(result); - result.append((UnicodeString)", bundle: "); - return result.append(_bundleName); -} - -UnicodeString& -ICUResourceBundleFactory::debugClass(UnicodeString& result) const -{ - return result.append((UnicodeString)"ICUResourceBundleFactory"); -} -#endif - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ICUResourceBundleFactory) - -U_NAMESPACE_END - -/* !UCONFIG_NO_SERVICE */ -#endif diff --git a/deps/node/deps/icu-small/source/common/servslkf.cpp b/deps/node/deps/icu-small/source/common/servslkf.cpp deleted file mode 100644 index 4aa10414..00000000 --- a/deps/node/deps/icu-small/source/common/servslkf.cpp +++ /dev/null @@ -1,121 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/** - ******************************************************************************* - * Copyright (C) 2001-2014, International Business Machines Corporation and * - * others. All Rights Reserved. 
* - ******************************************************************************* - * - ******************************************************************************* - */ -#include "unicode/utypes.h" - -#if !UCONFIG_NO_SERVICE - -#include "unicode/resbund.h" -#include "uresimp.h" -#include "cmemory.h" -#include "servloc.h" -#include "ustrfmt.h" -#include "uhash.h" -#include "charstr.h" -#include "uassert.h" - -#define UNDERSCORE_CHAR ((UChar)0x005f) -#define AT_SIGN_CHAR ((UChar)64) -#define PERIOD_CHAR ((UChar)46) - -U_NAMESPACE_BEGIN - -/* - ****************************************************************** - */ - -SimpleLocaleKeyFactory::SimpleLocaleKeyFactory(UObject* objToAdopt, - const UnicodeString& locale, - int32_t kind, - int32_t coverage) - : LocaleKeyFactory(coverage) - , _obj(objToAdopt) - , _id(locale) - , _kind(kind) -{ -} - -SimpleLocaleKeyFactory::SimpleLocaleKeyFactory(UObject* objToAdopt, - const Locale& locale, - int32_t kind, - int32_t coverage) - : LocaleKeyFactory(coverage) - , _obj(objToAdopt) - , _id() - , _kind(kind) -{ - LocaleUtility::initNameFromLocale(locale, _id); -} - -SimpleLocaleKeyFactory::~SimpleLocaleKeyFactory() -{ - delete _obj; - _obj = NULL; -} - -UObject* -SimpleLocaleKeyFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const -{ - if (U_SUCCESS(status)) { - const LocaleKey& lkey = (const LocaleKey&)key; - if (_kind == LocaleKey::KIND_ANY || _kind == lkey.kind()) { - UnicodeString keyID; - lkey.currentID(keyID); - if (_id == keyID) { - return service->cloneInstance(_obj); - } - } - } - return NULL; -} - -//UBool -//SimpleLocaleKeyFactory::isSupportedID(const UnicodeString& id, UErrorCode& /* status */) const -//{ -// return id == _id; -//} - -void -SimpleLocaleKeyFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const -{ - if (U_SUCCESS(status)) { - if (_coverage & 0x1) { - result.remove(_id); - } else { - result.put(_id, (void*)this, status); - } - } -} - -#ifdef 
SERVICE_DEBUG -UnicodeString& -SimpleLocaleKeyFactory::debug(UnicodeString& result) const -{ - LocaleKeyFactory::debug(result); - result.append((UnicodeString)", id: "); - result.append(_id); - result.append((UnicodeString)", kind: "); - result.append(_kind); - return result; -} - -UnicodeString& -SimpleLocaleKeyFactory::debugClass(UnicodeString& result) const -{ - return result.append((UnicodeString)"SimpleLocaleKeyFactory"); -} -#endif - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleLocaleKeyFactory) - -U_NAMESPACE_END - -/* !UCONFIG_NO_SERVICE */ -#endif diff --git a/deps/node/deps/icu-small/source/common/sharedobject.cpp b/deps/node/deps/icu-small/source/common/sharedobject.cpp deleted file mode 100644 index 6eeca860..00000000 --- a/deps/node/deps/icu-small/source/common/sharedobject.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2015, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* sharedobject.cpp -*/ -#include "sharedobject.h" -#include "mutex.h" -#include "uassert.h" -#include "umutex.h" -#include "unifiedcache.h" - -U_NAMESPACE_BEGIN - -SharedObject::~SharedObject() {} - -UnifiedCacheBase::~UnifiedCacheBase() {} - -void -SharedObject::addRef() const { - umtx_atomic_inc(&hardRefCount); -} - -// removeRef Decrement the reference count and delete if it is zero. -// Note that SharedObjects with a non-null cachePtr are owned by the -// unified cache, and the cache will be responsible for the actual deletion. -// The deletion could be as soon as immediately following the -// update to the reference count, if another thread is running -// a cache eviction cycle concurrently. -// NO ACCESS TO *this PERMITTED AFTER REFERENCE COUNT == 0 for cached objects. 
-// THE OBJECT MAY ALREADY BE GONE. -void -SharedObject::removeRef() const { - const UnifiedCacheBase *cache = this->cachePtr; - int32_t updatedRefCount = umtx_atomic_dec(&hardRefCount); - U_ASSERT(updatedRefCount >= 0); - if (updatedRefCount == 0) { - if (cache) { - cache->handleUnreferencedObject(); - } else { - delete this; - } - } -} - - -int32_t -SharedObject::getRefCount() const { - return umtx_loadAcquire(hardRefCount); -} - -void -SharedObject::deleteIfZeroRefCount() const { - if (this->cachePtr == nullptr && getRefCount() == 0) { - delete this; - } -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/sharedobject.h b/deps/node/deps/icu-small/source/common/sharedobject.h deleted file mode 100644 index 878594c7..00000000 --- a/deps/node/deps/icu-small/source/common/sharedobject.h +++ /dev/null @@ -1,184 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2015-2016, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* sharedobject.h -*/ - -#ifndef __SHAREDOBJECT_H__ -#define __SHAREDOBJECT_H__ - - -#include "unicode/uobject.h" -#include "umutex.h" - -U_NAMESPACE_BEGIN - -class SharedObject; - -/** - * Base class for unified cache exposing enough methods to SharedObject - * instances to allow their addRef() and removeRef() methods to - * update cache metrics. No other part of ICU, except for SharedObject, - * should directly call the methods of this base class. - */ -class U_COMMON_API UnifiedCacheBase : public UObject { -public: - UnifiedCacheBase() { } - - /** - * Notify the cache implementation that an object was seen transitioning to - * zero hard references. 
The cache may use this to keep track the number of - * unreferenced SharedObjects, and to trigger evictions. - */ - virtual void handleUnreferencedObject() const = 0; - - virtual ~UnifiedCacheBase(); -private: - UnifiedCacheBase(const UnifiedCacheBase &); - UnifiedCacheBase &operator=(const UnifiedCacheBase &); -}; - -/** - * Base class for shared, reference-counted, auto-deleted objects. - * Subclasses can be immutable. - * If they are mutable, then they must implement their copy constructor - * so that copyOnWrite() works. - * - * Either stack-allocate, use LocalPointer, or use addRef()/removeRef(). - * Sharing requires reference-counting. - */ -class U_COMMON_API SharedObject : public UObject { -public: - /** Initializes totalRefCount, softRefCount to 0. */ - SharedObject() : - softRefCount(0), - hardRefCount(0), - cachePtr(NULL) {} - - /** Initializes totalRefCount, softRefCount to 0. */ - SharedObject(const SharedObject &other) : - UObject(other), - softRefCount(0), - hardRefCount(0), - cachePtr(NULL) {} - - virtual ~SharedObject(); - - /** - * Increments the number of hard references to this object. Thread-safe. - * Not for use from within the Unified Cache implementation. - */ - void addRef() const; - - /** - * Decrements the number of hard references to this object, and - * arrange for possible cache-eviction and/or deletion if ref - * count goes to zero. Thread-safe. - * - * Not for use from within the UnifiedCache implementation. - */ - void removeRef() const; - - /** - * Returns the number of hard references for this object. - * Uses a memory barrier. - */ - int32_t getRefCount() const; - - /** - * If noHardReferences() == TRUE then this object has no hard references. - * Must be called only from within the internals of UnifiedCache. - */ - inline UBool noHardReferences() const { return getRefCount() == 0; } - - /** - * If hasHardReferences() == TRUE then this object has hard references. - * Must be called only from within the internals of UnifiedCache. 
- */ - inline UBool hasHardReferences() const { return getRefCount() != 0; } - - /** - * Deletes this object if it has no references. - * Available for non-cached SharedObjects only. Ownership of cached objects - * is with the UnifiedCache, which is solely responsible for eviction and deletion. - */ - void deleteIfZeroRefCount() const; - - - /** - * Returns a writable version of ptr. - * If there is exactly one owner, then ptr itself is returned as a - * non-const pointer. - * If there are multiple owners, then ptr is replaced with a - * copy-constructed clone, - * and that is returned. - * Returns NULL if cloning failed. - * - * T must be a subclass of SharedObject. - */ - template - static T *copyOnWrite(const T *&ptr) { - const T *p = ptr; - if(p->getRefCount() <= 1) { return const_cast(p); } - T *p2 = new T(*p); - if(p2 == NULL) { return NULL; } - p->removeRef(); - ptr = p2; - p2->addRef(); - return p2; - } - - /** - * Makes dest an owner of the object pointed to by src while adjusting - * reference counts and deleting the previous object dest pointed to - * if necessary. Before this call is made, dest must either be NULL or - * be included in the reference count of the object it points to. - * - * T must be a subclass of SharedObject. - */ - template - static void copyPtr(const T *src, const T *&dest) { - if(src != dest) { - if(dest != NULL) { dest->removeRef(); } - dest = src; - if(src != NULL) { src->addRef(); } - } - } - - /** - * Equivalent to copyPtr(NULL, dest). - */ - template - static void clearPtr(const T *&ptr) { - if (ptr != NULL) { - ptr->removeRef(); - ptr = NULL; - } - } - -private: - /** - * The number of references from the UnifiedCache, which is - * the number of times that the sharedObject is stored as a hash table value. - * For use by UnifiedCache implementation code only. 
- * All access is synchronized by UnifiedCache's gCacheMutex - */ - mutable int32_t softRefCount; - friend class UnifiedCache; - - /** - * Reference count, excluding references from within the UnifiedCache implementation. - */ - mutable u_atomic_int32_t hardRefCount; - - mutable const UnifiedCacheBase *cachePtr; - -}; - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/simpleformatter.cpp b/deps/node/deps/icu-small/source/common/simpleformatter.cpp deleted file mode 100644 index f866e0a1..00000000 --- a/deps/node/deps/icu-small/source/common/simpleformatter.cpp +++ /dev/null @@ -1,314 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2014-2016, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* simpleformatter.cpp -*/ - -#include "unicode/utypes.h" -#include "unicode/simpleformatter.h" -#include "unicode/unistr.h" -#include "uassert.h" - -U_NAMESPACE_BEGIN - -namespace { - -/** - * Argument numbers must be smaller than this limit. - * Text segment lengths are offset by this much. - * This is currently the only unused char value in compiled patterns, - * except it is the maximum value of the first unit (max arg +1). - */ -const int32_t ARG_NUM_LIMIT = 0x100; -/** - * Initial and maximum char/UChar value set for a text segment. - * Segment length char values are from ARG_NUM_LIMIT+1 to this value here. - * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing. - */ -const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff; -/** - * Maximum length of a text segment. Longer segments are split into shorter ones. 
- */ -const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT; - -enum { - APOS = 0x27, - DIGIT_ZERO = 0x30, - DIGIT_ONE = 0x31, - DIGIT_NINE = 0x39, - OPEN_BRACE = 0x7b, - CLOSE_BRACE = 0x7d -}; - -inline UBool isInvalidArray(const void *array, int32_t length) { - return (length < 0 || (array == NULL && length != 0)); -} - -} // namespace - -SimpleFormatter &SimpleFormatter::operator=(const SimpleFormatter& other) { - if (this == &other) { - return *this; - } - compiledPattern = other.compiledPattern; - return *this; -} - -SimpleFormatter::~SimpleFormatter() {} - -UBool SimpleFormatter::applyPatternMinMaxArguments( - const UnicodeString &pattern, - int32_t min, int32_t max, - UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { - return FALSE; - } - // Parse consistent with MessagePattern, but - // - support only simple numbered arguments - // - build a simple binary structure into the result string - const UChar *patternBuffer = pattern.getBuffer(); - int32_t patternLength = pattern.length(); - // Reserve the first char for the number of arguments. - compiledPattern.setTo((UChar)0); - int32_t textLength = 0; - int32_t maxArg = -1; - UBool inQuote = FALSE; - for (int32_t i = 0; i < patternLength;) { - UChar c = patternBuffer[i++]; - if (c == APOS) { - if (i < patternLength && (c = patternBuffer[i]) == APOS) { - // double apostrophe, skip the second one - ++i; - } else if (inQuote) { - // skip the quote-ending apostrophe - inQuote = FALSE; - continue; - } else if (c == OPEN_BRACE || c == CLOSE_BRACE) { - // Skip the quote-starting apostrophe, find the end of the quoted literal text. - ++i; - inQuote = TRUE; - } else { - // The apostrophe is part of literal text. 
- c = APOS; - } - } else if (!inQuote && c == OPEN_BRACE) { - if (textLength > 0) { - compiledPattern.setCharAt(compiledPattern.length() - textLength - 1, - (UChar)(ARG_NUM_LIMIT + textLength)); - textLength = 0; - } - int32_t argNumber; - if ((i + 1) < patternLength && - 0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 && - patternBuffer[i + 1] == CLOSE_BRACE) { - i += 2; - } else { - // Multi-digit argument number (no leading zero) or syntax error. - // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index) - // around the number, but this class does not. - argNumber = -1; - if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) { - argNumber = c - DIGIT_ZERO; - while (i < patternLength && - DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) { - argNumber = argNumber * 10 + (c - DIGIT_ZERO); - if (argNumber >= ARG_NUM_LIMIT) { - break; - } - } - } - if (argNumber < 0 || c != CLOSE_BRACE) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - } - if (argNumber > maxArg) { - maxArg = argNumber; - } - compiledPattern.append((UChar)argNumber); - continue; - } // else: c is part of literal text - // Append c and track the literal-text segment length. - if (textLength == 0) { - // Reserve a char for the length of a new text segment, preset the maximum length. 
- compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR); - } - compiledPattern.append(c); - if (++textLength == MAX_SEGMENT_LENGTH) { - textLength = 0; - } - } - if (textLength > 0) { - compiledPattern.setCharAt(compiledPattern.length() - textLength - 1, - (UChar)(ARG_NUM_LIMIT + textLength)); - } - int32_t argCount = maxArg + 1; - if (argCount < min || max < argCount) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - compiledPattern.setCharAt(0, (UChar)argCount); - return TRUE; -} - -UnicodeString& SimpleFormatter::format( - const UnicodeString &value0, - UnicodeString &appendTo, UErrorCode &errorCode) const { - const UnicodeString *values[] = { &value0 }; - return formatAndAppend(values, 1, appendTo, NULL, 0, errorCode); -} - -UnicodeString& SimpleFormatter::format( - const UnicodeString &value0, - const UnicodeString &value1, - UnicodeString &appendTo, UErrorCode &errorCode) const { - const UnicodeString *values[] = { &value0, &value1 }; - return formatAndAppend(values, 2, appendTo, NULL, 0, errorCode); -} - -UnicodeString& SimpleFormatter::format( - const UnicodeString &value0, - const UnicodeString &value1, - const UnicodeString &value2, - UnicodeString &appendTo, UErrorCode &errorCode) const { - const UnicodeString *values[] = { &value0, &value1, &value2 }; - return formatAndAppend(values, 3, appendTo, NULL, 0, errorCode); -} - -UnicodeString& SimpleFormatter::formatAndAppend( - const UnicodeString *const *values, int32_t valuesLength, - UnicodeString &appendTo, - int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const { - if (U_FAILURE(errorCode)) { - return appendTo; - } - if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength) || - valuesLength < getArgumentLimit()) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return appendTo; - } - return format(compiledPattern.getBuffer(), compiledPattern.length(), values, - appendTo, NULL, TRUE, - offsets, offsetsLength, errorCode); -} - -UnicodeString 
&SimpleFormatter::formatAndReplace( - const UnicodeString *const *values, int32_t valuesLength, - UnicodeString &result, - int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const { - if (U_FAILURE(errorCode)) { - return result; - } - if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return result; - } - const UChar *cp = compiledPattern.getBuffer(); - int32_t cpLength = compiledPattern.length(); - if (valuesLength < getArgumentLimit(cp, cpLength)) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return result; - } - - // If the pattern starts with an argument whose value is the same object - // as the result, then we keep the result contents and append to it. - // Otherwise we replace its contents. - int32_t firstArg = -1; - // If any non-initial argument value is the same object as the result, - // then we first copy its contents and use that instead while formatting. - UnicodeString resultCopy; - if (getArgumentLimit(cp, cpLength) > 0) { - for (int32_t i = 1; i < cpLength;) { - int32_t n = cp[i++]; - if (n < ARG_NUM_LIMIT) { - if (values[n] == &result) { - if (i == 2) { - firstArg = n; - } else if (resultCopy.isEmpty() && !result.isEmpty()) { - resultCopy = result; - } - } - } else { - i += n - ARG_NUM_LIMIT; - } - } - } - if (firstArg < 0) { - result.remove(); - } - return format(cp, cpLength, values, - result, &resultCopy, FALSE, - offsets, offsetsLength, errorCode); -} - -UnicodeString SimpleFormatter::getTextWithNoArguments( - const UChar *compiledPattern, int32_t compiledPatternLength) { - int32_t capacity = compiledPatternLength - 1 - - getArgumentLimit(compiledPattern, compiledPatternLength); - UnicodeString sb(capacity, 0, 0); // Java: StringBuilder - for (int32_t i = 1; i < compiledPatternLength;) { - int32_t segmentLength = compiledPattern[i++] - ARG_NUM_LIMIT; - if (segmentLength > 0) { - sb.append(compiledPattern + i, segmentLength); - i += segmentLength; - } - } - 
return sb; -} - -UnicodeString &SimpleFormatter::format( - const UChar *compiledPattern, int32_t compiledPatternLength, - const UnicodeString *const *values, - UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue, - int32_t *offsets, int32_t offsetsLength, - UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { - return result; - } - for (int32_t i = 0; i < offsetsLength; i++) { - offsets[i] = -1; - } - for (int32_t i = 1; i < compiledPatternLength;) { - int32_t n = compiledPattern[i++]; - if (n < ARG_NUM_LIMIT) { - const UnicodeString *value = values[n]; - if (value == NULL) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return result; - } - if (value == &result) { - if (forbidResultAsValue) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return result; - } - if (i == 2) { - // We are appending to result which is also the first value object. - if (n < offsetsLength) { - offsets[n] = 0; - } - } else { - if (n < offsetsLength) { - offsets[n] = result.length(); - } - result.append(*resultCopy); - } - } else { - if (n < offsetsLength) { - offsets[n] = result.length(); - } - result.append(*value); - } - } else { - int32_t length = n - ARG_NUM_LIMIT; - result.append(compiledPattern + i, length); - i += length; - } - } - return result; -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/sprpimpl.h b/deps/node/deps/icu-small/source/common/sprpimpl.h deleted file mode 100644 index 26de904b..00000000 --- a/deps/node/deps/icu-small/source/common/sprpimpl.h +++ /dev/null @@ -1,130 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ******************************************************************************* - * - * Copyright (C) 2003-2006, International Business Machines - * Corporation and others. All Rights Reserved. 
- * - ******************************************************************************* - * file name: sprpimpl.h - * encoding: UTF-8 - * tab size: 8 (not used) - * indentation:4 - * - * created on: 2003feb1 - * created by: Ram Viswanadha - */ - -#ifndef SPRPIMPL_H -#define SPRPIMPL_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_IDNA - -#include "unicode/ustring.h" -#include "unicode/parseerr.h" -#include "unicode/usprep.h" -#include "unicode/udata.h" -#include "utrie.h" -#include "udataswp.h" -#include "ubidi_props.h" - -#define _SPREP_DATA_TYPE "spp" - -enum UStringPrepType{ - USPREP_UNASSIGNED = 0x0000 , - USPREP_MAP = 0x0001 , - USPREP_PROHIBITED = 0x0002 , - USPREP_DELETE = 0x0003 , - USPREP_TYPE_LIMIT = 0x0004 -}; - -typedef enum UStringPrepType UStringPrepType; - -#ifdef USPREP_TYPE_NAMES_ARRAY -static const char* usprepTypeNames[] ={ - "UNASSIGNED" , - "MAP" , - "PROHIBITED" , - "DELETE", - "TYPE_LIMIT" -}; -#endif - -enum{ - _SPREP_NORMALIZATION_ON = 0x0001, - _SPREP_CHECK_BIDI_ON = 0x0002 -}; - -enum{ - _SPREP_TYPE_THRESHOLD = 0xFFF0, - _SPREP_MAX_INDEX_VALUE = 0x3FBF, /*16139*/ - _SPREP_MAX_INDEX_TOP_LENGTH = 0x0003 -}; - -/* indexes[] value names */ -enum { - _SPREP_INDEX_TRIE_SIZE = 0, /* number of bytes in StringPrep trie */ - _SPREP_INDEX_MAPPING_DATA_SIZE = 1, /* The array that contains the mapping */ - _SPREP_NORM_CORRECTNS_LAST_UNI_VERSION = 2, /* The index of Unicode version of last entry in NormalizationCorrections.txt */ - _SPREP_ONE_UCHAR_MAPPING_INDEX_START = 3, /* The starting index of 1 UChar mapping index in the mapping data array */ - _SPREP_TWO_UCHARS_MAPPING_INDEX_START = 4, /* The starting index of 2 UChars mapping index in the mapping data array */ - _SPREP_THREE_UCHARS_MAPPING_INDEX_START = 5, /* The starting index of 3 UChars mapping index in the mapping data array */ - _SPREP_FOUR_UCHARS_MAPPING_INDEX_START = 6, /* The starting index of 4 UChars mapping index in the mapping data array */ - _SPREP_OPTIONS = 7, /* Bit set of 
options to turn on in the profile */ - _SPREP_INDEX_TOP=16 /* changing this requires a new formatVersion */ -}; - -typedef struct UStringPrepKey UStringPrepKey; - - -struct UStringPrepKey{ - char* name; - char* path; -}; - -struct UStringPrepProfile{ - int32_t indexes[_SPREP_INDEX_TOP]; - UTrie sprepTrie; - const uint16_t* mappingData; - UDataMemory* sprepData; - int32_t refCount; - UBool isDataLoaded; - UBool doNFKC; - UBool checkBiDi; -}; - -/** - * Helper function for populating the UParseError struct - * @internal - */ -U_CAPI void U_EXPORT2 -uprv_syntaxError(const UChar* rules, - int32_t pos, - int32_t rulesLen, - UParseError* parseError); - - -/** - * Swap StringPrep .spp profile data. See udataswp.h. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -usprep_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -#endif /* #if !UCONFIG_NO_IDNA */ - -#endif - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/node/deps/icu-small/source/common/static_unicode_sets.cpp b/deps/node/deps/icu-small/source/common/static_unicode_sets.cpp deleted file mode 100644 index 5d598a0e..00000000 --- a/deps/node/deps/icu-small/source/common/static_unicode_sets.cpp +++ /dev/null @@ -1,223 +0,0 @@ -// © 2018 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -// Allow implicit conversion from char16_t* to UnicodeString for this file: -// Helpful in toString methods and elsewhere. 
-#define UNISTR_FROM_STRING_EXPLICIT - -#include "static_unicode_sets.h" -#include "umutex.h" -#include "ucln_cmn.h" -#include "unicode/uniset.h" -#include "uresimp.h" -#include "cstring.h" -#include "uassert.h" - -using namespace icu; -using namespace icu::unisets; - - -namespace { - -UnicodeSet* gUnicodeSets[COUNT] = {}; - -// Save the empty instance in static memory to have well-defined behavior if a -// regular UnicodeSet cannot be allocated. -alignas(UnicodeSet) -char gEmptyUnicodeSet[sizeof(UnicodeSet)]; - -// Whether the gEmptyUnicodeSet is initialized and ready to use. -UBool gEmptyUnicodeSetInitialized = FALSE; - -inline UnicodeSet* getImpl(Key key) { - UnicodeSet* candidate = gUnicodeSets[key]; - if (candidate == nullptr) { - return reinterpret_cast(gEmptyUnicodeSet); - } - return candidate; -} - -UnicodeSet* computeUnion(Key k1, Key k2) { - UnicodeSet* result = new UnicodeSet(); - if (result == nullptr) { - return nullptr; - } - result->addAll(*getImpl(k1)); - result->addAll(*getImpl(k2)); - result->freeze(); - return result; -} - -UnicodeSet* computeUnion(Key k1, Key k2, Key k3) { - UnicodeSet* result = new UnicodeSet(); - if (result == nullptr) { - return nullptr; - } - result->addAll(*getImpl(k1)); - result->addAll(*getImpl(k2)); - result->addAll(*getImpl(k3)); - result->freeze(); - return result; -} - - -void saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) { - // assert unicodeSets.get(key) == null; - gUnicodeSets[key] = new UnicodeSet(unicodeSetPattern, status); -} - -class ParseDataSink : public ResourceSink { - public: - void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE { - ResourceTable contextsTable = value.getTable(status); - if (U_FAILURE(status)) { return; } - for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) { - if (uprv_strcmp(key, "date") == 0) { - // ignore - } else { - ResourceTable strictnessTable = value.getTable(status); - if 
(U_FAILURE(status)) { return; } - for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) { - bool isLenient = (uprv_strcmp(key, "lenient") == 0); - ResourceArray array = value.getArray(status); - if (U_FAILURE(status)) { return; } - for (int k = 0; k < array.getSize(); k++) { - array.getValue(k, value); - UnicodeString str = value.getUnicodeString(status); - if (U_FAILURE(status)) { return; } - // There is both lenient and strict data for comma/period, - // but not for any of the other symbols. - if (str.indexOf(u'.') != -1) { - saveSet(isLenient ? PERIOD : STRICT_PERIOD, str, status); - } else if (str.indexOf(u',') != -1) { - saveSet(isLenient ? COMMA : STRICT_COMMA, str, status); - } else if (str.indexOf(u'+') != -1) { - saveSet(PLUS_SIGN, str, status); - } else if (str.indexOf(u'‒') != -1) { - saveSet(MINUS_SIGN, str, status); - } else if (str.indexOf(u'$') != -1) { - saveSet(DOLLAR_SIGN, str, status); - } else if (str.indexOf(u'£') != -1) { - saveSet(POUND_SIGN, str, status); - } else if (str.indexOf(u'₨') != -1) { - saveSet(RUPEE_SIGN, str, status); - } - if (U_FAILURE(status)) { return; } - } - } - } - } - } -}; - - -icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER; - -UBool U_CALLCONV cleanupNumberParseUniSets() { - if (gEmptyUnicodeSetInitialized) { - reinterpret_cast(gEmptyUnicodeSet)->~UnicodeSet(); - gEmptyUnicodeSetInitialized = FALSE; - } - for (int32_t i = 0; i < COUNT; i++) { - delete gUnicodeSets[i]; - gUnicodeSets[i] = nullptr; - } - gNumberParseUniSetsInitOnce.reset(); - return TRUE; -} - -void U_CALLCONV initNumberParseUniSets(UErrorCode& status) { - ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets); - - // Initialize the empty instance for well-defined fallback behavior - new(gEmptyUnicodeSet) UnicodeSet(); - reinterpret_cast(gEmptyUnicodeSet)->freeze(); - gEmptyUnicodeSetInitialized = TRUE; - - // These sets were decided after discussion with icu-design@. 
See tickets #13084 and #13309. - // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property). - gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet( - u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status); - gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status); - - LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status)); - if (U_FAILURE(status)) { return; } - ParseDataSink sink; - ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status); - if (U_FAILURE(status)) { return; } - - // NOTE: It is OK for these assertions to fail if there was a no-data build. - U_ASSERT(gUnicodeSets[COMMA] != nullptr); - U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr); - U_ASSERT(gUnicodeSets[PERIOD] != nullptr); - U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr); - - gUnicodeSets[OTHER_GROUPING_SEPARATORS] = new UnicodeSet( - u"['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", status); - gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS); - gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion( - STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS); - - U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr); - U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr); - - gUnicodeSets[PERCENT_SIGN] = new UnicodeSet(u"[%٪]", status); - gUnicodeSets[PERMILLE_SIGN] = new UnicodeSet(u"[‰؉]", status); - gUnicodeSets[INFINITY_KEY] = new UnicodeSet(u"[∞]", status); - - U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr); - U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr); - U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr); - gUnicodeSets[YEN_SIGN] = new UnicodeSet(u"[¥\\uffe5]", status); - - gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status); - - gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS); - gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS); - - for (auto* uniset : gUnicodeSets) { - if (uniset != nullptr) { - 
uniset->freeze(); - } - } -} - -} - -const UnicodeSet* unisets::get(Key key) { - UErrorCode localStatus = U_ZERO_ERROR; - umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, localStatus); - if (U_FAILURE(localStatus)) { - return reinterpret_cast(gEmptyUnicodeSet); - } - return getImpl(key); -} - -Key unisets::chooseFrom(UnicodeString str, Key key1) { - return get(key1)->contains(str) ? key1 : NONE; -} - -Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) { - return get(key1)->contains(str) ? key1 : chooseFrom(str, key2); -} - -//Key unisets::chooseCurrency(UnicodeString str) { -// if (get(DOLLAR_SIGN)->contains(str)) { -// return DOLLAR_SIGN; -// } else if (get(POUND_SIGN)->contains(str)) { -// return POUND_SIGN; -// } else if (get(RUPEE_SIGN)->contains(str)) { -// return RUPEE_SIGN; -// } else if (get(YEN_SIGN)->contains(str)) { -// return YEN_SIGN; -// } else { -// return NONE; -// } -//} - - -#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/node/deps/icu-small/source/common/static_unicode_sets.h b/deps/node/deps/icu-small/source/common/static_unicode_sets.h deleted file mode 100644 index 5f18b321..00000000 --- a/deps/node/deps/icu-small/source/common/static_unicode_sets.h +++ /dev/null @@ -1,119 +0,0 @@ -// © 2018 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// This file is in common instead of i18n because it is needed by ucurr.cpp. - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING -#ifndef __STATIC_UNICODE_SETS_H__ -#define __STATIC_UNICODE_SETS_H__ - -#include "unicode/uniset.h" -#include "unicode/unistr.h" - -U_NAMESPACE_BEGIN -namespace unisets { - -enum Key { - // NONE is used to indicate null in chooseFrom(). - // EMPTY is used to get an empty UnicodeSet. 
- NONE = -1, - EMPTY = 0, - - // Ignorables - DEFAULT_IGNORABLES, - STRICT_IGNORABLES, - - // Separators - // Notes: - // - COMMA is a superset of STRICT_COMMA - // - PERIOD is a superset of SCRICT_PERIOD - // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS - // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and OTHER_GRP_SEPARATORS - COMMA, - PERIOD, - STRICT_COMMA, - STRICT_PERIOD, - OTHER_GROUPING_SEPARATORS, - ALL_SEPARATORS, - STRICT_ALL_SEPARATORS, - - // Symbols - MINUS_SIGN, - PLUS_SIGN, - PERCENT_SIGN, - PERMILLE_SIGN, - INFINITY_KEY, // INFINITY is defined in cmath - - // Currency Symbols - DOLLAR_SIGN, - POUND_SIGN, - RUPEE_SIGN, - YEN_SIGN, // not in CLDR data, but Currency.java wants it - - // Other - DIGITS, - - // Combined Separators with Digits (for lead code points) - DIGITS_OR_ALL_SEPARATORS, - DIGITS_OR_STRICT_ALL_SEPARATORS, - - // The number of elements in the enum. - COUNT -}; - -/** - * Gets the static-allocated UnicodeSet according to the provided key. The - * pointer will be deleted during u_cleanup(); the caller should NOT delete it. - * - * Exported as U_COMMON_API for ucurr.cpp - * - * @param key The desired UnicodeSet according to the enum in this file. - * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but - * may be empty if an error occurred during data loading. - */ -U_COMMON_API const UnicodeSet* get(Key key); - -/** - * Checks if the UnicodeSet given by key1 contains the given string. - * - * Exported as U_COMMON_API for numparse_decimal.cpp - * - * @param str The string to check. - * @param key1 The set to check. - * @return key1 if the set contains str, or NONE if not. - */ -U_COMMON_API Key chooseFrom(UnicodeString str, Key key1); - -/** - * Checks if the UnicodeSet given by either key1 or key2 contains the string. - * - * Exported as U_COMMON_API for numparse_decimal.cpp - * - * @param str The string to check. - * @param key1 The first set to check. 
- * @param key2 The second set to check. - * @return key1 if that set contains str; key2 if that set contains str; or - * NONE if neither set contains str. - */ -U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2); - -// Unused in C++: -// Key chooseCurrency(UnicodeString str); -// Used instead: -static const struct { - Key key; - UChar32 exemplar; -} kCurrencyEntries[] = { - {DOLLAR_SIGN, u'$'}, - {POUND_SIGN, u'£'}, - {RUPEE_SIGN, u'₨'}, - {YEN_SIGN, u'¥'}, -}; - -} // namespace unisets -U_NAMESPACE_END - -#endif //__STATIC_UNICODE_SETS_H__ -#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/node/deps/icu-small/source/common/stringpiece.cpp b/deps/node/deps/icu-small/source/common/stringpiece.cpp deleted file mode 100644 index d4f7f310..00000000 --- a/deps/node/deps/icu-small/source/common/stringpiece.cpp +++ /dev/null @@ -1,75 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// Copyright (C) 2009-2013, International Business Machines -// Corporation and others. All Rights Reserved. -// -// Copyright 2004 and onwards Google Inc. -// -// Author: wilsonh@google.com (Wilson Hsieh) -// - -#include "unicode/utypes.h" -#include "unicode/stringpiece.h" -#include "cstring.h" -#include "cmemory.h" - -U_NAMESPACE_BEGIN - -StringPiece::StringPiece(const char* str) - : ptr_(str), length_((str == NULL) ? 
0 : static_cast(uprv_strlen(str))) { } - -StringPiece::StringPiece(const StringPiece& x, int32_t pos) { - if (pos < 0) { - pos = 0; - } else if (pos > x.length_) { - pos = x.length_; - } - ptr_ = x.ptr_ + pos; - length_ = x.length_ - pos; -} - -StringPiece::StringPiece(const StringPiece& x, int32_t pos, int32_t len) { - if (pos < 0) { - pos = 0; - } else if (pos > x.length_) { - pos = x.length_; - } - if (len < 0) { - len = 0; - } else if (len > x.length_ - pos) { - len = x.length_ - pos; - } - ptr_ = x.ptr_ + pos; - length_ = len; -} - -void StringPiece::set(const char* str) { - ptr_ = str; - if (str != NULL) - length_ = static_cast(uprv_strlen(str)); - else - length_ = 0; -} - -U_EXPORT UBool U_EXPORT2 -operator==(const StringPiece& x, const StringPiece& y) { - int32_t len = x.size(); - if (len != y.size()) { - return false; - } - if (len == 0) { - return true; - } - const char* p = x.data(); - const char* p2 = y.data(); - // Test last byte in case strings share large common prefix - --len; - if (p[len] != p2[len]) return false; - // At this point we can, but don't have to, ignore the last byte. - return uprv_memcmp(p, p2, len) == 0; -} - - -const int32_t StringPiece::npos = 0x7fffffff; - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/stringtriebuilder.cpp b/deps/node/deps/icu-small/source/common/stringtriebuilder.cpp deleted file mode 100644 index 6f9cc2e5..00000000 --- a/deps/node/deps/icu-small/source/common/stringtriebuilder.cpp +++ /dev/null @@ -1,618 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2012, International Business Machines -* Corporation and others. All Rights Reserved. 
-******************************************************************************* -* file name: stringtriebuilder.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010dec24 -* created by: Markus W. Scherer -*/ - -#include "utypeinfo.h" // for 'typeid' to work -#include "unicode/utypes.h" -#include "unicode/stringtriebuilder.h" -#include "uassert.h" -#include "uhash.h" - -U_CDECL_BEGIN - -static int32_t U_CALLCONV -hashStringTrieNode(const UHashTok key) { - return icu::StringTrieBuilder::hashNode(key.pointer); -} - -static UBool U_CALLCONV -equalStringTrieNodes(const UHashTok key1, const UHashTok key2) { - return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer); -} - -U_CDECL_END - -U_NAMESPACE_BEGIN - -StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {} - -StringTrieBuilder::~StringTrieBuilder() { - deleteCompactBuilder(); -} - -void -StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL, - sizeGuess, &errorCode); - if(U_SUCCESS(errorCode)) { - if(nodes==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } else { - uhash_setKeyDeleter(nodes, uprv_deleteUObject); - } - } -} - -void -StringTrieBuilder::deleteCompactBuilder() { - uhash_close(nodes); - nodes=NULL; -} - -void -StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength, - UErrorCode &errorCode) { - if(buildOption==USTRINGTRIE_BUILD_FAST) { - writeNode(0, elementsLength, 0); - } else /* USTRINGTRIE_BUILD_SMALL */ { - createCompactBuilder(2*elementsLength, errorCode); - Node *root=makeNode(0, elementsLength, 0, errorCode); - if(U_SUCCESS(errorCode)) { - root->markRightEdgesFirst(-1); - root->write(*this); - } - deleteCompactBuilder(); - } -} - -// Requires startmaxLinearMatchLength) { - lastUnitIndex-=maxLinearMatchLength; - length-=maxLinearMatchLength; - writeElementUnits(start, 
lastUnitIndex, maxLinearMatchLength); - write(getMinLinearMatch()+maxLinearMatchLength-1); - } - writeElementUnits(start, unitIndex, length); - type=getMinLinearMatch()+length-1; - } else { - // Branch node. - int32_t length=countElementUnits(start, limit, unitIndex); - // length>=2 because minUnit!=maxUnit. - writeBranchSubNode(start, limit, unitIndex, length); - if(--lengthgetMaxBranchLinearSubNodeLength()) { - // Branch on the middle unit. - // First, find the middle unit. - int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); - // Encode the less-than branch first. - middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit - lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2); - ++ltLength; - // Continue for the greater-or-equal branch. - start=i; - length=length-length/2; - } - // For each unit, find its elements array start and whether it has a final value. - int32_t starts[kMaxBranchLinearSubNodeLength]; - UBool isFinal[kMaxBranchLinearSubNodeLength-1]; - int32_t unitNumber=0; - do { - int32_t i=starts[unitNumber]=start; - UChar unit=getElementUnit(i++, unitIndex); - i=indexOfElementWithNextUnit(i, unitIndex, unit); - isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start); - start=i; - } while(++unitNumber0); - // The maxUnit sub-node is written as the very last one because we do - // not jump for it at all. - unitNumber=length-1; - writeNode(start, limit, unitIndex+1); - int32_t offset=write(getElementUnit(start, unitIndex)); - // Write the rest of this node's unit-value pairs. - while(--unitNumber>=0) { - start=starts[unitNumber]; - int32_t value; - if(isFinal[unitNumber]) { - // Write the final value for the one string ending with this unit. - value=getElementValue(start); - } else { - // Write the delta to the start position of the sub-node. 
- value=offset-jumpTargets[unitNumber]; - } - writeValueAndFinal(value, isFinal[unitNumber]); - offset=write(getElementUnit(start, unitIndex)); - } - // Write the split-branch nodes. - while(ltLength>0) { - --ltLength; - writeDeltaTo(lessThan[ltLength]); - offset=write(middleUnits[ltLength]); - } - return offset; -} - -// Requires startmaxLinearMatchLength) { - lastUnitIndex-=maxLinearMatchLength; - length-=maxLinearMatchLength; - node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode); - nextNode=registerNode(node, errorCode); - } - node=createLinearMatchNode(start, unitIndex, length, nextNode); - } else { - // Branch node. - int32_t length=countElementUnits(start, limit, unitIndex); - // length>=2 because minUnit!=maxUnit. - Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode); - node=new BranchHeadNode(length, subNode); - } - if(hasValue && node!=NULL) { - if(matchNodesCanHaveValues()) { - ((ValueNode *)node)->setValue(value); - } else { - node=new IntermediateValueNode(value, registerNode(node, errorCode)); - } - } - return registerNode(node, errorCode); -} - -// startgetMaxBranchLinearSubNodeLength()) { - // Branch on the middle unit. - // First, find the middle unit. - int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); - // Create the less-than branch. - middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit - lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode); - ++ltLength; - // Continue for the greater-or-equal branch. - start=i; - length=length-length/2; - } - if(U_FAILURE(errorCode)) { - return NULL; - } - ListBranchNode *listNode=new ListBranchNode(); - if(listNode==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - // For each unit, find its elements array start and whether it has a final value. 
- int32_t unitNumber=0; - do { - int32_t i=start; - UChar unit=getElementUnit(i++, unitIndex); - i=indexOfElementWithNextUnit(i, unitIndex, unit); - if(start==i-1 && unitIndex+1==getElementStringLength(start)) { - listNode->add(unit, getElementValue(start)); - } else { - listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode)); - } - start=i; - } while(++unitNumberadd(unit, getElementValue(start)); - } else { - listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode)); - } - Node *node=registerNode(listNode, errorCode); - // Create the split-branch nodes. - while(ltLength>0) { - --ltLength; - node=registerNode( - new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode); - } - return node; -} - -StringTrieBuilder::Node * -StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - delete newNode; - return NULL; - } - if(newNode==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - const UHashElement *old=uhash_find(nodes, newNode); - if(old!=NULL) { - delete newNode; - return (Node *)old->key.pointer; - } - // If uhash_puti() returns a non-zero value from an equivalent, previously - // registered node, then uhash_find() failed to find that and we will leak newNode. -#if U_DEBUG - int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. 
-#endif - uhash_puti(nodes, newNode, 1, &errorCode); - U_ASSERT(oldValue==0); - if(U_FAILURE(errorCode)) { - delete newNode; - return NULL; - } - return newNode; -} - -StringTrieBuilder::Node * -StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return NULL; - } - FinalValueNode key(value); - const UHashElement *old=uhash_find(nodes, &key); - if(old!=NULL) { - return (Node *)old->key.pointer; - } - Node *newNode=new FinalValueNode(value); - if(newNode==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - // If uhash_puti() returns a non-zero value from an equivalent, previously - // registered node, then uhash_find() failed to find that and we will leak newNode. -#if U_DEBUG - int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. -#endif - uhash_puti(nodes, newNode, 1, &errorCode); - U_ASSERT(oldValue==0); - if(U_FAILURE(errorCode)) { - delete newNode; - return NULL; - } - return newNode; -} - -int32_t -StringTrieBuilder::hashNode(const void *node) { - return ((const Node *)node)->hashCode(); -} - -UBool -StringTrieBuilder::equalNodes(const void *left, const void *right) { - return *(const Node *)left==*(const Node *)right; -} - -UBool -StringTrieBuilder::Node::operator==(const Node &other) const { - return this==&other || (typeid(*this)==typeid(other) && hash==other.hash); -} - -int32_t -StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) { - if(offset==0) { - offset=edgeNumber; - } - return edgeNumber; -} - -UBool -StringTrieBuilder::FinalValueNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!Node::operator==(other)) { - return FALSE; - } - const FinalValueNode &o=(const FinalValueNode &)other; - return value==o.value; -} - -void -StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) { - offset=builder.writeValueAndFinal(value, TRUE); -} - -UBool 
-StringTrieBuilder::ValueNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!Node::operator==(other)) { - return FALSE; - } - const ValueNode &o=(const ValueNode &)other; - return hasValue==o.hasValue && (!hasValue || value==o.value); -} - -UBool -StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!ValueNode::operator==(other)) { - return FALSE; - } - const IntermediateValueNode &o=(const IntermediateValueNode &)other; - return next==o.next; -} - -int32_t -StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) { - if(offset==0) { - offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); - } - return edgeNumber; -} - -void -StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) { - next->write(builder); - offset=builder.writeValueAndFinal(value, FALSE); -} - -UBool -StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!ValueNode::operator==(other)) { - return FALSE; - } - const LinearMatchNode &o=(const LinearMatchNode &)other; - return length==o.length && next==o.next; -} - -int32_t -StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) { - if(offset==0) { - offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); - } - return edgeNumber; -} - -UBool -StringTrieBuilder::ListBranchNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!Node::operator==(other)) { - return FALSE; - } - const ListBranchNode &o=(const ListBranchNode &)other; - for(int32_t i=0; imarkRightEdgesFirst(edgeNumber-step); - } - // For all but the rightmost edge, decrement the edge number. 
- step=1; - } while(i>0); - offset=edgeNumber; - } - return edgeNumber; -} - -void -StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) { - // Write the sub-nodes in reverse order: The jump lengths are deltas from - // after their own positions, so if we wrote the minUnit sub-node first, - // then its jump delta would be larger. - // Instead we write the minUnit sub-node last, for a shorter delta. - int32_t unitNumber=length-1; - Node *rightEdge=equal[unitNumber]; - int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset(); - do { - --unitNumber; - if(equal[unitNumber]!=NULL) { - equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder); - } - } while(unitNumber>0); - // The maxUnit sub-node is written as the very last one because we do - // not jump for it at all. - unitNumber=length-1; - if(rightEdge==NULL) { - builder.writeValueAndFinal(values[unitNumber], TRUE); - } else { - rightEdge->write(builder); - } - offset=builder.write(units[unitNumber]); - // Write the rest of this node's unit-value pairs. - while(--unitNumber>=0) { - int32_t value; - UBool isFinal; - if(equal[unitNumber]==NULL) { - // Write the final value for the one string ending with this unit. - value=values[unitNumber]; - isFinal=TRUE; - } else { - // Write the delta to the start position of the sub-node. 
- U_ASSERT(equal[unitNumber]->getOffset()>0); - value=offset-equal[unitNumber]->getOffset(); - isFinal=FALSE; - } - builder.writeValueAndFinal(value, isFinal); - offset=builder.write(units[unitNumber]); - } -} - -UBool -StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!Node::operator==(other)) { - return FALSE; - } - const SplitBranchNode &o=(const SplitBranchNode &)other; - return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual; -} - -int32_t -StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) { - if(offset==0) { - firstEdgeNumber=edgeNumber; - edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber); - offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1); - } - return edgeNumber; -} - -void -StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) { - // Encode the less-than branch first. - lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder); - // Encode the greater-or-equal branch last because we do not jump for it at all. - greaterOrEqual->write(builder); - // Write this node. 
- U_ASSERT(lessThan->getOffset()>0); - builder.writeDeltaTo(lessThan->getOffset()); // less-than - offset=builder.write(unit); -} - -UBool -StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!ValueNode::operator==(other)) { - return FALSE; - } - const BranchHeadNode &o=(const BranchHeadNode &)other; - return length==o.length && next==o.next; -} - -int32_t -StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) { - if(offset==0) { - offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); - } - return edgeNumber; -} - -void -StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) { - next->write(builder); - if(length<=builder.getMinLinearMatch()) { - offset=builder.writeValueAndType(hasValue, value, length-1); - } else { - builder.write(length-1); - offset=builder.writeValueAndType(hasValue, value, 0); - } -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/uarrsort.cpp b/deps/node/deps/icu-small/source/common/uarrsort.cpp deleted file mode 100644 index 03c4d4e7..00000000 --- a/deps/node/deps/icu-small/source/common/uarrsort.cpp +++ /dev/null @@ -1,288 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uarrsort.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003aug04 -* created by: Markus W. Scherer -* -* Internal function for sorting arrays. 
-*/ - -#include "unicode/utypes.h" -#include "cmemory.h" -#include "uarrsort.h" - -enum { - /** - * "from Knuth" - * - * A binary search over 8 items performs 4 comparisons: - * log2(8)=3 to subdivide, +1 to check for equality. - * A linear search over 8 items on average also performs 4 comparisons. - */ - MIN_QSORT=9, - STACK_ITEM_SIZE=200 -}; - -/* UComparator convenience implementations ---------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -uprv_uint16Comparator(const void *context, const void *left, const void *right) { - (void)context; - return (int32_t)*(const uint16_t *)left - (int32_t)*(const uint16_t *)right; -} - -U_CAPI int32_t U_EXPORT2 -uprv_int32Comparator(const void *context, const void *left, const void *right) { - (void)context; - return *(const int32_t *)left - *(const int32_t *)right; -} - -U_CAPI int32_t U_EXPORT2 -uprv_uint32Comparator(const void *context, const void *left, const void *right) { - (void)context; - uint32_t l=*(const uint32_t *)left, r=*(const uint32_t *)right; - - /* compare directly because (l-r) would overflow the int32_t result */ - if(lr */ { - return 1; - } -} - -/* Insertion sort using binary search --------------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize, - UComparator *cmp, const void *context) { - int32_t start=0; - UBool found=FALSE; - - /* Binary search until we get down to a tiny sub-array. */ - while((limit-start)>=MIN_QSORT) { - int32_t i=(start+limit)/2; - int32_t diff=cmp(context, item, array+i*itemSize); - if(diff==0) { - /* - * Found the item. We look for the *last* occurrence of such - * an item, for stable sorting. - * If we knew that there will be only few equal items, - * we could break now and enter the linear search. - * However, if there are many equal items, then it should be - * faster to continue with the binary search. 
- * It seems likely that we either have all unique items - * (where found will never become TRUE in the insertion sort) - * or potentially many duplicates. - */ - found=TRUE; - start=i+1; - } else if(diff<0) { - limit=i; - } else { - start=i; - } - } - - /* Linear search over the remaining tiny sub-array. */ - while(start=limit) { - doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px); - break; - } - - left=start; - right=limit; - - /* x=array[middle] */ - uprv_memcpy(px, array+(size_t)((start+limit)/2)*itemSize, itemSize); - - do { - while(/* array[left]0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if(length<=1) { - return; - } else if(length0 if left>right - * - * @internal - */ -typedef int32_t U_CALLCONV -UComparator(const void *context, const void *left, const void *right); -U_CDECL_END - -/** - * Array sorting function. - * Uses a UComparator for comparing array items to each other, and simple - * memory copying to move items. - * - * @param array The array to be sorted. - * @param length The number of items in the array. - * @param itemSize The size in bytes of each array item. - * @param cmp UComparator function used to compare two items each. - * @param context Application-specific pointer, passed through to the UComparator. - * @param sortStable If true, a stable sorting algorithm must be used. - * @param pErrorCode ICU in/out UErrorCode parameter. - * - * @internal - */ -U_CAPI void U_EXPORT2 -uprv_sortArray(void *array, int32_t length, int32_t itemSize, - UComparator *cmp, const void *context, - UBool sortStable, UErrorCode *pErrorCode); - -/** - * Convenience UComparator implementation for uint16_t arrays. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -uprv_uint16Comparator(const void *context, const void *left, const void *right); - -/** - * Convenience UComparator implementation for int32_t arrays. 
- * @internal - */ -U_CAPI int32_t U_EXPORT2 -uprv_int32Comparator(const void *context, const void *left, const void *right); - -/** - * Convenience UComparator implementation for uint32_t arrays. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -uprv_uint32Comparator(const void *context, const void *left, const void *right); - -/** - * Much like Java Collections.binarySearch(list, key, comparator). - * - * Except: Java documents "If the list contains multiple elements equal to - * the specified object, there is no guarantee which one will be found." - * - * This version here will return the largest index of any equal item, - * for use in stable sorting. - * - * @return the index>=0 where the item was found: - * the largest such index, if multiple, for stable sorting; - * or the index<0 for inserting the item at ~index in sorted order - */ -U_CAPI int32_t U_EXPORT2 -uprv_stableBinarySearch(char *array, int32_t length, void *item, int32_t itemSize, - UComparator *cmp, const void *context); - -#endif diff --git a/deps/node/deps/icu-small/source/common/uassert.h b/deps/node/deps/icu-small/source/common/uassert.h deleted file mode 100644 index 2c080eb4..00000000 --- a/deps/node/deps/icu-small/source/common/uassert.h +++ /dev/null @@ -1,32 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2002-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File uassert.h -* -* Contains U_ASSERT macro -* -* By default, U_ASSERT just wraps the C library assert macro. -* By changing the definition here, the assert behavior for ICU can be changed -* without affecting other non-ICU uses of the C library assert(). 
-* -****************************************************************************** -*/ - -#ifndef U_ASSERT_H -#define U_ASSERT_H -/* utypes.h is included to get the proper define for uint8_t */ -#include "unicode/utypes.h" -#if U_DEBUG -# include -# define U_ASSERT(exp) assert(exp) -#else -# define U_ASSERT(exp) -#endif -#endif diff --git a/deps/node/deps/icu-small/source/common/ubidi.cpp b/deps/node/deps/icu-small/source/common/ubidi.cpp deleted file mode 100644 index 4b65d491..00000000 --- a/deps/node/deps/icu-small/source/common/ubidi.cpp +++ /dev/null @@ -1,3039 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ubidi.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999jul27 -* created by: Markus W. Scherer, updated by Matitiahu Allouche -* -*/ - -#include "cmemory.h" -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "unicode/uchar.h" -#include "unicode/ubidi.h" -#include "unicode/utf16.h" -#include "ubidi_props.h" -#include "ubidiimp.h" -#include "uassert.h" - -/* - * General implementation notes: - * - * Throughout the implementation, there are comments like (W2) that refer to - * rules of the BiDi algorithm, in this example to the second rule of the - * resolution of weak types. 
- * - * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) - * character according to UTF-16, the second UChar gets the directional property of - * the entire character assigned, while the first one gets a BN, a boundary - * neutral, type, which is ignored by most of the algorithm according to - * rule (X9) and the implementation suggestions of the BiDi algorithm. - * - * Later, adjustWSLevels() will set the level for each BN to that of the - * following character (UChar), which results in surrogate pairs getting the - * same level on each of their surrogates. - * - * In a UTF-8 implementation, the same thing could be done: the last byte of - * a multi-byte sequence would get the "real" property, while all previous - * bytes of that sequence would get BN. - * - * It is not possible to assign all those parts of a character the same real - * property because this would fail in the resolution of weak types with rules - * that look at immediately surrounding types. - * - * As a related topic, this implementation does not remove Boundary Neutral - * types from the input, but ignores them wherever this is relevant. - * For example, the loop for the resolution of the weak types reads - * types until it finds a non-BN. - * Also, explicit embedding codes are neither changed into BN nor removed. - * They are only treated the same way real BNs are. - * As stated before, adjustWSLevels() takes care of them at the end. - * For the purpose of conformance, the levels of all these codes - * do not matter. - * - * Note that this implementation modifies the dirProps - * after the initial setup, when applying X5c (replace FSI by LRI or RLI), - * X6, N0 (replace paired brackets by L or R). - * - * In this implementation, the resolution of weak types (W1 to W6), - * neutrals (N1 and N2), and the assignment of the resolved level (In) - * are all done in one single loop, in resolveImplicitLevels(). 
- * Changes of dirProp values are done on the fly, without writing - * them back to the dirProps array. - * - * - * This implementation contains code that allows to bypass steps of the - * algorithm that are not needed on the specific paragraph - * in order to speed up the most common cases considerably, - * like text that is entirely LTR, or RTL text without numbers. - * - * Most of this is done by setting a bit for each directional property - * in a flags variable and later checking for whether there are - * any LTR characters or any RTL characters, or both, whether - * there are any explicit embedding codes, etc. - * - * If the (Xn) steps are performed, then the flags are re-evaluated, - * because they will then not contain the embedding codes any more - * and will be adjusted for override codes, so that subsequently - * more bypassing may be possible than what the initial flags suggested. - * - * If the text is not mixed-directional, then the - * algorithm steps for the weak type resolution are not performed, - * and all levels are set to the paragraph level. - * - * If there are no explicit embedding codes, then the (Xn) steps - * are not performed. - * - * If embedding levels are supplied as a parameter, then all - * explicit embedding codes are ignored, and the (Xn) steps - * are not performed. - * - * White Space types could get the level of the run they belong to, - * and are checked with a test of (flags&MASK_EMBEDDING) to - * consider if the paragraph direction should be considered in - * the flags variable. - * - * If there are no White Space types in the paragraph, then - * (L1) is not necessary in adjustWSLevels(). 
- */ - -/* to avoid some conditional statements, use tiny constant arrays */ -static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; -static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; -static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; - -#define DIRPROP_FLAG_LR(level) flagLR[(level)&1] -#define DIRPROP_FLAG_E(level) flagE[(level)&1] -#define DIRPROP_FLAG_O(level) flagO[(level)&1] - -#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R) - -#define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE) - -/* UBiDi object management -------------------------------------------------- */ - -U_CAPI UBiDi * U_EXPORT2 -ubidi_open(void) -{ - UErrorCode errorCode=U_ZERO_ERROR; - return ubidi_openSized(0, 0, &errorCode); -} - -U_CAPI UBiDi * U_EXPORT2 -ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) { - UBiDi *pBiDi; - - /* check the argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } else if(maxLength<0 || maxRunCount<0) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; /* invalid arguments */ - } - - /* allocate memory for the object */ - pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi)); - if(pBiDi==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ - uprv_memset(pBiDi, 0, sizeof(UBiDi)); - - /* allocate memory for arrays as requested */ - if(maxLength>0) { - if( !getInitialDirPropsMemory(pBiDi, maxLength) || - !getInitialLevelsMemory(pBiDi, maxLength) - ) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - } - } else { - pBiDi->mayAllocateText=TRUE; - } - - if(maxRunCount>0) { - if(maxRunCount==1) { - /* use simpleRuns[] */ - pBiDi->runsSize=sizeof(Run); - } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - } - } else { - pBiDi->mayAllocateRuns=TRUE; - } - - if(U_SUCCESS(*pErrorCode)) { - return pBiDi; - } else { - 
ubidi_close(pBiDi); - return NULL; - } -} - -/* - * We are allowed to allocate memory if memory==NULL or - * mayAllocate==TRUE for each array that we need. - * We also try to grow memory as needed if we - * allocate it. - * - * Assume sizeNeeded>0. - * If *pMemory!=NULL, then assume *pSize>0. - * - * ### this realloc() may unnecessarily copy the old data, - * which we know we don't need any more; - * is this the best way to do this?? - */ -U_CFUNC UBool -ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) { - void **pMemory = (void **)bidiMem; - /* check for existing memory */ - if(*pMemory==NULL) { - /* we need to allocate memory */ - if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { - *pSize=sizeNeeded; - return TRUE; - } else { - return FALSE; - } - } else { - if(sizeNeeded<=*pSize) { - /* there is already enough memory */ - return TRUE; - } - else if(!mayAllocate) { - /* not enough memory, and we must not allocate */ - return FALSE; - } else { - /* we try to grow */ - void *memory; - /* in most cases, we do not need the copy-old-data part of - * realloc, but it is needed when adding runs using getRunsMemory() - * in setParaRunsOnly() - */ - if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { - *pMemory=memory; - *pSize=sizeNeeded; - return TRUE; - } else { - /* we failed to grow */ - return FALSE; - } - } - } -} - -U_CAPI void U_EXPORT2 -ubidi_close(UBiDi *pBiDi) { - if(pBiDi!=NULL) { - pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ - if(pBiDi->dirPropsMemory!=NULL) { - uprv_free(pBiDi->dirPropsMemory); - } - if(pBiDi->levelsMemory!=NULL) { - uprv_free(pBiDi->levelsMemory); - } - if(pBiDi->openingsMemory!=NULL) { - uprv_free(pBiDi->openingsMemory); - } - if(pBiDi->parasMemory!=NULL) { - uprv_free(pBiDi->parasMemory); - } - if(pBiDi->runsMemory!=NULL) { - uprv_free(pBiDi->runsMemory); - } - if(pBiDi->isolatesMemory!=NULL) { - uprv_free(pBiDi->isolatesMemory); - } - 
if(pBiDi->insertPoints.points!=NULL) { - uprv_free(pBiDi->insertPoints.points); - } - - uprv_free(pBiDi); - } -} - -/* set to approximate "inverse BiDi" ---------------------------------------- */ - -U_CAPI void U_EXPORT2 -ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { - if(pBiDi!=NULL) { - pBiDi->isInverse=isInverse; - pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L - : UBIDI_REORDER_DEFAULT; - } -} - -U_CAPI UBool U_EXPORT2 -ubidi_isInverse(UBiDi *pBiDi) { - if(pBiDi!=NULL) { - return pBiDi->isInverse; - } else { - return FALSE; - } -} - -/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of - * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre - * concept of RUNS_ONLY which is a double operation. - * It could be advantageous to divide this into 3 concepts: - * a) Operation: direct / inverse / RUNS_ONLY - * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R - * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL - * This would allow combinations not possible today like RUNS_ONLY with - * NUMBERS_SPECIAL. - * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and - * REMOVE_CONTROLS for the inverse step. - * Not all combinations would be supported, and probably not all do make sense. - * This would need to document which ones are supported and what are the - * fallbacks for unsupported combinations. 
- */ -U_CAPI void U_EXPORT2 -ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { - if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) - && (reorderingMode < UBIDI_REORDER_COUNT)) { - pBiDi->reorderingMode = reorderingMode; - pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L); - } -} - -U_CAPI UBiDiReorderingMode U_EXPORT2 -ubidi_getReorderingMode(UBiDi *pBiDi) { - if (pBiDi!=NULL) { - return pBiDi->reorderingMode; - } else { - return UBIDI_REORDER_DEFAULT; - } -} - -U_CAPI void U_EXPORT2 -ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { - if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { - reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; - } - if (pBiDi!=NULL) { - pBiDi->reorderingOptions=reorderingOptions; - } -} - -U_CAPI uint32_t U_EXPORT2 -ubidi_getReorderingOptions(UBiDi *pBiDi) { - if (pBiDi!=NULL) { - return pBiDi->reorderingOptions; - } else { - return 0; - } -} - -U_CAPI UBiDiDirection U_EXPORT2 -ubidi_getBaseDirection(const UChar *text, -int32_t length){ - - int32_t i; - UChar32 uchar; - UCharDirection dir; - - if( text==NULL || length<-1 ){ - return UBIDI_NEUTRAL; - } - - if(length==-1) { - length=u_strlen(text); - } - - for( i = 0 ; i < length; ) { - /* i is incremented by U16_NEXT */ - U16_NEXT(text, i, length, uchar); - dir = u_charDirection(uchar); - if( dir == U_LEFT_TO_RIGHT ) - return UBIDI_LTR; - if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC ) - return UBIDI_RTL; - } - return UBIDI_NEUTRAL; -} - -/* perform (P2)..(P3) ------------------------------------------------------- */ - -/** - * Returns the directionality of the first strong character - * after the last B in prologue, if any. - * Requires prologue!=null. 
- */ -static DirProp -firstL_R_AL(UBiDi *pBiDi) { - const UChar *text=pBiDi->prologue; - int32_t length=pBiDi->proLength; - int32_t i; - UChar32 uchar; - DirProp dirProp, result=ON; - for(i=0; iparas - */ -static UBool -checkParaCount(UBiDi *pBiDi) { - int32_t count=pBiDi->paraCount; - if(pBiDi->paras==pBiDi->simpleParas) { - if(count<=SIMPLE_PARAS_COUNT) - return TRUE; - if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2)) - return FALSE; - pBiDi->paras=pBiDi->parasMemory; - uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para)); - return TRUE; - } - if(!getInitialParasMemory(pBiDi, count * 2)) - return FALSE; - pBiDi->paras=pBiDi->parasMemory; - return TRUE; -} - -/* - * Get the directional properties for the text, calculate the flags bit-set, and - * determine the paragraph level if necessary (in pBiDi->paras[i].level). - * FSI initiators are also resolved and their dirProp replaced with LRI or RLI. - * When encountering an FSI, it is initially replaced with an LRI, which is the - * default. Only if a strong R or AL is found within its scope will the LRI be - * replaced by an RLI. 
- */ -static UBool -getDirProps(UBiDi *pBiDi) { - const UChar *text=pBiDi->text; - DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ - - int32_t i=0, originalLength=pBiDi->originalLength; - Flags flags=0; /* collect all directionalities in the text */ - UChar32 uchar; - DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */ - UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); - /* for inverse BiDi, the default para level is set to RTL if there is a - strong R or AL character at either end of the text */ - UBool isDefaultLevelInverse=isDefaultLevel && (UBool) - (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || - pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); - int32_t lastArabicPos=-1; - int32_t controlCount=0; - UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & - UBIDI_OPTION_REMOVE_CONTROLS); - - enum State { - NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */ - SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */ - SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */ - LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */ - }; - State state; - DirProp lastStrong=ON; /* for default level & inverse BiDi */ - /* The following stacks are used to manage isolate sequences. Those - sequences may be nested, but obviously never more deeply than the - maximum explicit embedding level. - lastStack is the index of the last used entry in the stack. A value of -1 - means that there is no open isolate sequence. - lastStack is reset to -1 on paragraph boundaries. 
*/ - /* The following stack contains the position of the initiator of - each open isolate sequence */ - int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; - /* The following stack contains the last known state before - encountering the initiator of an isolate sequence */ - State previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; - int32_t stackLast=-1; - - if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) - pBiDi->length=0; - defaultParaLevel=pBiDi->paraLevel&1; - if(isDefaultLevel) { - pBiDi->paras[0].level=defaultParaLevel; - lastStrong=defaultParaLevel; - if(pBiDi->proLength>0 && /* there is a prologue */ - (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */ - if(dirProp==L) - pBiDi->paras[0].level=0; /* set the default para level */ - else - pBiDi->paras[0].level=1; /* set the default para level */ - state=NOT_SEEKING_STRONG; - } else { - state=SEEKING_STRONG_FOR_PARA; - } - } else { - pBiDi->paras[0].level=pBiDi->paraLevel; - state=NOT_SEEKING_STRONG; - } - /* count paragraphs and determine the paragraph level (P2..P3) */ - /* - * see comment in ubidi.h: - * the UBIDI_DEFAULT_XXX values are designed so that - * their bit 0 alone yields the intended default - */ - for( /* i=0 above */ ; i0xffff) { /* set the lead surrogate's property to BN */ - flags|=DIRPROP_FLAG(BN); - dirProps[i-2]=BN; - } - if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) - controlCount++; - if(dirProp==L) { - if(state==SEEKING_STRONG_FOR_PARA) { - pBiDi->paras[pBiDi->paraCount-1].level=0; - state=NOT_SEEKING_STRONG; - } - else if(state==SEEKING_STRONG_FOR_FSI) { - if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { - /* no need for next statement, already set by default */ - /* dirProps[isolateStartStack[stackLast]]=LRI; */ - flags|=DIRPROP_FLAG(LRI); - } - state=LOOKING_FOR_PDI; - } - lastStrong=L; - continue; - } - if(dirProp==R || dirProp==AL) { - if(state==SEEKING_STRONG_FOR_PARA) { - pBiDi->paras[pBiDi->paraCount-1].level=1; - state=NOT_SEEKING_STRONG; - } - else 
if(state==SEEKING_STRONG_FOR_FSI) { - if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { - dirProps[isolateStartStack[stackLast]]=RLI; - flags|=DIRPROP_FLAG(RLI); - } - state=LOOKING_FOR_PDI; - } - lastStrong=R; - if(dirProp==AL) - lastArabicPos=i-1; - continue; - } - if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */ - stackLast++; - if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { - isolateStartStack[stackLast]=i-1; - previousStateStack[stackLast]=state; - } - if(dirProp==FSI) { - dirProps[i-1]=LRI; /* default if no strong char */ - state=SEEKING_STRONG_FOR_FSI; - } - else - state=LOOKING_FOR_PDI; - continue; - } - if(dirProp==PDI) { - if(state==SEEKING_STRONG_FOR_FSI) { - if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { - /* no need for next statement, already set by default */ - /* dirProps[isolateStartStack[stackLast]]=LRI; */ - flags|=DIRPROP_FLAG(LRI); - } - } - if(stackLast>=0) { - if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) - state=previousStateStack[stackLast]; - stackLast--; - } - continue; - } - if(dirProp==B) { - if(iparas[pBiDi->paraCount-1].limit=i; - if(isDefaultLevelInverse && lastStrong==R) - pBiDi->paras[pBiDi->paraCount-1].level=1; - if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { - /* When streaming, we only process whole paragraphs - thus some updates are only done on paragraph boundaries */ - pBiDi->length=i; /* i is index to next character */ - pBiDi->controlCount=controlCount; - } - if(iparaCount++; - if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */ - return FALSE; - if(isDefaultLevel) { - pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel; - state=SEEKING_STRONG_FOR_PARA; - lastStrong=defaultParaLevel; - } else { - pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel; - state=NOT_SEEKING_STRONG; - } - stackLast=-1; - } - continue; - } - } - /* Ignore still open isolate sequences with overflow */ - if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) { - stackLast=UBIDI_MAX_EXPLICIT_LEVEL; - state=SEEKING_STRONG_FOR_FSI; /* to be 
on the safe side */ - } - /* Resolve direction of still unresolved open FSI sequences */ - while(stackLast>=0) { - if(state==SEEKING_STRONG_FOR_FSI) { - /* no need for next statement, already set by default */ - /* dirProps[isolateStartStack[stackLast]]=LRI; */ - flags|=DIRPROP_FLAG(LRI); - break; - } - state=previousStateStack[stackLast]; - stackLast--; - } - /* When streaming, ignore text after the last paragraph separator */ - if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { - if(pBiDi->lengthparaCount--; - } else { - pBiDi->paras[pBiDi->paraCount-1].limit=originalLength; - pBiDi->controlCount=controlCount; - } - /* For inverse bidi, default para direction is RTL if there is - a strong R or AL at either end of the paragraph */ - if(isDefaultLevelInverse && lastStrong==R) { - pBiDi->paras[pBiDi->paraCount-1].level=1; - } - if(isDefaultLevel) { - pBiDi->paraLevel=static_cast(pBiDi->paras[0].level); - } - /* The following is needed to resolve the text direction for default level - paragraphs containing no strong character */ - for(i=0; iparaCount; i++) - flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level); - - if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { - flags|=DIRPROP_FLAG(L); - } - pBiDi->flags=flags; - pBiDi->lastArabicPos=lastArabicPos; - return TRUE; -} - -/* determine the paragraph level at position index */ -U_CFUNC UBiDiLevel -ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) { - int32_t i; - for(i=0; iparaCount; i++) - if(pindexparas[i].limit) - break; - if(i>=pBiDi->paraCount) - i=pBiDi->paraCount-1; - return (UBiDiLevel)(pBiDi->paras[i].level); -} - -/* Functions for handling paired brackets ----------------------------------- */ - -/* In the isoRuns array, the first entry is used for text outside of any - isolate sequence. Higher entries are used for each more deeply nested - isolate sequence. isoRunLast is the index of the last used entry. 
The - openings array is used to note the data of opening brackets not yet - matched by a closing bracket, or matched but still susceptible to change - level. - Each isoRun entry contains the index of the first and - one-after-last openings entries for pending opening brackets it - contains. The next openings entry to use is the one-after-last of the - most deeply nested isoRun entry. - isoRun entries also contain their current embedding level and the last - encountered strong character, since these will be needed to resolve - the level of paired brackets. */ - -static void -bracketInit(UBiDi *pBiDi, BracketData *bd) { - bd->pBiDi=pBiDi; - bd->isoRunLast=0; - bd->isoRuns[0].start=0; - bd->isoRuns[0].limit=0; - bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0); - UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1; - bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t; - bd->isoRuns[0].contextDir = (UBiDiDirection)t; - bd->isoRuns[0].contextPos=0; - if(pBiDi->openingsMemory) { - bd->openings=pBiDi->openingsMemory; - bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); - } else { - bd->openings=bd->simpleOpenings; - bd->openingsCount=SIMPLE_OPENINGS_COUNT; - } - bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL || - bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL; -} - -/* paragraph boundary */ -static void -bracketProcessB(BracketData *bd, UBiDiLevel level) { - bd->isoRunLast=0; - bd->isoRuns[0].limit=0; - bd->isoRuns[0].level=level; - bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1; - bd->isoRuns[0].contextDir=(UBiDiDirection)(level&1); - bd->isoRuns[0].contextPos=0; -} - -/* LRE, LRO, RLE, RLO, PDF */ -static void -bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, - UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) { - IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - DirProp *dirProps=bd->pBiDi->dirProps; - if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */ - return; - 
if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */ - contextLevel=embeddingLevel; - pLastIsoRun->limit=pLastIsoRun->start; - pLastIsoRun->level=embeddingLevel; - pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1; - pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1); - pLastIsoRun->contextPos=(UBiDiDirection)lastCcPos; -} - -/* LRI or RLI */ -static void -bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) { - IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - int16_t lastLimit; - pLastIsoRun->lastBase=ON; - lastLimit=pLastIsoRun->limit; - bd->isoRunLast++; - pLastIsoRun++; - pLastIsoRun->start=pLastIsoRun->limit=lastLimit; - pLastIsoRun->level=level; - pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1; - pLastIsoRun->contextDir=(UBiDiDirection)(level&1); - pLastIsoRun->contextPos=0; -} - -/* PDI */ -static void -bracketProcessPDI(BracketData *bd) { - IsoRun *pLastIsoRun; - bd->isoRunLast--; - pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - pLastIsoRun->lastBase=ON; -} - -/* newly found opening bracket: create an openings entry */ -static UBool /* return TRUE if success */ -bracketAddOpening(BracketData *bd, UChar match, int32_t position) { - IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - Opening *pOpening; - if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */ - UBiDi *pBiDi=bd->pBiDi; - if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2)) - return FALSE; - if(bd->openings==bd->simpleOpenings) - uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings, - SIMPLE_OPENINGS_COUNT * sizeof(Opening)); - bd->openings=pBiDi->openingsMemory; /* may have changed */ - bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); - } - pOpening=&bd->openings[pLastIsoRun->limit]; - pOpening->position=position; - pOpening->match=match; - pOpening->contextDir=pLastIsoRun->contextDir; - pOpening->contextPos=pLastIsoRun->contextPos; - pOpening->flags=0; - pLastIsoRun->limit++; - return TRUE; -} - -/* change N0c1 
to N0c2 when a preceding bracket is assigned the embedding level */ -static void -fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) { - /* This function calls itself recursively */ - IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - Opening *qOpening; - DirProp *dirProps=bd->pBiDi->dirProps; - int32_t k, openingPosition, closingPosition; - for(k=openingIndex+1, qOpening=&bd->openings[k]; klimit; k++, qOpening++) { - if(qOpening->match>=0) /* not an N0c match */ - continue; - if(newPropPositioncontextPos) - break; - if(newPropPosition>=qOpening->position) - continue; - if(newProp==qOpening->contextDir) - break; - openingPosition=qOpening->position; - dirProps[openingPosition]=newProp; - closingPosition=-(qOpening->match); - dirProps[closingPosition]=newProp; - qOpening->match=0; /* prevent further changes */ - fixN0c(bd, k, openingPosition, newProp); - fixN0c(bd, k, closingPosition, newProp); - } -} - -/* process closing bracket */ -static DirProp /* return L or R if N0b or N0c, ON if N0d */ -bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) { - IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - Opening *pOpening, *qOpening; - UBiDiDirection direction; - UBool stable; - DirProp newProp; - pOpening=&bd->openings[openIdx]; - direction=(UBiDiDirection)(pLastIsoRun->level&1); - stable=TRUE; /* assume stable until proved otherwise */ - - /* The stable flag is set when brackets are paired and their - level is resolved and cannot be changed by what will be - found later in the source string. - An unstable match can occur only when applying N0c, where - the resolved level depends on the preceding context, and - this context may be affected by text occurring later. - Example: RTL paragraph containing: abc[(latin) HEBREW] - When the closing parenthesis is encountered, it appears - that N0c1 must be applied since 'abc' sets an opposite - direction context and both parentheses receive level 2. 
- However, when the closing square bracket is processed, - N0b applies because of 'HEBREW' being included within the - brackets, thus the square brackets are treated like R and - receive level 1. However, this changes the preceding - context of the opening parenthesis, and it now appears - that N0c2 must be applied to the parentheses rather than - N0c1. */ - - if((direction==0 && pOpening->flags&FOUND_L) || - (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */ - newProp=static_cast(direction); - } - else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */ - /* it is stable if there is no containing pair or in - conditions too complicated and not worth checking */ - stable=(openIdx==pLastIsoRun->start); - if(direction!=pOpening->contextDir) - newProp= static_cast(pOpening->contextDir); /* N0c1 */ - else - newProp= static_cast(direction); /* N0c2 */ - } else { - /* forget this and any brackets nested within this pair */ - pLastIsoRun->limit= static_cast(openIdx); - return ON; /* N0d */ - } - bd->pBiDi->dirProps[pOpening->position]=newProp; - bd->pBiDi->dirProps[position]=newProp; - /* Update nested N0c pairs that may be affected */ - fixN0c(bd, openIdx, pOpening->position, newProp); - if(stable) { - pLastIsoRun->limit= static_cast(openIdx); /* forget any brackets nested within this pair */ - /* remove lower located synonyms if any */ - while(pLastIsoRun->limit>pLastIsoRun->start && - bd->openings[pLastIsoRun->limit-1].position==pOpening->position) - pLastIsoRun->limit--; - } else { - int32_t k; - pOpening->match=-position; - /* neutralize lower located synonyms if any */ - k=openIdx-1; - while(k>=pLastIsoRun->start && - bd->openings[k].position==pOpening->position) - bd->openings[k--].match=0; - /* neutralize any unmatched opening between the current pair; - this will also neutralize higher located synonyms if any */ - for(k=openIdx+1; klimit; k++) { - qOpening=&bd->openings[k]; - if(qOpening->position>=position) - break; - if(qOpening->match>0) - qOpening->match=0; 
- } - } - return newProp; -} - -/* handle strong characters, digits and candidates for closing brackets */ -static UBool /* return TRUE if success */ -bracketProcessChar(BracketData *bd, int32_t position) { - IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; - DirProp *dirProps, dirProp, newProp; - UBiDiLevel level; - dirProps=bd->pBiDi->dirProps; - dirProp=dirProps[position]; - if(dirProp==ON) { - UChar c, match; - int32_t idx; - /* First see if it is a matching closing bracket. Hopefully, this is - more efficient than checking if it is a closing bracket at all */ - c=bd->pBiDi->text[position]; - for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) { - if(bd->openings[idx].match!=c) - continue; - /* We have a match */ - newProp=bracketProcessClosing(bd, idx, position); - if(newProp==ON) { /* N0d */ - c=0; /* prevent handling as an opening */ - break; - } - pLastIsoRun->lastBase=ON; - pLastIsoRun->contextDir=(UBiDiDirection)newProp; - pLastIsoRun->contextPos=position; - level=bd->pBiDi->levels[position]; - if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */ - uint16_t flag; - int32_t i; - newProp=level&1; - pLastIsoRun->lastStrong=newProp; - flag=DIRPROP_FLAG(newProp); - for(i=pLastIsoRun->start; iopenings[i].flags|=flag; - /* matching brackets are not overridden by LRO/RLO */ - bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE; - } - /* matching brackets are not overridden by LRO/RLO */ - bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE; - return TRUE; - } - /* We get here only if the ON character is not a matching closing - bracket or it is a case of N0d */ - /* Now see if it is an opening bracket */ - if(c) - match= static_cast(u_getBidiPairedBracket(c)); /* get the matching char */ - else - match=0; - if(match!=c && /* has a matching char */ - ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */ - /* special case: process synonyms - create an opening entry for each synonym */ - if(match==0x232A) { /* RIGHT-POINTING ANGLE 
BRACKET */ - if(!bracketAddOpening(bd, 0x3009, position)) - return FALSE; - } - else if(match==0x3009) { /* RIGHT ANGLE BRACKET */ - if(!bracketAddOpening(bd, 0x232A, position)) - return FALSE; - } - if(!bracketAddOpening(bd, match, position)) - return FALSE; - } - } - level=bd->pBiDi->levels[position]; - if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */ - newProp=level&1; - if(dirProp!=S && dirProp!=WS && dirProp!=ON) - dirProps[position]=newProp; - pLastIsoRun->lastBase=newProp; - pLastIsoRun->lastStrong=newProp; - pLastIsoRun->contextDir=(UBiDiDirection)newProp; - pLastIsoRun->contextPos=position; - } - else if(dirProp<=R || dirProp==AL) { - newProp= static_cast(DIR_FROM_STRONG(dirProp)); - pLastIsoRun->lastBase=dirProp; - pLastIsoRun->lastStrong=dirProp; - pLastIsoRun->contextDir=(UBiDiDirection)newProp; - pLastIsoRun->contextPos=position; - } - else if(dirProp==EN) { - pLastIsoRun->lastBase=EN; - if(pLastIsoRun->lastStrong==L) { - newProp=L; /* W7 */ - if(!bd->isNumbersSpecial) - dirProps[position]=ENL; - pLastIsoRun->contextDir=(UBiDiDirection)L; - pLastIsoRun->contextPos=position; - } - else { - newProp=R; /* N0 */ - if(pLastIsoRun->lastStrong==AL) - dirProps[position]=AN; /* W2 */ - else - dirProps[position]=ENR; - pLastIsoRun->contextDir=(UBiDiDirection)R; - pLastIsoRun->contextPos=position; - } - } - else if(dirProp==AN) { - newProp=R; /* N0 */ - pLastIsoRun->lastBase=AN; - pLastIsoRun->contextDir=(UBiDiDirection)R; - pLastIsoRun->contextPos=position; - } - else if(dirProp==NSM) { - /* if the last real char was ON, change NSM to ON so that it - will stay ON even if the last real char is a bracket which - may be changed to L or R */ - newProp=pLastIsoRun->lastBase; - if(newProp==ON) - dirProps[position]=newProp; - } - else { - newProp=dirProp; - pLastIsoRun->lastBase=dirProp; - } - if(newProp<=R || newProp==AL) { - int32_t i; - uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp)); - for(i=pLastIsoRun->start; ilimit; i++) - if(position>bd->openings[i].position) 
- bd->openings[i].flags|=flag; - } - return TRUE; -} - -/* perform (X1)..(X9) ------------------------------------------------------- */ - -/* determine if the text is mixed-directional or single-directional */ -static UBiDiDirection -directionFromFlags(UBiDi *pBiDi) { - Flags flags=pBiDi->flags; - /* if the text contains AN and neutrals, then some neutrals may become RTL */ - if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) { - return UBIDI_LTR; - } else if(!(flags&MASK_LTR)) { - return UBIDI_RTL; - } else { - return UBIDI_MIXED; - } -} - -/* - * Resolve the explicit levels as specified by explicit embedding codes. - * Recalculate the flags to have them reflect the real properties - * after taking the explicit embeddings into account. - * - * The BiDi algorithm is designed to result in the same behavior whether embedding - * levels are externally specified (from "styled text", supposedly the preferred - * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text. - * That is why (X9) instructs to remove all not-isolate explicit codes (and BN). - * However, in a real implementation, the removal of these codes and their index - * positions in the plain text is undesirable since it would result in - * reallocated, reindexed text. - * Instead, this implementation leaves the codes in there and just ignores them - * in the subsequent processing. - * In order to get the same reordering behavior, positions with a BN or a not-isolate - * explicit embedding code just get the same level assigned as the last "real" - * character. - * - * Some implementations, not this one, then overwrite some of these - * directionality properties at "real" same-level-run boundaries by - * L or R codes so that the resolution of weak types can be performed on the - * entire paragraph at once instead of having to parse it once more and - * perform that resolution on same-level-runs. 
- * This limits the scope of the implicit rules in effectively - * the same way as the run limits. - * - * Instead, this implementation does not modify these codes, except for - * paired brackets whose properties (ON) may be replaced by L or R. - * On one hand, the paragraph has to be scanned for same-level-runs, but - * on the other hand, this saves another loop to reset these codes, - * or saves making and modifying a copy of dirProps[]. - * - * - * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. - * - * - * Handling the stack of explicit levels (Xn): - * - * With the BiDi stack of explicit levels, as pushed with each - * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI, - * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL. - * - * In order to have a correct push-pop semantics even in the case of overflows, - * overflow counters and a valid isolate counter are used as described in UAX#9 - * section 3.3.2 "Explicit Levels and Directions". - * - * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. 
- * - * Returns normally the direction; -1 if there was a memory shortage - * - */ -static UBiDiDirection -resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { - DirProp *dirProps=pBiDi->dirProps; - UBiDiLevel *levels=pBiDi->levels; - const UChar *text=pBiDi->text; - - int32_t i=0, length=pBiDi->length; - Flags flags=pBiDi->flags; /* collect all directionalities in the text */ - DirProp dirProp; - UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); - UBiDiDirection direction; - pBiDi->isolateCount=0; - - if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; } - - /* determine if the text is mixed-directional or single-directional */ - direction=directionFromFlags(pBiDi); - - /* we may not need to resolve any explicit levels */ - if((direction!=UBIDI_MIXED)) { - /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ - return direction; - } - if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) { - /* inverse BiDi: mixed, but all characters are at the same embedding level */ - /* set all levels to the paragraph level */ - int32_t paraIndex, start, limit; - for(paraIndex=0; paraIndexparaCount; paraIndex++) { - if(paraIndex==0) - start=0; - else - start=pBiDi->paras[paraIndex-1].limit; - limit=pBiDi->paras[paraIndex].limit; - level= static_cast(pBiDi->paras[paraIndex].level); - for(i=start; iparaCount; paraIndex++) { - if(paraIndex==0) - start=0; - else - start=pBiDi->paras[paraIndex-1].limit; - limit=pBiDi->paras[paraIndex].limit; - level= static_cast(pBiDi->paras[paraIndex].level); - for(i=start; i=UBIDI_MAX_EXPLICIT_LEVEL - but we need one more entry as base */ - uint32_t stackLast=0; - int32_t overflowIsolateCount=0; - int32_t overflowEmbeddingCount=0; - int32_t validIsolateCount=0; - BracketData bracketData; - bracketInit(pBiDi, &bracketData); - stack[0]=level; /* initialize base entry to para level, no override, no isolate */ - - /* recalculate the flags */ - flags=0; - - for(i=0; i0 && stack[stackLast]pBiDi->isolateCount) - 
pBiDi->isolateCount=validIsolateCount; - embeddingLevel=newLevel; - /* we can increment stackLast without checking because newLevel - will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */ - stackLast++; - stack[stackLast]=embeddingLevel+ISOLATE; - bracketProcessLRI_RLI(&bracketData, embeddingLevel); - } else { - /* make it WS so that it is handled by adjustWSLevels() */ - dirProps[i]=WS; - overflowIsolateCount++; - } - break; - case PDI: - if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) { - bracketProcessBoundary(&bracketData, lastCcPos, - previousLevel, embeddingLevel); - flags|=DIRPROP_FLAG_MULTI_RUNS; - } - /* (X6a) */ - if(overflowIsolateCount) { - overflowIsolateCount--; - /* make it WS so that it is handled by adjustWSLevels() */ - dirProps[i]=WS; - } - else if(validIsolateCount) { - flags|=DIRPROP_FLAG(PDI); - lastCcPos=i; - overflowEmbeddingCount=0; - while(stack[stackLast]paraLevel); - if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) - flags|=DIRPROP_FLAG(L); - /* again, determine if the text is mixed-directional or single-directional */ - pBiDi->flags=flags; - direction=directionFromFlags(pBiDi); - } - return direction; -} - -/* - * Use a pre-specified embedding levels array: - * - * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), - * ignore all explicit codes (X9), - * and check all the preset levels. - * - * Recalculate the flags to have them reflect the real properties - * after taking the explicit embeddings into account. 
- */ -static UBiDiDirection -checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { - DirProp *dirProps=pBiDi->dirProps; - UBiDiLevel *levels=pBiDi->levels; - int32_t isolateCount=0; - - int32_t length=pBiDi->length; - Flags flags=0; /* collect all directionalities in the text */ - pBiDi->isolateCount=0; - - int32_t currentParaIndex = 0; - int32_t currentParaLimit = pBiDi->paras[0].limit; - int32_t currentParaLevel = pBiDi->paraLevel; - - for(int32_t i=0; ipBiDi->isolateCount) - pBiDi->isolateCount=isolateCount; - } - else if(dirProp==PDI) - isolateCount--; - else if(dirProp==B) - isolateCount=0; - - // optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i); - if (pBiDi->defaultParaLevel != 0 && - i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) { - currentParaLevel = pBiDi->paras[++currentParaIndex].level; - currentParaLimit = pBiDi->paras[currentParaIndex].limit; - } - - UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE; - level &= ~UBIDI_LEVEL_OVERRIDE; - if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) { - if (level == 0) { - if (dirProp == B) { - // Paragraph separators are ok with explicit level 0. - // Prevents reordering of paragraphs. - } else { - // Treat explicit level 0 as a wildcard for the paragraph level. - // Avoid making the caller guess what the paragraph level would be. 
- level = (UBiDiLevel)currentParaLevel; - levels[i] = level | overrideFlag; - } - } else { - // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level - /* level out of bounds */ - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return UBIDI_LTR; - } - } - if (overrideFlag != 0) { - /* keep the override flag in levels[i] but adjust the flags */ - flags|=DIRPROP_FLAG_O(level); - } else { - /* set the flags */ - flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); - } - } - if(flags&MASK_EMBEDDING) - flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); - /* determine if the text is mixed-directional or single-directional */ - pBiDi->flags=flags; - return directionFromFlags(pBiDi); -} - -/****************************************************************** - The Properties state machine table -******************************************************************* - - All table cells are 8 bits: - bits 0..4: next state - bits 5..7: action to perform (if > 0) - - Cells may be of format "n" where n represents the next state - (except for the rightmost column). - Cells may also be of format "s(x,y)" where x represents an action - to perform and y represents the next state. 
- -******************************************************************* - Definitions and type for properties state table -******************************************************************* -*/ -#define IMPTABPROPS_COLUMNS 16 -#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1) -#define GET_STATEPROPS(cell) ((cell)&0x1f) -#define GET_ACTIONPROPS(cell) ((cell)>>5) -#define s(action, newState) ((uint8_t)(newState+(action<<5))) - -static const uint8_t groupProp[] = /* dirProp regrouped */ -{ -/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */ - 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14 -}; -enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */ - -/****************************************************************** - - PROPERTIES STATE TABLE - - In table impTabProps, - - the ON column regroups ON and WS, FSI, RLI, LRI and PDI - - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF - - the Res column is the reduced property assigned to a run - - Action 1: process current run1, init new run1 - 2: init new run2 - 3: process run1, process run2, init new run1 - 4: process run1, set run1=run2, init new run2 - - Notes: - 1) This table is used in resolveImplicitLevels(). - 2) This table triggers actions when there is a change in the Bidi - property of incoming characters (action 1). - 3) Most such property sequences are processed immediately (in - fact, passed to processPropertySeq(). - 4) However, numbers are assembled as one sequence. This means - that undefined situations (like CS following digits, until - it is known if the next char will be a digit) are held until - following chars define them. - Example: digits followed by CS, then comes another CS or ON; - the digits will be processed, then the CS assigned - as the start of an ON sequence (action 3). 
- 5) There are cases where more than one sequence must be - processed, for instance digits followed by CS followed by L: - the digits must be processed as one sequence, and the CS - must be processed as an ON sequence, all this before starting - assembling chars for the opening L sequence. - - -*/ -static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] = -{ -/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */ -/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON }, -/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L }, -/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R }, -/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R }, -/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN }, -/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN }, -/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN }, -/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON }, -/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON }, -/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON }, -/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN }, -/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN }, -/*12 AN+CS */ { 
s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN }, -/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN }, -/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON }, -/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S }, -/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S }, -/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B }, -/*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L }, -/*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L }, -/*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L }, -/*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN }, -/*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN }, -/*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN } -}; - -/* we must undef macro s because the levels tables have a different - * structure (4 bits for action and 4 bits for next state. 
- */ -#undef s - -/****************************************************************** - The levels state machine tables -******************************************************************* - - All table cells are 8 bits: - bits 0..3: next state - bits 4..7: action to perform (if > 0) - - Cells may be of format "n" where n represents the next state - (except for the rightmost column). - Cells may also be of format "s(x,y)" where x represents an action - to perform and y represents the next state. - - This format limits each table to 16 states each and to 15 actions. - -******************************************************************* - Definitions and type for levels state tables -******************************************************************* -*/ -#define IMPTABLEVELS_COLUMNS (DirProp_B + 2) -#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1) -#define GET_STATE(cell) ((cell)&0x0f) -#define GET_ACTION(cell) ((cell)>>4) -#define s(action, newState) ((uint8_t)(newState+(action<<4))) - -typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; -typedef uint8_t ImpAct[]; - -/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct, - * instead of having a pair of ImpTab and a pair of ImpAct. - */ -typedef struct ImpTabPair { - const void * pImpTab[2]; - const void * pImpAct[2]; -} ImpTabPair; - -/****************************************************************** - - LEVELS STATE TABLES - - In all levels state tables, - - state 0 is the initial state - - the Res column is the increment to add to the text level - for this property sequence. - - The impAct arrays for each table of a pair map the local action - numbers of the table to the total list of actions. For instance, - action 2 in a given table corresponds to the action number which - appears in entry [2] of the impAct array for that table. - The first entry of all impAct arrays must be 0. 
- - Action 1: init conditional sequence - 2: prepend conditional sequence to current sequence - 3: set ON sequence to new level - 1 - 4: init EN/AN/ON sequence - 5: fix EN/AN/ON sequence followed by R - 6: set previous level sequence to level 2 - - Notes: - 1) These tables are used in processPropertySeq(). The input - is property sequences as determined by resolveImplicitLevels. - 2) Most such property sequences are processed immediately - (levels are assigned). - 3) However, some sequences cannot be assigned a final level till - one or more following sequences are received. For instance, - ON following an R sequence within an even-level paragraph. - If the following sequence is R, the ON sequence will be - assigned basic run level+1, and so will the R sequence. - 4) S is generally handled like ON, since its level will be fixed - to paragraph level in adjustWSLevels(). - -*/ - -static const ImpTab impTabL_DEFAULT = /* Even paragraph level */ -/* In this table, conditional sequences receive the lower possible level - until proven otherwise. -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 }, -/* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 }, -/* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 }, -/* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 }, -/* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 }, -/* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 } -}; -static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */ -/* In this table, conditional sequences receive the lower possible level - until proven otherwise. 
-*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, -/* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 }, -/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, -/* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 }, -/* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 }, -/* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 } -}; -static const ImpAct impAct0 = {0,1,2,3,4}; -static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT, - &impTabR_DEFAULT}, - {&impAct0, &impAct0}}; - -static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */ -/* In this table, conditional sequences receive the lower possible level - until proven otherwise. -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, -/* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 }, -/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 }, -/* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 }, -/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 } -}; -static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL, - &impTabR_DEFAULT}, - {&impAct0, &impAct0}}; - -static const ImpTab impTabL_GROUP_NUMBERS_WITH_R = -/* In this table, EN/AN+ON sequences receive levels as if associated with R - until proven that there is L or sor/eor on both sides. AN is handled like EN. -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, -/* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 }, -/* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 }, -/* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 }, -/* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 }, -/* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 } -}; -static const ImpTab impTabR_GROUP_NUMBERS_WITH_R = -/* In this table, EN/AN+ON sequences receive levels as if associated with R - until proven that there is L on both sides. AN is handled like EN. 
-*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, -/* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, -/* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 }, -/* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 }, -/* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 } -}; -static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = { - {&impTabL_GROUP_NUMBERS_WITH_R, - &impTabR_GROUP_NUMBERS_WITH_R}, - {&impAct0, &impAct0}}; - - -static const ImpTab impTabL_INVERSE_NUMBERS_AS_L = -/* This table is identical to the Default LTR table except that EN and AN are - handled like L. -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 }, -/* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 }, -/* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 }, -/* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 }, -/* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 }, -/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 } -}; -static const ImpTab impTabR_INVERSE_NUMBERS_AS_L = -/* This table is identical to the Default RTL table except that EN and AN are - handled like L. -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, -/* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 }, -/* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, -/* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 }, -/* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 }, -/* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 } -}; -static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = { - {&impTabL_INVERSE_NUMBERS_AS_L, - &impTabR_INVERSE_NUMBERS_AS_L}, - {&impAct0, &impAct0}}; - -static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */ -/* In this table, conditional sequences receive the lower possible level - until proven otherwise. 
-*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, -/* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 }, -/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, -/* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 }, -/* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 }, -/* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 }, -/* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 } -}; -static const ImpAct impAct1 = {0,1,13,14}; -/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc" - */ -static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = { - {&impTabL_DEFAULT, - &impTabR_INVERSE_LIKE_DIRECT}, - {&impAct0, &impAct1}}; - -static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = -/* The case handled in this table is (visually): R EN L -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 }, -/* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 }, -/* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 }, -/* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 }, -/* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 }, -/* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 }, -/* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 } -}; -static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = -/* The cases handled in this table are (visually): R EN L - R L AN L -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 }, -/* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 }, -/* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 }, -/* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 }, -/* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 }, -/* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 }, -/* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , 
s(4,0), s(4,0), 3 } -}; -static const ImpAct impAct2 = {0,1,2,5,6,7,8}; -static const ImpAct impAct3 = {0,1,9,10,11,12}; -static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = { - {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, - &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, - {&impAct2, &impAct3}}; - -static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = { - {&impTabL_NUMBERS_SPECIAL, - &impTabR_INVERSE_LIKE_DIRECT}, - {&impAct0, &impAct1}}; - -static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = -/* The case handled in this table is (visually): R EN L -*/ -{ -/* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 }, -/* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 }, -/* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 }, -/* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 }, -/* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 } -}; -static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { - {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, - &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, - {&impAct2, &impAct3}}; - -#undef s - -typedef struct { - const ImpTab * pImpTab; /* level table pointer */ - const ImpAct * pImpAct; /* action map array */ - int32_t startON; /* start of ON sequence */ - int32_t startL2EN; /* start of level 2 sequence */ - int32_t lastStrongRTL; /* index of last found R or AL */ - int32_t state; /* current state */ - int32_t runStart; /* start position of the run */ - UBiDiLevel runLevel; /* run level before implicit solving */ -} LevState; - -/*------------------------------------------------------------------------*/ - -static void -addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) - /* param pos: position where to insert - param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER - */ -{ -#define FIRSTALLOC 10 - Point point; - InsertPoints * pInsertPoints=&(pBiDi->insertPoints); - - if 
(pInsertPoints->capacity == 0) - { - pInsertPoints->points=static_cast(uprv_malloc(sizeof(Point)*FIRSTALLOC)); - if (pInsertPoints->points == NULL) - { - pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - pInsertPoints->capacity=FIRSTALLOC; - } - if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */ - { - Point * savePoints=pInsertPoints->points; - pInsertPoints->points=static_cast(uprv_realloc(pInsertPoints->points, - pInsertPoints->capacity*2*sizeof(Point))); - if (pInsertPoints->points == NULL) - { - pInsertPoints->points=savePoints; - pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - else pInsertPoints->capacity*=2; - } - point.pos=pos; - point.flag=flag; - pInsertPoints->points[pInsertPoints->size]=point; - pInsertPoints->size++; -#undef FIRSTALLOC -} - -static void -setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level) -{ - DirProp *dirProps=pBiDi->dirProps, dirProp; - UBiDiLevel *levels=pBiDi->levels; - int32_t isolateCount=0, k; - for(k=start; kpImpTab; - const ImpAct * pImpAct=pLevState->pImpAct; - UBiDiLevel * levels=pBiDi->levels; - UBiDiLevel level, addLevel; - InsertPoints * pInsertPoints; - int32_t start0, k; - - start0=start; /* save original start position */ - oldStateSeq=(uint8_t)pLevState->state; - cell=(*pImpTab)[oldStateSeq][_prop]; - pLevState->state=GET_STATE(cell); /* isolate the new state */ - actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */ - addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; - - if(actionSeq) { - switch(actionSeq) { - case 1: /* init ON seq */ - pLevState->startON=start0; - break; - - case 2: /* prepend ON seq to current seq */ - start=pLevState->startON; - break; - - case 3: /* EN/AN after R+ON */ - level=pLevState->runLevel+1; - setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level); - break; - - case 4: /* EN/AN before R for NUMBERS_SPECIAL */ - level=pLevState->runLevel+2; - 
setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level); - break; - - case 5: /* L or S after possible relevant EN/AN */ - /* check if we had EN after R/AL */ - if (pLevState->startL2EN >= 0) { - addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); - } - pLevState->startL2EN=-1; /* not within previous if since could also be -2 */ - /* check if we had any relevant EN/AN after R/AL */ - pInsertPoints=&(pBiDi->insertPoints); - if ((pInsertPoints->capacity == 0) || - (pInsertPoints->size <= pInsertPoints->confirmed)) - { - /* nothing, just clean up */ - pLevState->lastStrongRTL=-1; - /* check if we have a pending conditional segment */ - level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES]; - if ((level & 1) && (pLevState->startON > 0)) { /* after ON */ - start=pLevState->startON; /* reset to basic run level */ - } - if (_prop == DirProp_S) /* add LRM before S */ - { - addPoint(pBiDi, start0, LRM_BEFORE); - pInsertPoints->confirmed=pInsertPoints->size; - } - break; - } - /* reset previous RTL cont to level for LTR text */ - for (k=pLevState->lastStrongRTL+1; kconfirmed=pInsertPoints->size; - pLevState->lastStrongRTL=-1; - if (_prop == DirProp_S) /* add LRM before S */ - { - addPoint(pBiDi, start0, LRM_BEFORE); - pInsertPoints->confirmed=pInsertPoints->size; - } - break; - - case 6: /* R/AL after possible relevant EN/AN */ - /* just clean up */ - pInsertPoints=&(pBiDi->insertPoints); - if (pInsertPoints->capacity > 0) - /* remove all non confirmed insert points */ - pInsertPoints->size=pInsertPoints->confirmed; - pLevState->startON=-1; - pLevState->startL2EN=-1; - pLevState->lastStrongRTL=limit - 1; - break; - - case 7: /* EN/AN after R/AL + possible cont */ - /* check for real AN */ - if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) && - (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)) - { - /* real AN */ - if (pLevState->startL2EN == -1) /* if no relevant EN already found */ - { - /* just note the righmost digit as a strong RTL */ - 
pLevState->lastStrongRTL=limit - 1; - break; - } - if (pLevState->startL2EN >= 0) /* after EN, no AN */ - { - addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); - pLevState->startL2EN=-2; - } - /* note AN */ - addPoint(pBiDi, start0, LRM_BEFORE); - break; - } - /* if first EN/AN after R/AL */ - if (pLevState->startL2EN == -1) { - pLevState->startL2EN=start0; - } - break; - - case 8: /* note location of latest R/AL */ - pLevState->lastStrongRTL=limit - 1; - pLevState->startON=-1; - break; - - case 9: /* L after R+ON/EN/AN */ - /* include possible adjacent number on the left */ - for (k=start0-1; k>=0 && !(levels[k]&1); k--); - if(k>=0) { - addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */ - pInsertPoints=&(pBiDi->insertPoints); - pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */ - } - pLevState->startON=start0; - break; - - case 10: /* AN after L */ - /* AN numbers between L text on both sides may be trouble. */ - /* tentatively bracket with LRMs; will be confirmed if followed by L */ - addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */ - addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */ - break; - - case 11: /* R after L+ON/EN/AN */ - /* false alert, infirm LRMs around previous AN */ - pInsertPoints=&(pBiDi->insertPoints); - pInsertPoints->size=pInsertPoints->confirmed; - if (_prop == DirProp_S) /* add RLM before S */ - { - addPoint(pBiDi, start0, RLM_BEFORE); - pInsertPoints->confirmed=pInsertPoints->size; - } - break; - - case 12: /* L after L+ON/AN */ - level=pLevState->runLevel + addLevel; - for(k=pLevState->startON; kinsertPoints); - pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */ - pLevState->startON=start0; - break; - - case 13: /* L after L+ON+EN/AN/ON */ - level=pLevState->runLevel; - for(k=start0-1; k>=pLevState->startON; k--) { - if(levels[k]==level+3) { - while(levels[k]==level+3) { - levels[k--]-=2; - } - while(levels[k]==level) { - k--; - } - } - if(levels[k]==level+2) { - levels[k]=level; - continue; - 
} - levels[k]=level+1; - } - break; - - case 14: /* R after L+ON+EN/AN/ON */ - level=pLevState->runLevel+1; - for(k=start0-1; k>=pLevState->startON; k--) { - if(levels[k]>level) { - levels[k]-=2; - } - } - break; - - default: /* we should never get here */ - U_ASSERT(FALSE); - break; - } - } - if((addLevel) || (start < start0)) { - level=pLevState->runLevel + addLevel; - if(start>=pLevState->runStart) { - for(k=start; kprologue; - int32_t length=pBiDi->proLength; - int32_t i; - UChar32 uchar; - DirProp dirProp; - for(i=length; i>0; ) { - /* i is decremented by U16_PREV */ - U16_PREV(text, 0, i, uchar); - dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); - if(dirProp==L) { - return DirProp_L; - } - if(dirProp==R || dirProp==AL) { - return DirProp_R; - } - if(dirProp==B) { - return DirProp_ON; - } - } - return DirProp_ON; -} - -/** - * Returns the directionality of the first strong character, or digit, in the epilogue, if any. - * Requires epilogue!=null. - */ -static DirProp -firstL_R_AL_EN_AN(UBiDi *pBiDi) { - const UChar *text=pBiDi->epilogue; - int32_t length=pBiDi->epiLength; - int32_t i; - UChar32 uchar; - DirProp dirProp; - for(i=0; idirProps; - DirProp dirProp; - LevState levState; - int32_t i, start1, start2; - uint16_t oldStateImp, stateImp, actionImp; - uint8_t gprop, resProp, cell; - UBool inverseRTL; - DirProp nextStrongProp=R; - int32_t nextStrongPos=-1; - - /* check for RTL inverse BiDi mode */ - /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to - * loop on the text characters from end to start. - * This would need a different properties state table (at least different - * actions) and different levels state tables (maybe very similar to the - * LTR corresponding ones. 
- */ - inverseRTL=(UBool) - ((startlastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && - (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || - pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); - - /* initialize for property and levels state tables */ - levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ - levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ - levState.runStart=start; - levState.runLevel=pBiDi->levels[start]; - levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; - levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1]; - if(start==0 && pBiDi->proLength>0) { - DirProp lastStrong=lastL_R_AL(pBiDi); - if(lastStrong!=DirProp_ON) { - sor=lastStrong; - } - } - /* The isolates[] entries contain enough information to - resume the bidi algorithm in the same state as it was - when it was interrupted by an isolate sequence. */ - if(dirProps[start]==PDI && pBiDi->isolateCount >= 0) { - levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON; - start1=pBiDi->isolates[pBiDi->isolateCount].start1; - stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp; - levState.state=pBiDi->isolates[pBiDi->isolateCount].state; - pBiDi->isolateCount--; - } else { - levState.startON=-1; - start1=start; - if(dirProps[start]==NSM) - stateImp = 1 + sor; - else - stateImp=0; - levState.state=0; - processPropertySeq(pBiDi, &levState, sor, start, start); - } - start2=start; /* to make Java compiler happy */ - - for(i=start; i<=limit; i++) { - if(i>=limit) { - int32_t k; - for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--); - dirProp=dirProps[k]; - if(dirProp==LRI || dirProp==RLI) - break; /* no forced closing for sequence ending with LRI/RLI */ - gprop=eor; - } else { - DirProp prop, prop1; - prop=dirProps[i]; - if(prop==B) { - pBiDi->isolateCount=-1; /* current isolates stack entry == none */ - } - if(inverseRTL) { - 
if(prop==AL) { - /* AL before EN does not make it AN */ - prop=R; - } else if(prop==EN) { - if(nextStrongPos<=i) { - /* look for next strong char (L/R/AL) */ - int32_t j; - nextStrongProp=R; /* set default */ - nextStrongPos=limit; - for(j=i+1; jlength && pBiDi->epiLength>0) { - DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi); - if(firstStrong!=DirProp_ON) { - eor=firstStrong; - } - } - - /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */ - for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--); - dirProp=dirProps[i]; - if((dirProp==LRI || dirProp==RLI) && limitlength) { - pBiDi->isolateCount++; - pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp; - pBiDi->isolates[pBiDi->isolateCount].state=levState.state; - pBiDi->isolates[pBiDi->isolateCount].start1=start1; - pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON; - } - else - processPropertySeq(pBiDi, &levState, eor, limit, limit); -} - -/* perform (L1) and (X9) ---------------------------------------------------- */ - -/* - * Reset the embedding levels for some non-graphic characters (L1). - * This function also sets appropriate levels for BN, and - * explicit embedding types that are supposed to have been removed - * from the paragraph in (X9). 
- */ -static void -adjustWSLevels(UBiDi *pBiDi) { - const DirProp *dirProps=pBiDi->dirProps; - UBiDiLevel *levels=pBiDi->levels; - int32_t i; - - if(pBiDi->flags&MASK_WS) { - UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; - Flags flag; - - i=pBiDi->trailingWSStart; - while(i>0) { - /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ - while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) { - if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { - levels[i]=0; - } else { - levels[i]=GET_PARALEVEL(pBiDi, i); - } - } - - /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ - /* here, i+1 is guaranteed to be 0) { - flag=DIRPROP_FLAG(dirProps[--i]); - if(flag&MASK_BN_EXPLICIT) { - levels[i]=levels[i+1]; - } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { - levels[i]=0; - break; - } else if(flag&MASK_B_S) { - levels[i]=GET_PARALEVEL(pBiDi, i); - break; - } - } - } - } -} - -U_CAPI void U_EXPORT2 -ubidi_setContext(UBiDi *pBiDi, - const UChar *prologue, int32_t proLength, - const UChar *epilogue, int32_t epiLength, - UErrorCode *pErrorCode) { - /* check the argument values */ - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - if(pBiDi==NULL || proLength<-1 || epiLength<-1 || - (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if(proLength==-1) { - pBiDi->proLength=u_strlen(prologue); - } else { - pBiDi->proLength=proLength; - } - if(epiLength==-1) { - pBiDi->epiLength=u_strlen(epilogue); - } else { - pBiDi->epiLength=epiLength; - } - pBiDi->prologue=prologue; - pBiDi->epilogue=epilogue; -} - -static void -setParaSuccess(UBiDi *pBiDi) { - pBiDi->proLength=0; /* forget the last context */ - pBiDi->epiLength=0; - pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ -} - -#define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) -#define BIDI_ABS(x) ((x)>=0 ? 
(x) : (-(x))) - -static void -setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, - UBiDiLevel paraLevel, UErrorCode *pErrorCode) { - int32_t *runsOnlyMemory = NULL; - int32_t *visualMap; - UChar *visualText; - int32_t saveLength, saveTrailingWSStart; - const UBiDiLevel *levels; - UBiDiLevel *saveLevels; - UBiDiDirection saveDirection; - UBool saveMayAllocateText; - Run *runs; - int32_t visualLength, i, j, visualStart, logicalStart, - runCount, runLength, addedRuns, insertRemove, - start, limit, step, indexOddBit, logicalPos, - index0, index1; - uint32_t saveOptions; - - pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; - if(length==0) { - ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); - goto cleanup3; - } - /* obtain memory for mapping table and visual text */ - runsOnlyMemory=static_cast(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)))); - if(runsOnlyMemory==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - goto cleanup3; - } - visualMap=runsOnlyMemory; - visualText=(UChar *)&visualMap[length]; - saveLevels=(UBiDiLevel *)&visualText[length]; - saveOptions=pBiDi->reorderingOptions; - if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { - pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; - pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; - } - paraLevel&=1; /* accept only 0 or 1 */ - ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - goto cleanup3; - } - /* we cannot access directly pBiDi->levels since it is not yet set if - * direction is not MIXED - */ - levels=ubidi_getLevels(pBiDi, pErrorCode); - uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel)); - saveTrailingWSStart=pBiDi->trailingWSStart; - saveLength=pBiDi->length; - saveDirection=pBiDi->direction; - - /* FOOD FOR THOUGHT: instead of writing the visual text, we could use - * the visual map and the dirProps array to drive the second call - * to ubidi_setPara (but must make provision for 
possible removal of - * BiDi controls. Alternatively, only use the dirProps array via - * customized classifier callback. - */ - visualLength=ubidi_writeReordered(pBiDi, visualText, length, - UBIDI_DO_MIRRORING, pErrorCode); - ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - goto cleanup2; - } - pBiDi->reorderingOptions=saveOptions; - - pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; - paraLevel^=1; - /* Because what we did with reorderingOptions, visualText may be shorter - * than the original text. But we don't want the levels memory to be - * reallocated shorter than the original length, since we need to restore - * the levels as after the first call to ubidi_setpara() before returning. - * We will force mayAllocateText to FALSE before the second call to - * ubidi_setpara(), and will restore it afterwards. - */ - saveMayAllocateText=pBiDi->mayAllocateText; - pBiDi->mayAllocateText=FALSE; - ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); - pBiDi->mayAllocateText=saveMayAllocateText; - ubidi_getRuns(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - goto cleanup1; - } - /* check if some runs must be split, count how many splits */ - addedRuns=0; - runCount=pBiDi->runCount; - runs=pBiDi->runs; - visualStart=0; - for(i=0; irunsMemory[0]=runs[0]; - } - runs=pBiDi->runs=pBiDi->runsMemory; - pBiDi->runCount+=addedRuns; - } else { - goto cleanup1; - } - } - /* split runs which are not consecutive in source text */ - for(i=runCount-1; i>=0; i--) { - runLength= i==0 ? 
runs[0].visualLimit : - runs[i].visualLimit-runs[i-1].visualLimit; - logicalStart=runs[i].logicalStart; - indexOddBit=GET_ODD_BIT(logicalStart); - logicalStart=GET_INDEX(logicalStart); - if(runLength<2) { - if(addedRuns) { - runs[i+addedRuns]=runs[i]; - } - logicalPos=visualMap[logicalStart]; - runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, - saveLevels[logicalPos]^indexOddBit); - continue; - } - if(indexOddBit) { - start=logicalStart; - limit=logicalStart+runLength-1; - step=1; - } else { - start=logicalStart+runLength-1; - limit=logicalStart; - step=-1; - } - for(j=start; j!=limit; j+=step) { - index0=visualMap[j]; - index1=visualMap[j+step]; - if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { - logicalPos=BIDI_MIN(visualMap[start], index0); - runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, - saveLevels[logicalPos]^indexOddBit); - runs[i+addedRuns].visualLimit=runs[i].visualLimit; - runs[i].visualLimit-=BIDI_ABS(j-start)+1; - insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); - runs[i+addedRuns].insertRemove=insertRemove; - runs[i].insertRemove&=~insertRemove; - start=j+step; - addedRuns--; - } - } - if(addedRuns) { - runs[i+addedRuns]=runs[i]; - } - logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); - runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, - saveLevels[logicalPos]^indexOddBit); - } - - cleanup1: - /* restore initial paraLevel */ - pBiDi->paraLevel^=1; - cleanup2: - /* restore real text */ - pBiDi->text=text; - pBiDi->length=saveLength; - pBiDi->originalLength=length; - pBiDi->direction=saveDirection; - /* the saved levels should never excess levelsSize, but we check anyway */ - if(saveLength>pBiDi->levelsSize) { - saveLength=pBiDi->levelsSize; - } - uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel)); - pBiDi->trailingWSStart=saveTrailingWSStart; - if(pBiDi->runCount>1) { - pBiDi->direction=UBIDI_MIXED; - } - cleanup3: - /* free memory for 
mapping table and visual text */ - uprv_free(runsOnlyMemory); - - pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; -} - -/* ubidi_setPara ------------------------------------------------------------ */ - -U_CAPI void U_EXPORT2 -ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, - UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, - UErrorCode *pErrorCode) { - UBiDiDirection direction; - DirProp *dirProps; - - /* check the argument values */ - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - if(pBiDi==NULL || text==NULL || length<-1 || - (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevelreorderingMode==UBIDI_REORDER_RUNS_ONLY) { - setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); - return; - } - - /* initialize the UBiDi structure */ - pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ - pBiDi->text=text; - pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; - pBiDi->paraLevel=paraLevel; - pBiDi->direction=(UBiDiDirection)(paraLevel&1); - pBiDi->paraCount=1; - - pBiDi->dirProps=NULL; - pBiDi->levels=NULL; - pBiDi->runs=NULL; - pBiDi->insertPoints.size=0; /* clean up from last call */ - pBiDi->insertPoints.confirmed=0; /* clean up from last call */ - - /* - * Save the original paraLevel if contextual; otherwise, set to 0. - */ - pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel); - - if(length==0) { - /* - * For an empty paragraph, create a UBiDi object with the paraLevel and - * the flags and the direction set but without allocating zero-length arrays. - * There is nothing more to do. 
- */ - if(IS_DEFAULT_LEVEL(paraLevel)) { - pBiDi->paraLevel&=1; - pBiDi->defaultParaLevel=0; - } - pBiDi->flags=DIRPROP_FLAG_LR(paraLevel); - pBiDi->runCount=0; - pBiDi->paraCount=0; - setParaSuccess(pBiDi); /* mark successful setPara */ - return; - } - - pBiDi->runCount=-1; - - /* allocate paras memory */ - if(pBiDi->parasMemory) - pBiDi->paras=pBiDi->parasMemory; - else - pBiDi->paras=pBiDi->simpleParas; - - /* - * Get the directional properties, - * the flags bit-set, and - * determine the paragraph level if necessary. - */ - if(getDirPropsMemory(pBiDi, length)) { - pBiDi->dirProps=pBiDi->dirPropsMemory; - if(!getDirProps(pBiDi)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - } else { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - dirProps=pBiDi->dirProps; - /* the processed length may have changed if UBIDI_OPTION_STREAMING */ - length= pBiDi->length; - pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ - - /* are explicit levels specified? 
*/ - if(embeddingLevels==NULL) { - /* no: determine explicit levels according to the (Xn) rules */\ - if(getLevelsMemory(pBiDi, length)) { - pBiDi->levels=pBiDi->levelsMemory; - direction=resolveExplicitLevels(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } else { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - } else { - /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ - pBiDi->levels=embeddingLevels; - direction=checkExplicitLevels(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } - - /* allocate isolate memory */ - if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT) - pBiDi->isolates=pBiDi->simpleIsolates; - else - if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize) - pBiDi->isolates=pBiDi->isolatesMemory; - else { - if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) { - pBiDi->isolates=pBiDi->isolatesMemory; - } else { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - } - pBiDi->isolateCount=-1; /* current isolates stack entry == none */ - - /* - * The steps after (X9) in the UBiDi algorithm are performed only if - * the paragraph text has mixed directionality! 
- */ - pBiDi->direction=direction; - switch(direction) { - case UBIDI_LTR: - /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ - pBiDi->trailingWSStart=0; - break; - case UBIDI_RTL: - /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ - pBiDi->trailingWSStart=0; - break; - default: - /* - * Choose the right implicit state table - */ - switch(pBiDi->reorderingMode) { - case UBIDI_REORDER_DEFAULT: - pBiDi->pImpTabPair=&impTab_DEFAULT; - break; - case UBIDI_REORDER_NUMBERS_SPECIAL: - pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; - break; - case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: - pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; - break; - case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: - pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; - break; - case UBIDI_REORDER_INVERSE_LIKE_DIRECT: - if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { - pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; - } else { - pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; - } - break; - case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: - if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { - pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; - } else { - pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; - } - break; - default: - /* we should never get here */ - U_ASSERT(FALSE); - break; - } - /* - * If there are no external levels specified and there - * are no significant explicit level codes in the text, - * then we can treat the entire paragraph as one run. - * Otherwise, we need to perform the following rules on runs of - * the text with the same embedding levels. (X10) - * "Significant" explicit level codes are ones that actually - * affect non-BN characters. - * Examples for "insignificant" ones are empty embeddings - * LRE-PDF, LRE-RLE-PDF-PDF, etc. 
- */ - if(embeddingLevels==NULL && pBiDi->paraCount<=1 && - !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { - resolveImplicitLevels(pBiDi, 0, length, - GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), - GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); - } else { - /* sor, eor: start and end types of same-level-run */ - UBiDiLevel *levels=pBiDi->levels; - int32_t start, limit=0; - UBiDiLevel level, nextLevel; - DirProp sor, eor; - - /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ - level=GET_PARALEVEL(pBiDi, 0); - nextLevel=levels[0]; - if(level0) && (dirProps[start-1]==B)) { - /* except if this is a new paragraph, then set sor = para level */ - sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); - } else { - sor=eor; - } - - /* search for the limit of this run */ - while((++limitinsertPoints.errorCode)) - { - *pErrorCode=pBiDi->insertPoints.errorCode; - return; - } - /* reset the embedding levels for some non-graphic characters (L1), (X9) */ - adjustWSLevels(pBiDi); - break; - } - /* add RLM for inverse Bidi with contextual orientation resolving - * to RTL which would not round-trip otherwise - */ - if((pBiDi->defaultParaLevel>0) && - (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && - ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || - (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { - int32_t i, j, start, last; - UBiDiLevel level; - DirProp dirProp; - for(i=0; iparaCount; i++) { - last=(pBiDi->paras[i].limit)-1; - level= static_cast(pBiDi->paras[i].level); - if(level==0) - continue; /* LTR paragraph */ - start= i==0 ? 
0 : pBiDi->paras[i-1].limit; - for(j=last; j>=start; j--) { - dirProp=dirProps[j]; - if(dirProp==L) { - if(jreorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { - pBiDi->resultLength -= pBiDi->controlCount; - } else { - pBiDi->resultLength += pBiDi->insertPoints.size; - } - setParaSuccess(pBiDi); /* mark successful setPara */ -} - -U_CAPI void U_EXPORT2 -ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) { - if(pBiDi!=NULL) { - pBiDi->orderParagraphsLTR=orderParagraphsLTR; - } -} - -U_CAPI UBool U_EXPORT2 -ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { - if(pBiDi!=NULL) { - return pBiDi->orderParagraphsLTR; - } else { - return FALSE; - } -} - -U_CAPI UBiDiDirection U_EXPORT2 -ubidi_getDirection(const UBiDi *pBiDi) { - if(IS_VALID_PARA_OR_LINE(pBiDi)) { - return pBiDi->direction; - } else { - return UBIDI_LTR; - } -} - -U_CAPI const UChar * U_EXPORT2 -ubidi_getText(const UBiDi *pBiDi) { - if(IS_VALID_PARA_OR_LINE(pBiDi)) { - return pBiDi->text; - } else { - return NULL; - } -} - -U_CAPI int32_t U_EXPORT2 -ubidi_getLength(const UBiDi *pBiDi) { - if(IS_VALID_PARA_OR_LINE(pBiDi)) { - return pBiDi->originalLength; - } else { - return 0; - } -} - -U_CAPI int32_t U_EXPORT2 -ubidi_getProcessedLength(const UBiDi *pBiDi) { - if(IS_VALID_PARA_OR_LINE(pBiDi)) { - return pBiDi->length; - } else { - return 0; - } -} - -U_CAPI int32_t U_EXPORT2 -ubidi_getResultLength(const UBiDi *pBiDi) { - if(IS_VALID_PARA_OR_LINE(pBiDi)) { - return pBiDi->resultLength; - } else { - return 0; - } -} - -/* paragraphs API functions ------------------------------------------------- */ - -U_CAPI UBiDiLevel U_EXPORT2 -ubidi_getParaLevel(const UBiDi *pBiDi) { - if(IS_VALID_PARA_OR_LINE(pBiDi)) { - return pBiDi->paraLevel; - } else { - return 0; - } -} - -U_CAPI int32_t U_EXPORT2 -ubidi_countParagraphs(UBiDi *pBiDi) { - if(!IS_VALID_PARA_OR_LINE(pBiDi)) { - return 0; - } else { - return pBiDi->paraCount; - } -} - -U_CAPI void U_EXPORT2 -ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t 
paraIndex, - int32_t *pParaStart, int32_t *pParaLimit, - UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { - int32_t paraStart; - - /* check the argument values */ - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode); - RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); - - pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ - if(paraIndex) { - paraStart=pBiDi->paras[paraIndex-1].limit; - } else { - paraStart=0; - } - if(pParaStart!=NULL) { - *pParaStart=paraStart; - } - if(pParaLimit!=NULL) { - *pParaLimit=pBiDi->paras[paraIndex].limit; - } - if(pParaLevel!=NULL) { - *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); - } -} - -U_CAPI int32_t U_EXPORT2 -ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, - int32_t *pParaStart, int32_t *pParaLimit, - UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { - int32_t paraIndex; - - /* check the argument values */ - /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ - RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); - RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); - pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ - RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); - - for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++); - ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); - return paraIndex; -} - -U_CAPI void U_EXPORT2 -ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, - const void *newContext, UBiDiClassCallback **oldFn, - const void **oldContext, UErrorCode *pErrorCode) -{ - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - if(pBiDi==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if( oldFn ) - { - *oldFn = pBiDi->fnClassCallback; - } - if( oldContext ) - { - *oldContext = pBiDi->coClassCallback; - } - pBiDi->fnClassCallback = newFn; - pBiDi->coClassCallback = newContext; -} 
- -U_CAPI void U_EXPORT2 -ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context) -{ - if(pBiDi==NULL) { - return; - } - if( fn ) - { - *fn = pBiDi->fnClassCallback; - } - if( context ) - { - *context = pBiDi->coClassCallback; - } -} - -U_CAPI UCharDirection U_EXPORT2 -ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) -{ - UCharDirection dir; - - if( pBiDi->fnClassCallback == NULL || - (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) - { - dir = ubidi_getClass(c); - } - if(dir >= U_CHAR_DIRECTION_COUNT) { - dir = (UCharDirection)ON; - } - return dir; -} diff --git a/deps/node/deps/icu-small/source/common/ubidi_props.cpp b/deps/node/deps/icu-small/source/common/ubidi_props.cpp deleted file mode 100644 index 4141c219..00000000 --- a/deps/node/deps/icu-small/source/common/ubidi_props.cpp +++ /dev/null @@ -1,254 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2004-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ubidi_props.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004dec30 -* created by: Markus W. Scherer -* -* Low-level Unicode bidi/shaping properties access. 
-*/ - -#include "unicode/utypes.h" -#include "unicode/uset.h" -#include "unicode/udata.h" /* UDataInfo */ -#include "ucmndata.h" /* DataHeader */ -#include "udatamem.h" -#include "uassert.h" -#include "cmemory.h" -#include "utrie2.h" -#include "ubidi_props.h" -#include "ucln_cmn.h" - -struct UBiDiProps { - UDataMemory *mem; - const int32_t *indexes; - const uint32_t *mirrors; - const uint8_t *jgArray; - const uint8_t *jgArray2; - - UTrie2 trie; - uint8_t formatVersion[4]; -}; - -/* ubidi_props_data.h is machine-generated by genbidi --csource */ -#define INCLUDED_FROM_UBIDI_PROPS_C -#include "ubidi_props_data.h" - -/* set of property starts for UnicodeSet ------------------------------------ */ - -static UBool U_CALLCONV -_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { - (void)end; - (void)value; - /* add the start code point to the USet */ - const USetAdder *sa=(const USetAdder *)context; - sa->add(sa->set, start); - return TRUE; -} - -U_CFUNC void -ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { - int32_t i, length; - UChar32 c, start, limit; - - const uint8_t *jgArray; - uint8_t prev, jg; - - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* add the start code point of each same-value range of the trie */ - utrie2_enum(&ubidi_props_singleton.trie, NULL, _enumPropertyStartsRange, sa); - - /* add the code points from the bidi mirroring table */ - length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH]; - for(i=0; iaddRange(sa->set, c, c+1); - } - - /* add the code points from the Joining_Group array where the value changes */ - start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START]; - limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]; - jgArray=ubidi_props_singleton.jgArray; - for(;;) { - prev=0; - while(startadd(sa->set, start); - prev=jg; - } - ++start; - } - if(prev!=0) { - /* add the limit code point if the last value was not 0 (it is now start==limit) */ - sa->add(sa->set, limit); - } 
- if(limit==ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]) { - /* switch to the second Joining_Group range */ - start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2]; - limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2]; - jgArray=ubidi_props_singleton.jgArray2; - } else { - break; - } - } - - /* add code points with hardcoded properties, plus the ones following them */ - - /* (none right now) */ -} - -/* property access functions ------------------------------------------------ */ - -U_CFUNC int32_t -ubidi_getMaxValue(UProperty which) { - int32_t max=ubidi_props_singleton.indexes[UBIDI_MAX_VALUES_INDEX]; - switch(which) { - case UCHAR_BIDI_CLASS: - return (max&UBIDI_CLASS_MASK); - case UCHAR_JOINING_GROUP: - return (max&UBIDI_MAX_JG_MASK)>>UBIDI_MAX_JG_SHIFT; - case UCHAR_JOINING_TYPE: - return (max&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT; - case UCHAR_BIDI_PAIRED_BRACKET_TYPE: - return (max&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT; - default: - return -1; /* undefined */ - } -} - -U_CAPI UCharDirection -ubidi_getClass(UChar32 c) { - uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); - return (UCharDirection)UBIDI_GET_CLASS(props); -} - -U_CFUNC UBool -ubidi_isMirrored(UChar32 c) { - uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); - return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT); -} - -static UChar32 -getMirror(UChar32 c, uint16_t props) { - int32_t delta=UBIDI_GET_MIRROR_DELTA(props); - if(delta!=UBIDI_ESC_MIRROR_DELTA) { - return c+delta; - } else { - /* look for mirror code point in the mirrors[] table */ - const uint32_t *mirrors; - uint32_t m; - int32_t i, length; - UChar32 c2; - - mirrors=ubidi_props_singleton.mirrors; - length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH]; - - /* linear search */ - for(i=0; i>UBIDI_JT_SHIFT); -} - -U_CFUNC UJoiningGroup -ubidi_getJoiningGroup(UChar32 c) { - UChar32 start, limit; - - start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START]; - 
limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]; - if(start<=c && c>UBIDI_BPT_SHIFT); -} - -U_CFUNC UChar32 -ubidi_getPairedBracket(UChar32 c) { - uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); - if((props&UBIDI_BPT_MASK)==0) { - return c; - } else { - return getMirror(c, props); - } -} - -/* public API (see uchar.h) ------------------------------------------------- */ - -U_CFUNC UCharDirection -u_charDirection(UChar32 c) { - return ubidi_getClass(c); -} - -U_CFUNC UBool -u_isMirrored(UChar32 c) { - return ubidi_isMirrored(c); -} - -U_CFUNC UChar32 -u_charMirror(UChar32 c) { - return ubidi_getMirror(c); -} - -U_STABLE UChar32 U_EXPORT2 -u_getBidiPairedBracket(UChar32 c) { - return ubidi_getPairedBracket(c); -} diff --git a/deps/node/deps/icu-small/source/common/ubidi_props.h b/deps/node/deps/icu-small/source/common/ubidi_props.h deleted file mode 100644 index 698ee9c5..00000000 --- a/deps/node/deps/icu-small/source/common/ubidi_props.h +++ /dev/null @@ -1,148 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2004-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ubidi_props.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004dec30 -* created by: Markus W. Scherer -* -* Low-level Unicode bidi/shaping properties access. 
-*/ - -#ifndef __UBIDI_PROPS_H__ -#define __UBIDI_PROPS_H__ - -#include "unicode/utypes.h" -#include "unicode/uset.h" -#include "putilimp.h" -#include "uset_imp.h" -#include "udataswp.h" - -U_CDECL_BEGIN - -/* library API -------------------------------------------------------------- */ - -U_CFUNC void -ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); - -/* property access functions */ - -U_CFUNC int32_t -ubidi_getMaxValue(UProperty which); - -U_CAPI UCharDirection -ubidi_getClass(UChar32 c); - -U_CFUNC UBool -ubidi_isMirrored(UChar32 c); - -U_CFUNC UChar32 -ubidi_getMirror(UChar32 c); - -U_CFUNC UBool -ubidi_isBidiControl(UChar32 c); - -U_CFUNC UBool -ubidi_isJoinControl(UChar32 c); - -U_CFUNC UJoiningType -ubidi_getJoiningType(UChar32 c); - -U_CFUNC UJoiningGroup -ubidi_getJoiningGroup(UChar32 c); - -U_CFUNC UBidiPairedBracketType -ubidi_getPairedBracketType(UChar32 c); - -U_CFUNC UChar32 -ubidi_getPairedBracket(UChar32 c); - -/* file definitions --------------------------------------------------------- */ - -#define UBIDI_DATA_NAME "ubidi" -#define UBIDI_DATA_TYPE "icu" - -/* format "BiDi" */ -#define UBIDI_FMT_0 0x42 -#define UBIDI_FMT_1 0x69 -#define UBIDI_FMT_2 0x44 -#define UBIDI_FMT_3 0x69 - -/* indexes into indexes[] */ -enum { - UBIDI_IX_INDEX_TOP, - UBIDI_IX_LENGTH, - UBIDI_IX_TRIE_SIZE, - UBIDI_IX_MIRROR_LENGTH, - - UBIDI_IX_JG_START, - UBIDI_IX_JG_LIMIT, - UBIDI_IX_JG_START2, /* new in format version 2.2, ICU 54 */ - UBIDI_IX_JG_LIMIT2, - - UBIDI_MAX_VALUES_INDEX=15, - UBIDI_IX_TOP=16 -}; - -/* definitions for 16-bit bidi/shaping properties word ---------------------- */ - -enum { - /* UBIDI_CLASS_SHIFT=0, */ /* bidi class: 5 bits (4..0) */ - UBIDI_JT_SHIFT=5, /* joining type: 3 bits (7..5) */ - - UBIDI_BPT_SHIFT=8, /* Bidi_Paired_Bracket_Type(bpt): 2 bits (9..8) */ - - UBIDI_JOIN_CONTROL_SHIFT=10, - UBIDI_BIDI_CONTROL_SHIFT=11, - - UBIDI_IS_MIRRORED_SHIFT=12, /* 'is mirrored' */ - UBIDI_MIRROR_DELTA_SHIFT=13, /* bidi mirroring 
delta: 3 bits (15..13) */ - - UBIDI_MAX_JG_SHIFT=16 /* max JG value in indexes[UBIDI_MAX_VALUES_INDEX] bits 23..16 */ -}; - -#define UBIDI_CLASS_MASK 0x0000001f -#define UBIDI_JT_MASK 0x000000e0 -#define UBIDI_BPT_MASK 0x00000300 - -#define UBIDI_MAX_JG_MASK 0x00ff0000 - -#define UBIDI_GET_CLASS(props) ((props)&UBIDI_CLASS_MASK) -#define UBIDI_GET_FLAG(props, shift) (((props)>>(shift))&1) - -#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC -# define UBIDI_GET_MIRROR_DELTA(props) ((int16_t)(props)>>UBIDI_MIRROR_DELTA_SHIFT) -#else -# define UBIDI_GET_MIRROR_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UBIDI_MIRROR_DELTA_SHIFT)|0xe000) : ((props)>>UBIDI_MIRROR_DELTA_SHIFT)) -#endif - -enum { - UBIDI_ESC_MIRROR_DELTA=-4, - UBIDI_MIN_MIRROR_DELTA=-3, - UBIDI_MAX_MIRROR_DELTA=3 -}; - -/* definitions for 32-bit mirror table entry -------------------------------- */ - -enum { - /* the source Unicode code point takes 21 bits (20..0) */ - UBIDI_MIRROR_INDEX_SHIFT=21, - UBIDI_MAX_MIRROR_INDEX=0x7ff -}; - -#define UBIDI_GET_MIRROR_CODE_POINT(m) (UChar32)((m)&0x1fffff) - -#define UBIDI_GET_MIRROR_INDEX(m) ((m)>>UBIDI_MIRROR_INDEX_SHIFT) - -U_CDECL_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/ubidi_props_data.h b/deps/node/deps/icu-small/source/common/ubidi_props_data.h deleted file mode 100644 index 55e331b6..00000000 --- a/deps/node/deps/icu-small/source/common/ubidi_props_data.h +++ /dev/null @@ -1,894 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// -// Copyright (C) 1999-2016, International Business Machines -// Corporation and others. All Rights Reserved. 
-// -// file name: ubidi_props_data.h -// -// machine-generated by: icu/tools/unicode/c/genprops/bidipropsbuilder.cpp - - -#ifdef INCLUDED_FROM_UBIDI_PROPS_C - -static const UVersionInfo ubidi_props_dataVersion={0xb,0,0,0}; - -static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x647c,0x5e98,0x28,0x620,0x8c0,0x10ac0,0x10d24,0,0,0,0,0,0,0,0x6502b6}; - -static const uint16_t ubidi_props_trieIndex[12100]={ -0x36f,0x377,0x37f,0x387,0x39f,0x3a7,0x3af,0x3b7,0x38f,0x397,0x38f,0x397,0x38f,0x397,0x38f,0x397, -0x38f,0x397,0x38f,0x397,0x3bd,0x3c5,0x3cd,0x3d5,0x3dd,0x3e5,0x3e1,0x3e9,0x3f1,0x3f9,0x3f4,0x3fc, -0x38f,0x397,0x38f,0x397,0x404,0x40c,0x38f,0x397,0x38f,0x397,0x38f,0x397,0x412,0x41a,0x422,0x42a, -0x432,0x43a,0x442,0x44a,0x450,0x458,0x460,0x468,0x470,0x478,0x47e,0x486,0x48e,0x496,0x49e,0x4a6, -0x4b2,0x4ae,0x4ba,0x4c2,0x424,0x4d2,0x4da,0x4ca,0x4e2,0x4e4,0x4ec,0x4f4,0x4fc,0x4fd,0x505,0x50d, -0x515,0x4fd,0x51d,0x522,0x515,0x4fd,0x52a,0x532,0x4fc,0x53a,0x542,0x4f4,0x547,0x38f,0x54f,0x553, -0x55b,0x55d,0x565,0x56d,0x4fc,0x575,0x57d,0x4f4,0x406,0x581,0x505,0x4f4,0x38f,0x38f,0x589,0x38f, -0x38f,0x58f,0x597,0x38f,0x38f,0x59b,0x5a3,0x38f,0x5a7,0x5ae,0x38f,0x5b6,0x5be,0x5c5,0x546,0x38f, -0x38f,0x5cd,0x5d5,0x5dd,0x5e5,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x5ed,0x38f,0x5f5,0x38f,0x38f,0x38f, -0x5fd,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x605,0x38f,0x38f,0x38f,0x60d,0x60d,0x509,0x509,0x38f,0x613,0x61b,0x5f5, -0x631,0x623,0x623,0x639,0x640,0x629,0x38f,0x38f,0x38f,0x648,0x650,0x38f,0x38f,0x38f,0x652,0x65a, -0x662,0x38f,0x669,0x671,0x38f,0x679,0x38f,0x38f,0x539,0x681,0x547,0x689,0x406,0x691,0x38f,0x698, -0x38f,0x69d,0x38f,0x38f,0x38f,0x38f,0x6a3,0x6ab,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x3dd,0x6b3, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x6bb,0x6c3,0x6c7, 
-0x6df,0x6e5,0x6cf,0x6d7,0x6ed,0x6f5,0x6f9,0x5c8,0x701,0x709,0x711,0x38f,0x719,0x65a,0x65a,0x65a, -0x729,0x731,0x739,0x741,0x746,0x74e,0x756,0x721,0x75e,0x766,0x38f,0x76c,0x773,0x65a,0x65a,0x65a, -0x65a,0x573,0x779,0x65a,0x781,0x38f,0x38f,0x657,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a, -0x65a,0x65a,0x65a,0x65a,0x65a,0x789,0x65a,0x65a,0x65a,0x65a,0x65a,0x78f,0x65a,0x65a,0x797,0x79f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x65a,0x65a,0x65a,0x65a,0x7af,0x7b7,0x7bf,0x7a7, -0x7cf,0x7d7,0x7df,0x7e6,0x7ed,0x7f5,0x7f9,0x7c7,0x65a,0x65a,0x65a,0x801,0x807,0x65a,0x80d,0x810, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x818,0x38f,0x38f,0x38f,0x820,0x38f,0x38f,0x38f,0x3dd, -0x828,0x830,0x835,0x38f,0x83d,0x65a,0x65a,0x65d,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x844,0x84a, -0x85a,0x852,0x38f,0x38f,0x862,0x5fd,0x38f,0x3b6,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x65a,0x81f, -0x3c4,0x38f,0x839,0x86a,0x38f,0x872,0x87a,0x38f,0x38f,0x38f,0x38f,0x87e,0x38f,0x38f,0x652,0x3b5, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, 
-0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x65a,0x65a, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, 
-0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, 
-0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x839,0x65a,0x573,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x885,0x38f,0x38f,0x88a,0x55d,0x38f,0x38f,0x5a9,0x65a,0x651,0x38f,0x38f,0x892,0x38f,0x38f,0x38f, -0x89a,0x8a1,0x623,0x8a9,0x38f,0x38f,0x57f,0x8b1,0x38f,0x8b9,0x8c0,0x38f,0x4e2,0x8c5,0x38f,0x4fb, -0x38f,0x8cd,0x8d5,0x4fd,0x38f,0x8d9,0x4fc,0x8e1,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x8e8, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, 
-0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, 
-0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x8fc,0x8f0,0x8f4,0x48e,0x48e,0x48e,0x48e,0x48e, -0x48e,0x48e,0x48e,0x48e,0x48e,0x48e,0x48e,0x48e,0x48e,0x904,0x48e,0x48e,0x48e,0x48e,0x90c,0x910, -0x918,0x920,0x924,0x92c,0x48e,0x48e,0x48e,0x930,0x938,0x37f,0x940,0x948,0x38f,0x38f,0x38f,0x950, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0xe3c,0xe3c,0xe7c,0xebc,0xe3c,0xe3c,0xe3c,0xe3c,0xe3c,0xe3c,0xef4,0xf34,0xf74,0xf84,0xfc4,0xfd0, -0xe3c,0xe3c,0x1010,0xe3c,0xe3c,0xe3c,0x1048,0x1088,0x10c8,0x1108,0x1140,0x1180,0x11c0,0x11f8,0x1238,0x1278, -0xa40,0xa80,0xac0,0xaff,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xb37,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xb74,0x1a0,0x1a0,0xba9,0xbe9,0xc29,0xc69,0xca9,0xce9, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, 
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, 
-0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0xd69,0xd79,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, -0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd29, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x958,0x38f,0x65a,0x65a,0x960,0x5fd,0x38f,0x4f5, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x968,0x38f,0x38f,0x38f,0x96f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x977,0x424,0x424,0x424,0x424,0x424,0x424,0x424, -0x97f,0x983,0x424,0x424,0x424,0x424,0x993,0x98b,0x424,0x99b,0x424,0x424,0x9a3,0x9a9,0x424,0x424, -0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x9b9,0x9b1,0x424,0x424,0x424,0x424,0x424,0x424, -0x424,0x424,0x424,0x9c1,0x424,0x424,0x424,0x424,0x424,0x9c9,0x9d0,0x9d6,0x424,0x424,0x424,0x424, -0x4fc,0x9de,0x9e5,0x9ec,0x406,0x9ef,0x38f,0x38f,0x4e2,0x9f6,0x38f,0x9fc,0x406,0xa01,0xa09,0x38f, -0x38f,0xa0e,0x38f,0x38f,0x38f,0x38f,0x820,0xa16,0x406,0x581,0x55c,0xa1d,0x38f,0x38f,0x38f,0x38f, -0x38f,0x9de,0xa25,0x38f,0x38f,0xa2d,0xa35,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0xa39,0xa41,0x38f, -0x38f,0xa49,0x55c,0xa51,0x38f,0xa57,0x38f,0x38f,0x5ed,0xa5f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, 
-0xa64,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0xa6c, -0xa70,0xa78,0x38f,0xa7f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0xa86,0x38f,0x38f,0xa94,0xa8e,0x38f,0x38f,0x38f,0xa9c,0xaa4,0x38f,0xaa8,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x583,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0xaae,0x38f, -0xab4,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0xaba,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x516,0xac2,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0xac9,0xad1,0xad7,0x38f,0x38f,0x65a,0x65a,0xadf,0x38f,0x38f,0x38f,0x38f, -0x38f,0x65a,0x65a,0x833,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0xae1, -0x38f,0xae8,0x38f,0xae4,0x38f,0xaeb,0x38f,0xaf3,0xaf7,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x3dd,0xaff,0x3dd,0xb06,0xb0d,0xb15,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0xb1d,0xb25,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, 
-0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x424,0x424,0x424,0x424,0x424,0x424,0xb2d, -0x424,0xb35,0xb35,0xb3c,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424, -0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x8f4,0x48e,0x48e,0x424, -0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x48e,0x48e,0x48e,0x48e,0x48e,0x48e,0x48e, -0xb44,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x424,0x65a,0xb4c,0x65a,0x65a,0x65d,0xb51,0xb55, -0x844,0xb5d,0x3b1,0x38f,0xb63,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x76a,0x38f,0x38f,0x38f, -0x38f,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a, -0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0x65a,0xb6b, -0xb73,0x65a,0x65a,0x65a,0x65d,0x65a,0x65a,0xb7b,0x38f,0xb4c,0x65a,0xb83,0x65a,0xb8b,0x846,0x38f, -0x38f,0xb4c,0xb8f,0x65a,0xb97,0x65a,0xb9f,0xba7,0x65a,0x38f,0x38f,0x38f,0x846,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0xbaf,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f, -0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0x38f,0xbaf,0xbbf,0xbb7,0xbb7,0xbb7,0xbc0,0xbc0,0xbc0, -0xbc0,0x3dd,0x3dd,0x3dd,0x3dd,0x3dd,0x3dd,0x3dd,0xbc8,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0, 
-0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0, -0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0, -0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0, -0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0xbc0,0x36e,0x36e,0x36e,0x12,0x12,0x12,0x12, -0x12,0x12,0x12,0x12,0x12,8,7,8,9,7,0x12,0x12,0x12,0x12,0x12,0x12, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,7,7,7,8,9,0xa,0xa,4, -4,4,0xa,0xa,0x310a,0xf20a,0xa,3,6,3,6,6,2,2,2,2, -2,2,2,2,2,2,6,0xa,0x500a,0xa,0xd00a,0xa,0xa,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0x510a,0xa,0xd20a,0xa,0xa,0xa,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0x510a,0xa,0xd20a,0xa,0x12,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x12,0x12,0x12,0x12, -0x12,7,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,6,0xa,4,4, -4,4,0xa,0xa,0xa,0xa,0,0x900a,0xa,0xb2,0xa,0xa,4,4,2,2, -0xa,0,0xa,0xa,0xa,2,0,0x900a,0xa,0xa,0xa,0xa,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0, -0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, 
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xa,0xa,0,0,0,0,0,0, -0,0,0xa,0,0,0,0,0,0xa,0xa,0,0xa,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xa,0,0,0,0,0,0,0,0,0, -0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0xa,0,0,0xa,0xa,4,1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,0xb1,1,0xb1,0xb1,1,0xb1,0xb1,1,0xb1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,5,5,5,5,5,5,0xa,0xa, -0xd,4,4,0xd,6,0xd,0xa,0xa,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xd,0x8ad,0xd,0xd,0xd,0x4d,0xd,0x8d,0x8d,0x8d,0x8d,0x4d,0x8d, -0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d, -0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x2d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, -0x8d,0x4d,0x4d,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,5,5,5,5,5,5,5,5, -5,5,4,5,5,0xd,0x4d,0x4d,0xb1,0x8d,0x8d,0x8d,0xd,0x8d,0x8d,0x8d, -0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d, -0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, -0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, -0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, -0x8d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x4d,0x8d,0x4d,0x8d, -0x4d,0x4d,0x8d,0x8d,0xd,0x8d,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,5,0xa,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0xd,0xb1,0xb1,0xa,0xb1,0xb1,0xb1,0xb1,0x8d,0x8d, 
-2,2,2,2,2,2,2,2,2,2,0x4d,0x4d,0x4d,0xd,0xd,0x4d, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xad, -0x8d,0xb1,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x8d,0x4d, -0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x8d,0x4d,0x8d,0x4d,0x4d,0x8d, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xd,0xd,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, -0x4d,0x8d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, -0x4d,0x4d,0x4d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x8d,0x8d,0x4d,0x4d,0x4d, -0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,1,1,1,1,1,1,1,1, -1,1,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41, -0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41, -0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,0xa,0xa, -0xa,0xa,0x21,1,1,0xb1,1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1,0xb1,0xb1, -1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1, -0xb1,0xb1,1,0xb1,0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41,0x41,0x41,0x81,0x81, -0x41,0x81,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x81,0x41,1,1, -1,0xb1,0xb1,0xb1,1,1,1,1,0x4d,0xd,0x4d,0x4d,0x4d,0x4d,0xd,0x8d, -0x4d,0x8d,0x8d,0xd,0xd,0xd,0xd,0xd,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xb1,0xb1,5,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, -0x4d,0x4d,0x8d,0x8d,0x8d,0xd,0x8d,0x4d,0x4d,0x8d,0x8d,0x4d,0x4d,0xd,0x4d,0x4d, -0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, 
-0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0,0,0, -0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1,0,0, -0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, -0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0, -0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,4, -0,0,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0,0,0xb1, -0xb1,0xb1,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0xb1,0,0, -0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1, -0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0, -0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0, -0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xb1,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0, -0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0xb1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0, -0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa, -0xa,0xa,0xa,0xa,0xa,4,0xa,0,0,0,0,0,0xb1,0,0,0, -0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1, -0xb1,0xb1,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xb1,0,0,0xa0,0,0,0,0,0,0,0xa0,0,0,0,0,0, -0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, -0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0, -0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0, 
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,4,0,0,0,0, -0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0, -0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1, -0,0xb1,0x310a,0xf20a,0x310a,0xf20a,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1, -0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0, -0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0, -0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0,0,0,0,0, -0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0, -0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x310a, -0xf20a,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0, -0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0,0,0,0,0,0,0,4,0,0xb1,0,0,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0x40,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
-0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x4a,0xa,0xa,0x2a,0xb1, -0xb1,0xb1,0x12,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0,0,0, -0,0,0,0,0,0xb1,0xb1,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0xb1,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0, -0,0,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0, -0xa,0,0,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, -0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0,0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0, -0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0xb1, -0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0,0,0,0xb1,0,0xb1, -0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1,0,0,0,0,0,0, -0xb1,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0xa,0,0xa,0xa,0xa,0,0, 
-0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0, -0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0, -0,0,0,0,0,0,0,0,0,0xa,0xa,0,0xa,0xa,0xa,0xa, -6,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,0xb2,0xb2,0xb2,0xb2, -0xb2,0x12,0x814,0x815,0x813,0x816,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,2,0,0,0, -2,2,2,2,2,2,3,3,0xa,0x310a,0xf20a,0,9,9,9,9, -9,9,9,9,9,9,9,0xb2,0x412,0x432,0x8a0,0x8a1,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,7,0x8ab,0x8ae, -0x8b0,0x8ac,0x8af,6,4,4,4,4,4,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa, -0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2,2,2,3,3, -0xa,0x310a,0xf20a,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,4,4,4,4,4,4,4,4,4,4,4,4, -4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, -4,4,4,4,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0xa,0xa,0xa,0xa,0,0xa,0xa,0,0, -0,0,0,0,0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0, -0,0,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0,0xa,0,0xa,0,0, -0,0,4,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa, -0,0,0,0,0x100a,0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa, -0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa, -0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, -0x300a,0xf00a,0xa,0xa,0x300a,0xf00a,0x900a,0x900a,0x900a,0x100a,0x900a,0x900a,0x100a,0x100a,0x900a,0x900a, -0x900a,0x900a,0x900a,0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0x700a,0x700a,0x700a,0xb00a, -0xb00a,0xb00a,0xa,0xa,0xa,0x100a,3,4,0xa,0x900a,0x100a,0xa,0xa,0xa,0x100a,0x100a, -0x100a,0x100a,0xa,0x900a,0x900a,0x900a,0x900a,0xa,0x900a,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a, -0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a, -0x300a,0xf00a,0x100a,0x100a,0x100a,0x100a,0x100a,0x900a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a, -0x900a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa, 
-0xa,0xa,0xa,0x100a,0x100a,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, -0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, -0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa, -0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x900a,0x100a, -0x900a,0x900a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, -0x900a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0x300a,0xf00a,0x300a,0xf00a,0x900a,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x300a,0xf00a, -0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a, -0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0x300a, -0xf00a,0x310a,0xf20a,0xa,0x300a,0xf00a,0xa,0x500a,0x100a,0xd00a,0xa,0xa,0xa,0xa,0xa,0x100a, -0x100a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x900a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a, -0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a, 
-0x100a,0x100a,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0x100a,0x900a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0x310a, -0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x710a,0x320a,0xf10a,0xb20a,0x310a,0xf20a,0x310a, -0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x900a,0x100a,0x100a,0x100a,0x100a,0x900a,0xa,0x100a,0x900a, -0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a, -0x300a,0xf00a,0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x300a,0xf00a,0x300a,0xf00a,0xa, -0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a, -0x900a,0x900a,0x100a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x900a,0xa,0xa,0xa,0xa,0x100a, -0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a, -0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa, -0x100a,0xa,0x100a,0xa,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa, -0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa, -0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa, -0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a, -0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a, -0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, -0x300a,0xf00a,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a, 
-0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa, -0x100a,0xa,0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x900a,0, -0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xb1, -0xb1,0xb1,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, -0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa, -0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0,0,0,0xa,0,0,0,0,0,0,0, -0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xa,0,0,0,0,0,0xa,0xa, -0,0,0,0,0,0xa,0xa,0xa,9,0xa,0xa,0xa,0xa,0,0,0, -0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a, -0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0, -0,0,0,0,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa, -0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0, 
-0,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xa,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0, -0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0xb1,0, -0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xa,0xa,0xa,0xa, -0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0, -0,0,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x60,0,0xa,0xa,0xa,0xa,0,0,0,0, -0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0,0, -0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0, -0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0, -0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0, -0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, -0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0, -0,0,0,0,0,0xb1,0,0,0xb1,0,0,0,0,0xb1,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,1,1,1,1,1,1,1,3,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,1,0xb1,1, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xd,0xd, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0, 
-0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -6,0xa,6,0,0xa,6,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,4, -0xa,0xa,3,3,0x300a,0xf00a,0xa,0,0xa,4,4,0xa,0,0,0,0, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xb2, -0,0xa,0xa,4,4,4,0xa,0xa,0x310a,0xf20a,0xa,3,6,3,6,6, -2,2,2,2,2,2,2,2,2,2,6,0xa,0x500a,0xa,0xd00a,0xa, -0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0x510a,0xa,0xd20a,0xa,0x310a, -0xf20a,0xa,0x310a,0xf20a,0xa,0xa,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -4,4,0xa,0xa,0xa,4,4,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xaa,0xaa,0xaa,0xa,0xa,0x12,0x12, -0,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, -0xb1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0xa,1,0xb1,0xb1,0xb1, -1,0xb1,0xb1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xb1,0xb1,0xb1,1,1,1,1,0xb1,0x41,0x81,1,1, -0x81,0xb1,0xb1,1,1,1,1,0x41,0x41,0x41,0x41,0x81,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0x41,0x41,0x41,0x41, -0x41,0x81,1,0x81,1,0x81,0x81,1,1,0x61,0x81,0x81,0x81,0x81,0x81,0x41, -0x41,0x41,0x41,0x61,0x41,0x41,0x41,0x41,0x41,0x81,0x41,0x41,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x41,0x81,0x41,0x81, -0x81,0x81,0x41,0x41,0x41,0x81,0x41,0x41,0x81,0x41,0x81,0x81,0x41,0x81,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0x81,0x81,0x81, -0x81,0x41,0x41,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0x4d,0x4d,0x8d,0x4d,0xb1,0xb1,0xb1,0xb1,0xd,0xd,0xd,0xd, -0xd,0xd,0xd,0xd,5,5,5,5,5,5,5,5,5,5,0xd,0xd, 
-0xd,0xd,0xd,0xd,0x6d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, -0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, -0x4d,0x4d,0x4d,0x4d,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, -0x4d,0x4d,0x4d,0x4d,0x4d,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0x4d,0x4d,0x4d,0x8d,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, -0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, -0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1, -0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1, -0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,0xb1, -0,0,0,0,0,0,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0, -0,0,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0, -0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, -0,0xb1,0,0xb1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
-0,0,0,0,0,0,0,0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, -0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0, -0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xa0,0xa0,0xb1,0xb1,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0, -0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0, -0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0,0,0,0, -0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xa0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xb1,0xb1,0,0,0,0xb1,0,0xb1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, -0,0,0,0,0,0,0,0,0xb2,0xb2,0xb2,0xb2,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0, -0,0,0,0,0,0,0,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xb1,0xb1, -0xb1,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x100a, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0x100a,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x100a, -0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 
-2,2,2,2,2,2,2,2,2,2,2,2,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0, -0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, -0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1, -1,1,1,1,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41, -0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41, -0x41,0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2,2,2,2,0xa, -0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xa,0xa,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, 
-0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa, -0xa,0xa,0xa,0,0,0,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0xa,0xa,0xa,0, -0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, -0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0x12,0x12,0xb2,0xb2,0xb2,0xb2, -0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2, -0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0xb2,0x12,0x12, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, -0,0,0,0 -}; - -static const uint32_t ubidi_props_mirrors[40]={ -0x2000ab,0xbb,0x4202215,0x4e0221f,0x3e02220,0x3a02221,0x3c02222,0x4c02224,0x2202243,0x1402245,0x120224c,0x4002298,0x44022a6,0x48022a8,0x46022a9,0x4a022ab, -0x38022b8,0x10022cd,0x2e022f2,0x30022f3,0x32022f4,0x34022f6,0x36022f7,0x24022fa,0x26022fb,0x28022fc,0x2a022fd,0x2c022fe,0x20027dc,0xa0299b,0xc029a0,0x8029a3, -0x16029b8,0x4029f5,0x1802ade,0x1c02ae3,0x1a02ae4,0x1e02ae5,0xe02aee,0x602bfe -}; - -static const uint8_t ubidi_props_jgArray[672]={ -0x2d,0,3,3,0x2c,3,0x2d,3,4,0x2a,4,4,0xd,0xd,0xd,6, -6,0x1f,0x1f,0x23,0x23,0x21,0x21,0x28,0x28,1,1,0xb,0xb,0x37,0x37,0x37, -0,9,0x1d,0x13,0x16,0x18,0x1a,0x10,0x2c,0x2d,0x2d,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0x1d, -0,3,3,3,0,3,0x2c,0x2c,0x2d,4,4,4,4,4,4,4, -4,0xd,0xd,0xd,0xd,0xd,0xd,0xd,6,6,6,6,6,6,6,6, 
-6,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x23,0x23,0x23,0x21,0x21,0x28, -1,9,9,9,9,9,9,0x1d,0x1d,0xb,0x26,0xb,0x13,0x13,0x13,0xb, -0xb,0xb,0xb,0xb,0xb,0x16,0x16,0x16,0x16,0x1a,0x1a,0x1a,0x1a,0x38,0x15,0xd, -0x2a,0x11,0x11,0xe,0x2c,0x2c,0x2c,0x2c,0x2c,0x2c,0x2c,0x2c,0x37,0x2f,0x37,0x2c, -0x2d,0x2d,0x2e,0x2e,0,0x2a,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0x1f, -0,0,0,0,0,0,0,0,0,0,0x23,0x21,1,0,0,0x15, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,5,0xc,0xc,7,7,0xf,0x27,0x32,0x12,0x2b,0x2b,0x30,0x31,0x14, -0x17,0x19,0x1b,0x24,0xa,8,0x1c,0x20,0x22,0x1e,7,0x25,0x29,5,0xc,7, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0x35,0x34,0x33, -4,4,4,4,4,4,4,0xd,0xd,6,6,0x1f,0x23,1,1,1, -9,9,0xb,0xb,0xb,0x18,0x18,0x1a,0x1a,0x1a,0x16,0x1f,0x1f,0x23,0xd,0xd, -0x23,0x1f,0xd,3,3,0x37,0x37,0x2d,0x2c,0x2c,0x36,0x36,0xd,0x23,0x23,0x13, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x5d,0x5a,0x60,0x63,0x5e,0x5f,0x59,0x61,0x5b,0x5c,0x62,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -4,4,0xd,0x28,9,0x1d,0x16,0x18,0x2d,0x2d,0x1f,0x2c,0x39,0,6,0x21, -0xb,0x55,0x1f,1,0x13,0,4,4,4,0x1f,0x2d,0x56,0x58,0x57,0,0 -}; - -static const uint8_t ubidi_props_jgArray2[612]={ -0x3a,0x3c,0x3c,0x40,0x40,0x3d,0,0x52,0,0x54,0x54,0,0,0x41,0x4f,0x53, -0x43,0x43,0x43,0x44,0x3e,0x50,0x45,0x46,0x4c,0x3b,0x3b,0x48,0x48,0x4b,0x49,0x49, -0x49,0x4a,0,0,0x4d,0,0,0,0,0,0,0x47,0x3f,0x4e,0x51,0x42, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0x65,0,0,0,0,0,0,0x65,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0x64,0,0,0x65,0,0x64,0, -0x64,0,0,0x64 -}; - -static const UBiDiProps ubidi_props_singleton={ - NULL, - ubidi_props_indexes, - ubidi_props_mirrors, - ubidi_props_jgArray, - ubidi_props_jgArray2, - { - ubidi_props_trieIndex, - ubidi_props_trieIndex+3516, - NULL, - 3516, - 8584, - 0x1a0, - 0xe3c, - 0x0, - 0x0, - 0x110000, - 0x2f40, - NULL, 0, FALSE, FALSE, 0, NULL - }, - { 2,2,0,0 } -}; - -#endif // INCLUDED_FROM_UBIDI_PROPS_C diff --git a/deps/node/deps/icu-small/source/common/ubidiimp.h b/deps/node/deps/icu-small/source/common/ubidiimp.h deleted file mode 100644 index a5d07274..00000000 --- a/deps/node/deps/icu-small/source/common/ubidiimp.h +++ /dev/null @@ -1,468 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ubidiimp.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999aug06 -* created by: Markus W. Scherer, updated by Matitiahu Allouche -*/ - -#ifndef UBIDIIMP_H -#define UBIDIIMP_H - -#include "unicode/utypes.h" -#include "unicode/ubidi.h" -#include "unicode/uchar.h" -#include "ubidi_props.h" - -/* miscellaneous definitions ---------------------------------------------- */ - -typedef uint8_t DirProp; -typedef uint32_t Flags; - -/* Comparing the description of the BiDi algorithm with this implementation - is easier with the same names for the BiDi types in the code as there. - See UCharDirection in uchar.h . 
-*/ -enum { - L= U_LEFT_TO_RIGHT, /* 0 */ - R= U_RIGHT_TO_LEFT, /* 1 */ - EN= U_EUROPEAN_NUMBER, /* 2 */ - ES= U_EUROPEAN_NUMBER_SEPARATOR, /* 3 */ - ET= U_EUROPEAN_NUMBER_TERMINATOR, /* 4 */ - AN= U_ARABIC_NUMBER, /* 5 */ - CS= U_COMMON_NUMBER_SEPARATOR, /* 6 */ - B= U_BLOCK_SEPARATOR, /* 7 */ - S= U_SEGMENT_SEPARATOR, /* 8 */ - WS= U_WHITE_SPACE_NEUTRAL, /* 9 */ - ON= U_OTHER_NEUTRAL, /* 10 */ - LRE=U_LEFT_TO_RIGHT_EMBEDDING, /* 11 */ - LRO=U_LEFT_TO_RIGHT_OVERRIDE, /* 12 */ - AL= U_RIGHT_TO_LEFT_ARABIC, /* 13 */ - RLE=U_RIGHT_TO_LEFT_EMBEDDING, /* 14 */ - RLO=U_RIGHT_TO_LEFT_OVERRIDE, /* 15 */ - PDF=U_POP_DIRECTIONAL_FORMAT, /* 16 */ - NSM=U_DIR_NON_SPACING_MARK, /* 17 */ - BN= U_BOUNDARY_NEUTRAL, /* 18 */ - FSI=U_FIRST_STRONG_ISOLATE, /* 19 */ - LRI=U_LEFT_TO_RIGHT_ISOLATE, /* 20 */ - RLI=U_RIGHT_TO_LEFT_ISOLATE, /* 21 */ - PDI=U_POP_DIRECTIONAL_ISOLATE, /* 22 */ - ENL, /* EN after W7 */ /* 23 */ - ENR, /* EN not subject to W7 */ /* 24 */ - dirPropCount -}; - -/* Sometimes, bit values are more appropriate - to deal with directionality properties. - Abbreviations in these macro names refer to names - used in the BiDi algorithm. -*/ -#define DIRPROP_FLAG(dir) (1UL<<(dir)) -#define PURE_DIRPROP(prop) ((prop)&~0xE0) ????????????????????????? - -/* special flag for multiple runs from explicit embedding codes */ -#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31) - -/* are there any characters that are LTR or RTL? 
*/ -#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(ENL)|DIRPROP_FLAG(ENR)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI)) -#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI)) -#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)) -#define MASK_STRONG_EN_AN (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)) - -/* explicit embedding codes */ -#define MASK_EXPLICIT (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(PDF)) - -/* explicit isolate codes */ -#define MASK_ISO (DIRPROP_FLAG(LRI)|DIRPROP_FLAG(RLI)|DIRPROP_FLAG(FSI)|DIRPROP_FLAG(PDI)) - -#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT) - -/* paragraph and segment separators */ -#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S)) - -/* all types that are counted as White Space or Neutral in some steps */ -#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT|MASK_ISO) - -/* types that are neutrals or could becomes neutrals in (Wn) */ -#define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS) - -/* - * These types may be changed to "e", - * the embedding type (L or R) of the run, - * in the BiDi algorithm (N2) - */ -#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N) - -/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */ -#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1)) - -#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe) - -/* - * The following bit is used for the directional isolate status. - * Stack entries corresponding to isolate sequences are greater than ISOLATE. - */ -#define ISOLATE 0x0100 - -U_CFUNC UBiDiLevel -ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t index); - -#define GET_PARALEVEL(ubidi, index) \ - ((UBiDiLevel)(!(ubidi)->defaultParaLevel || (index)<(ubidi)->paras[0].limit ? 
\ - (ubidi)->paraLevel : ubidi_getParaLevelAtIndex((ubidi), (index)))) - -/* number of paras entries allocated initially without malloc */ -#define SIMPLE_PARAS_COUNT 10 -/* number of isolate entries allocated initially without malloc */ -#define SIMPLE_ISOLATES_COUNT 5 -/* number of isolate run entries for paired brackets allocated initially without malloc */ -#define SIMPLE_OPENINGS_COUNT 20 - -#define CR 0x000D -#define LF 0x000A - -/* Run structure for reordering --------------------------------------------- */ -enum { - LRM_BEFORE=1, - LRM_AFTER=2, - RLM_BEFORE=4, - RLM_AFTER=8 -}; - -typedef struct Para { - int32_t limit; - int32_t level; -} Para; - -enum { /* flags for Opening.flags */ - FOUND_L=DIRPROP_FLAG(L), - FOUND_R=DIRPROP_FLAG(R) -}; - -typedef struct Opening { - int32_t position; /* position of opening bracket */ - int32_t match; /* matching char or -position of closing bracket */ - int32_t contextPos; /* position of last strong char found before opening */ - uint16_t flags; /* bits for L or R/AL found within the pair */ - UBiDiDirection contextDir; /* L or R according to last strong char before opening */ - uint8_t filler; /* to complete a nice multiple of 4 chars */ -} Opening; - -typedef struct IsoRun { - int32_t contextPos; /* position of char determining context */ - uint16_t start; /* index of first opening entry for this run */ - uint16_t limit; /* index after last opening entry for this run */ - UBiDiLevel level; /* level of this run */ - DirProp lastStrong; /* bidi class of last strong char found in this run */ - DirProp lastBase; /* bidi class of last base char found in this run */ - UBiDiDirection contextDir; /* L or R to use as context for following openings */ -} IsoRun; - -typedef struct BracketData { - UBiDi *pBiDi; - /* array of opening entries which should be enough in most cases; no malloc() */ - Opening simpleOpenings[SIMPLE_OPENINGS_COUNT]; - Opening *openings; /* pointer to current array of entries */ - int32_t openingsCount; /* 
number of allocated entries */ - int32_t isoRunLast; /* index of last used entry */ - /* array of nested isolated sequence entries; can never excess UBIDI_MAX_EXPLICIT_LEVEL - + 1 for index 0, + 1 for before the first isolated sequence */ - IsoRun isoRuns[UBIDI_MAX_EXPLICIT_LEVEL+2]; - UBool isNumbersSpecial; /* reordering mode for NUMBERS_SPECIAL */ -} BracketData; - -typedef struct Isolate { - int32_t startON; - int32_t start1; - int32_t state; - int16_t stateImp; -} Isolate; - -typedef struct Run { - int32_t logicalStart, /* first character of the run; b31 indicates even/odd level */ - visualLimit, /* last visual position of the run +1 */ - insertRemove; /* if >0, flags for inserting LRM/RLM before/after run, - if <0, count of bidi controls within run */ -} Run; - -/* in a Run, logicalStart will get this bit set if the run level is odd */ -#define INDEX_ODD_BIT (1UL<<31) - -#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31)) -#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)(level)<<31)) -#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT) - -#define GET_INDEX(x) ((x)&~INDEX_ODD_BIT) -#define GET_ODD_BIT(x) ((uint32_t)(x)>>31) -#define IS_ODD_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)!=0)) -#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0)) - -U_CFUNC UBool -ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode); - -/** BiDi control code points */ -enum { - ZWNJ_CHAR=0x200c, - ZWJ_CHAR, - LRM_CHAR, - RLM_CHAR, - LRE_CHAR=0x202a, - RLE_CHAR, - PDF_CHAR, - LRO_CHAR, - RLO_CHAR, - LRI_CHAR=0x2066, - RLI_CHAR, - FSI_CHAR, - PDI_CHAR -}; - -#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5 || (uint32_t)((c)-LRI_CHAR)<4) - -/* InsertPoints structure for noting where to put BiDi marks ---------------- */ - -typedef struct Point { - int32_t pos; /* position in text */ - int32_t flag; /* flag for LRM/RLM, before/after */ -} Point; - -typedef struct InsertPoints { - int32_t capacity; /* number of 
points allocated */ - int32_t size; /* number of points used */ - int32_t confirmed; /* number of points confirmed */ - UErrorCode errorCode; /* for eventual memory shortage */ - Point *points; /* pointer to array of points */ -} InsertPoints; - - -/* UBiDi structure ----------------------------------------------------------- */ - -struct UBiDi { - /* pointer to parent paragraph object (pointer to self if this object is - * a paragraph object); set to NULL in a newly opened object; set to a - * real value after a successful execution of ubidi_setPara or ubidi_setLine - */ - const UBiDi * pParaBiDi; - - /* alias pointer to the current text */ - const UChar *text; - - /* length of the current text */ - int32_t originalLength; - - /* if the UBIDI_OPTION_STREAMING option is set, this is the length - * of text actually processed by ubidi_setPara, which may be shorter than - * the original length. - * Otherwise, it is identical to the original length. - */ - int32_t length; - - /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or - * marks are allowed to be inserted in one of the reordering mode, the - * length of the result string may be different from the processed length. - */ - int32_t resultLength; - - /* memory sizes in bytes */ - int32_t dirPropsSize, levelsSize, openingsSize, parasSize, runsSize, isolatesSize; - - /* allocated memory */ - DirProp *dirPropsMemory; - UBiDiLevel *levelsMemory; - Opening *openingsMemory; - Para *parasMemory; - Run *runsMemory; - Isolate *isolatesMemory; - - /* indicators for whether memory may be allocated after ubidi_open() */ - UBool mayAllocateText, mayAllocateRuns; - - /* arrays with one value per text-character */ - DirProp *dirProps; - UBiDiLevel *levels; - - /* are we performing an approximation of the "inverse BiDi" algorithm? */ - UBool isInverse; - - /* are we using the basic algorithm or its variation? 
*/ - UBiDiReorderingMode reorderingMode; - - /* UBIDI_REORDER_xxx values must be ordered so that all the regular - * logical to visual modes come first, and all inverse BiDi modes - * come last. - */ - #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIAL - - /* bitmask for reordering options */ - uint32_t reorderingOptions; - - /* must block separators receive level 0? */ - UBool orderParagraphsLTR; - - /* the paragraph level */ - UBiDiLevel paraLevel; - /* original paraLevel when contextual */ - /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */ - UBiDiLevel defaultParaLevel; - - /* context data */ - const UChar *prologue; - int32_t proLength; - const UChar *epilogue; - int32_t epiLength; - - /* the following is set in ubidi_setPara, used in processPropertySeq */ - const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */ - - /* the overall paragraph or line directionality - see UBiDiDirection */ - UBiDiDirection direction; - - /* flags is a bit set for which directional properties are in the text */ - Flags flags; - - /* lastArabicPos is index to the last AL in the text, -1 if none */ - int32_t lastArabicPos; - - /* characters after trailingWSStart are WS and are */ - /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ - int32_t trailingWSStart; - - /* fields for paragraph handling */ - int32_t paraCount; /* set in getDirProps() */ - /* filled in getDirProps() */ - Para *paras; - - /* for relatively short text, we only need a tiny array of paras (no malloc()) */ - Para simpleParas[SIMPLE_PARAS_COUNT]; - - /* fields for line reordering */ - int32_t runCount; /* ==-1: runs not set up yet */ - Run *runs; - - /* for non-mixed text, we only need a tiny array of runs (no malloc()) */ - Run simpleRuns[1]; - - /* maximum or current nesting depth of isolate sequences */ - /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal - nesting encountered. 
- Within resolveImplicitLevels(), this is the index of the current isolates - stack entry. */ - int32_t isolateCount; - Isolate *isolates; - - /* for simple text, have a small stack (no malloc()) */ - Isolate simpleIsolates[SIMPLE_ISOLATES_COUNT]; - - /* for inverse Bidi with insertion of directional marks */ - InsertPoints insertPoints; - - /* for option UBIDI_OPTION_REMOVE_CONTROLS */ - int32_t controlCount; - - /* for Bidi class callback */ - UBiDiClassCallback *fnClassCallback; /* action pointer */ - const void *coClassCallback; /* context pointer */ -}; - -#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x))) -#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi))) - -typedef union { - DirProp *dirPropsMemory; - UBiDiLevel *levelsMemory; - Opening *openingsMemory; - Para *parasMemory; - Run *runsMemory; - Isolate *isolatesMemory; -} BidiMemoryForAllocation; - -/* Macros for initial checks at function entry */ -#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue) \ - if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue -#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue) \ - if(!IS_VALID_PARA(bidi)) { \ - errcode=U_INVALID_STATE_ERROR; \ - return retvalue; \ - } -#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue) \ - if(!IS_VALID_PARA_OR_LINE(bidi)) { \ - errcode=U_INVALID_STATE_ERROR; \ - return retvalue; \ - } -#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue) \ - if((arg)<(start) || (arg)>=(limit)) { \ - (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \ - return retvalue; \ - } - -#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode) \ - if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return -#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode) \ - if(!IS_VALID_PARA(bidi)) { \ - errcode=U_INVALID_STATE_ERROR; \ - return; \ - } -#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode) \ - if(!IS_VALID_PARA_OR_LINE(bidi)) { \ - 
errcode=U_INVALID_STATE_ERROR; \ - return; \ - } -#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode) \ - if((arg)<(start) || (arg)>=(limit)) { \ - (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \ - return; \ - } - -/* helper function to (re)allocate memory if allowed */ -U_CFUNC UBool -ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded); - -/* helper macros for each allocated array in UBiDi */ -#define getDirPropsMemory(pBiDi, length) \ - ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \ - (pBiDi)->mayAllocateText, (length)) - -#define getLevelsMemory(pBiDi, length) \ - ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \ - (pBiDi)->mayAllocateText, (length)) - -#define getRunsMemory(pBiDi, length) \ - ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \ - (pBiDi)->mayAllocateRuns, (length)*sizeof(Run)) - -/* additional macros used by ubidi_open() - always allow allocation */ -#define getInitialDirPropsMemory(pBiDi, length) \ - ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \ - TRUE, (length)) - -#define getInitialLevelsMemory(pBiDi, length) \ - ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \ - TRUE, (length)) - -#define getInitialOpeningsMemory(pBiDi, length) \ - ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->openingsMemory, &(pBiDi)->openingsSize, \ - TRUE, (length)*sizeof(Opening)) - -#define getInitialParasMemory(pBiDi, length) \ - ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \ - TRUE, (length)*sizeof(Para)) - -#define getInitialRunsMemory(pBiDi, length) \ - ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \ - TRUE, (length)*sizeof(Run)) - -#define getInitialIsolatesMemory(pBiDi, length) \ - 
ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->isolatesMemory, &(pBiDi)->isolatesSize, \ - TRUE, (length)*sizeof(Isolate)) - -#endif diff --git a/deps/node/deps/icu-small/source/common/ubidiln.cpp b/deps/node/deps/icu-small/source/common/ubidiln.cpp deleted file mode 100644 index 71c581fe..00000000 --- a/deps/node/deps/icu-small/source/common/ubidiln.cpp +++ /dev/null @@ -1,1349 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ubidiln.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999aug06 -* created by: Markus W. Scherer, updated by Matitiahu Allouche -*/ - -#include "cmemory.h" -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "unicode/uchar.h" -#include "unicode/ubidi.h" -#include "ubidiimp.h" -#include "uassert.h" - -/* - * General remarks about the functions in this file: - * - * These functions deal with the aspects of potentially mixed-directional - * text in a single paragraph or in a line of a single paragraph - * which has already been processed according to - * the Unicode 6.3 BiDi algorithm as defined in - * http://www.unicode.org/unicode/reports/tr9/ , version 28, - * also described in The Unicode Standard, Version 6.3.0 . - * - * This means that there is a UBiDi object with a levels - * and a dirProps array. - * paraLevel and direction are also set. - * Only if the length of the text is zero, then levels==dirProps==NULL. - * - * The overall directionality of the paragraph - * or line is used to bypass the reordering steps if possible. 
- * Even purely RTL text does not need reordering there because - * the ubidi_getLogical/VisualIndex() functions can compute the - * index on the fly in such a case. - * - * The implementation of the access to same-level-runs and of the reordering - * do attempt to provide better performance and less memory usage compared to - * a direct implementation of especially rule (L2) with an array of - * one (32-bit) integer per text character. - * - * Here, the levels array is scanned as soon as necessary, and a vector of - * same-level-runs is created. Reordering then is done on this vector. - * For each run of text positions that were resolved to the same level, - * only 8 bytes are stored: the first text position of the run and the visual - * position behind the run after reordering. - * One sign bit is used to hold the directionality of the run. - * This is inefficient if there are many very short runs. If the average run - * length is <2, then this uses more memory. - * - * In a further attempt to save memory, the levels array is never changed - * after all the resolution rules (Xn, Wn, Nn, In). - * Many functions have to consider the field trailingWSStart: - * if it is less than length, then there is an implicit trailing run - * at the paraLevel, - * which is not reflected in the levels array. - * This allows a line UBiDi object to use the same levels array as - * its paragraph parent object. - * - * When a UBiDi object is created for a line of a paragraph, then the - * paragraph's levels and dirProps arrays are reused by way of setting - * a pointer into them, not by copying. This again saves memory and forbids to - * change the now shared levels for (L1). - */ - -/* handle trailing WS (L1) -------------------------------------------------- */ - -/* - * setTrailingWSStart() sets the start index for a trailing - * run of WS in the line. This is necessary because we do not modify - * the paragraph's levels array that we just point into. 
- * Using trailingWSStart is another form of performing (L1). - * - * To make subsequent operations easier, we also include the run - * before the WS if it is at the paraLevel - we merge the two here. - * - * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is - * set correctly for the line even when contextual multiple paragraphs. - */ -static void -setTrailingWSStart(UBiDi *pBiDi) { - /* pBiDi->direction!=UBIDI_MIXED */ - - const DirProp *dirProps=pBiDi->dirProps; - UBiDiLevel *levels=pBiDi->levels; - int32_t start=pBiDi->length; - UBiDiLevel paraLevel=pBiDi->paraLevel; - - /* If the line is terminated by a block separator, all preceding WS etc... - are already set to paragraph level. - Setting trailingWSStart to pBidi->length will avoid changing the - level of B chars from 0 to paraLevel in ubidi_getLevels when - orderParagraphsLTR==TRUE. - */ - if(dirProps[start-1]==B) { - pBiDi->trailingWSStart=start; /* currently == pBiDi->length */ - return; - } - /* go backwards across all WS, BN, explicit codes */ - while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) { - --start; - } - - /* if the WS run can be merged with the previous run then do so here */ - while(start>0 && levels[start-1]==paraLevel) { - --start; - } - - pBiDi->trailingWSStart=start; -} - -/* ubidi_setLine ------------------------------------------------------------ */ - -U_CAPI void U_EXPORT2 -ubidi_setLine(const UBiDi *pParaBiDi, - int32_t start, int32_t limit, - UBiDi *pLineBiDi, - UErrorCode *pErrorCode) { - int32_t length; - - /* check the argument values */ - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode); - RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode); - RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode); - if(pLineBiDi==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) != - 
ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) { - /* the line crosses a paragraph boundary */ - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* set the values in pLineBiDi from its pParaBiDi parent */ - pLineBiDi->pParaBiDi=NULL; /* mark unfinished setLine */ - pLineBiDi->text=pParaBiDi->text+start; - length=pLineBiDi->length=limit-start; - pLineBiDi->resultLength=pLineBiDi->originalLength=length; - pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start); - pLineBiDi->paraCount=pParaBiDi->paraCount; - pLineBiDi->runs=NULL; - pLineBiDi->flags=0; - pLineBiDi->reorderingMode=pParaBiDi->reorderingMode; - pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions; - pLineBiDi->controlCount=0; - if(pParaBiDi->controlCount>0) { - int32_t j; - for(j=start; jtext[j])) { - pLineBiDi->controlCount++; - } - } - pLineBiDi->resultLength-=pLineBiDi->controlCount; - } - - pLineBiDi->dirProps=pParaBiDi->dirProps+start; - pLineBiDi->levels=pParaBiDi->levels+start; - pLineBiDi->runCount=-1; - - if(pParaBiDi->direction!=UBIDI_MIXED) { - /* the parent is already trivial */ - pLineBiDi->direction=pParaBiDi->direction; - - /* - * The parent's levels are all either - * implicitly or explicitly ==paraLevel; - * do the same here. 
- */ - if(pParaBiDi->trailingWSStart<=start) { - pLineBiDi->trailingWSStart=0; - } else if(pParaBiDi->trailingWSStarttrailingWSStart=pParaBiDi->trailingWSStart-start; - } else { - pLineBiDi->trailingWSStart=length; - } - } else { - const UBiDiLevel *levels=pLineBiDi->levels; - int32_t i, trailingWSStart; - UBiDiLevel level; - - setTrailingWSStart(pLineBiDi); - trailingWSStart=pLineBiDi->trailingWSStart; - - /* recalculate pLineBiDi->direction */ - if(trailingWSStart==0) { - /* all levels are at paraLevel */ - pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1); - } else { - /* get the level of the first character */ - level=(UBiDiLevel)(levels[0]&1); - - /* if there is anything of a different level, then the line is mixed */ - if(trailingWSStartparaLevel&1)!=level) { - /* the trailing WS is at paraLevel, which differs from levels[0] */ - pLineBiDi->direction=UBIDI_MIXED; - } else { - /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */ - i=1; - for(;;) { - if(i==trailingWSStart) { - /* the direction values match those in level */ - pLineBiDi->direction=(UBiDiDirection)level; - break; - } else if((levels[i]&1)!=level) { - pLineBiDi->direction=UBIDI_MIXED; - break; - } - ++i; - } - } - } - - switch(pLineBiDi->direction) { - case UBIDI_LTR: - /* make sure paraLevel is even */ - pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1); - - /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ - pLineBiDi->trailingWSStart=0; - break; - case UBIDI_RTL: - /* make sure paraLevel is odd */ - pLineBiDi->paraLevel|=1; - - /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ - pLineBiDi->trailingWSStart=0; - break; - default: - break; - } - } - pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */ - return; -} - -U_CAPI UBiDiLevel U_EXPORT2 -ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) { - /* return paraLevel if in the trailing WS run, otherwise the 
real level */ - if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) { - return 0; - } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) { - return GET_PARALEVEL(pBiDi, charIndex); - } else { - return pBiDi->levels[charIndex]; - } -} - -U_CAPI const UBiDiLevel * U_EXPORT2 -ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { - int32_t start, length; - - RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, NULL); - RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, NULL); - if((length=pBiDi->length)<=0) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - if((start=pBiDi->trailingWSStart)==length) { - /* the current levels array reflects the WS run */ - return pBiDi->levels; - } - - /* - * After the previous if(), we know that the levels array - * has an implicit trailing WS run and therefore does not fully - * reflect itself all the levels. - * This must be a UBiDi object for a line, and - * we need to create a new levels array. - */ - if(getLevelsMemory(pBiDi, length)) { - UBiDiLevel *levels=pBiDi->levelsMemory; - - if(start>0 && levels!=pBiDi->levels) { - uprv_memcpy(levels, pBiDi->levels, start); - } - /* pBiDi->paraLevel is ok even if contextual multiple paragraphs, - since pBidi is a line object */ - uprv_memset(levels+start, pBiDi->paraLevel, length-start); - - /* this new levels array is set for the line and reflects the WS run */ - pBiDi->trailingWSStart=length; - return pBiDi->levels=levels; - } else { - /* out of memory */ - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } -} - -U_CAPI void U_EXPORT2 -ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, - int32_t *pLogicalLimit, UBiDiLevel *pLevel) { - UErrorCode errorCode; - int32_t runCount, visualStart, logicalLimit, logicalFirst, i; - Run iRun; - - errorCode=U_ZERO_ERROR; - RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode); - /* ubidi_countRuns will check VALID_PARA_OR_LINE */ - 
runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode); - if(U_FAILURE(errorCode)) { - return; - } - /* this is done based on runs rather than on levels since levels have - a special interpretation when UBIDI_REORDER_RUNS_ONLY - */ - visualStart=logicalLimit=0; - iRun=pBiDi->runs[0]; - - for(i=0; iruns[i]; - logicalFirst=GET_INDEX(iRun.logicalStart); - logicalLimit=logicalFirst+iRun.visualLimit-visualStart; - if((logicalPosition>=logicalFirst) && - (logicalPositionreorderingMode==UBIDI_REORDER_RUNS_ONLY) { - *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart); - } - else if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) { - *pLevel=GET_PARALEVEL(pBiDi, logicalPosition); - } else { - *pLevel=pBiDi->levels[logicalPosition]; - } - } -} - -/* runs API functions ------------------------------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { - RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); - RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); - ubidi_getRuns(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return -1; - } - return pBiDi->runCount; -} - -U_CAPI UBiDiDirection U_EXPORT2 -ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, - int32_t *pLogicalStart, int32_t *pLength) -{ - int32_t start; - UErrorCode errorCode = U_ZERO_ERROR; - RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR); - ubidi_getRuns(pBiDi, &errorCode); - if(U_FAILURE(errorCode)) { - return UBIDI_LTR; - } - RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR); - - start=pBiDi->runs[runIndex].logicalStart; - if(pLogicalStart!=NULL) { - *pLogicalStart=GET_INDEX(start); - } - if(pLength!=NULL) { - if(runIndex>0) { - *pLength=pBiDi->runs[runIndex].visualLimit- - pBiDi->runs[runIndex-1].visualLimit; - } else { - *pLength=pBiDi->runs[0].visualLimit; - } - } - return (UBiDiDirection)GET_ODD_BIT(start); -} - -/* in trivial cases there is only one trivial run; called by 
ubidi_getRuns() */ -static void -getSingleRun(UBiDi *pBiDi, UBiDiLevel level) { - /* simple, single-run case */ - pBiDi->runs=pBiDi->simpleRuns; - pBiDi->runCount=1; - - /* fill and reorder the single run */ - pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level); - pBiDi->runs[0].visualLimit=pBiDi->length; - pBiDi->runs[0].insertRemove=0; -} - -/* reorder the runs array (L2) ---------------------------------------------- */ - -/* - * Reorder the same-level runs in the runs array. - * Here, runCount>1 and maxLevel>=minLevel>=paraLevel. - * All the visualStart fields=logical start before reordering. - * The "odd" bits are not set yet. - * - * Reordering with this data structure lends itself to some handy shortcuts: - * - * Since each run is moved but not modified, and since at the initial maxLevel - * each sequence of same-level runs consists of only one run each, we - * don't need to do anything there and can predecrement maxLevel. - * In many simple cases, the reordering is thus done entirely in the - * index mapping. - * Also, reordering occurs only down to the lowest odd level that occurs, - * which is minLevel|1. However, if the lowest level itself is odd, then - * in the last reordering the sequence of the runs at this level or higher - * will be all runs, and we don't need the elaborate loop to search for them. - * This is covered by ++minLevel instead of minLevel|=1 followed - * by an extra reorder-all after the reorder-some loop. - * About a trailing WS run: - * Such a run would need special treatment because its level is not - * reflected in levels[] if this is not a paragraph object. - * Instead, all characters from trailingWSStart on are implicitly at - * paraLevel. - * However, for all maxLevel>paraLevel, this run will never be reordered - * and does not need to be taken into account. maxLevel==paraLevel is only reordered - * if minLevel==paraLevel is odd, which is done in the extra segment. 
- * This means that for the main reordering loop we don't need to consider - * this run and can --runCount. If it is later part of the all-runs - * reordering, then runCount is adjusted accordingly. - */ -static void -reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { - Run *runs, tempRun; - UBiDiLevel *levels; - int32_t firstRun, endRun, limitRun, runCount; - - /* nothing to do? */ - if(maxLevel<=(minLevel|1)) { - return; - } - - /* - * Reorder only down to the lowest odd level - * and reorder at an odd minLevel in a separate, simpler loop. - * See comments above for why minLevel is always incremented. - */ - ++minLevel; - - runs=pBiDi->runs; - levels=pBiDi->levels; - runCount=pBiDi->runCount; - - /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */ - if(pBiDi->trailingWSStartlength) { - --runCount; - } - - while(--maxLevel>=minLevel) { - firstRun=0; - - /* loop for all sequences of runs */ - for(;;) { - /* look for a sequence of runs that are all at >=maxLevel */ - /* look for the first run of such a sequence */ - while(firstRun=runCount) { - break; /* no more such runs */ - } - - /* look for the limit run of such a sequence (the run behind it) */ - for(limitRun=firstRun; ++limitRun=maxLevel;) {} - - /* Swap the entire sequence of runs from firstRun to limitRun-1. */ - endRun=limitRun-1; - while(firstRuntrailingWSStart==pBiDi->length) { - --runCount; - } - - /* Swap the entire sequence of all runs. (endRun==runCount) */ - while(firstRunruns; - int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart; - - for(i=0; i=logicalStart) && (logicalIndex<(logicalStart+length))) { - return i; - } - visualStart+=length; - } - /* we should never get here */ - U_ASSERT(FALSE); - *pErrorCode = U_INVALID_STATE_ERROR; - return 0; -} - -/* - * Compute the runs array from the levels array. - * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0 - * and the runs are reordered. 
- * Odd-level runs have visualStart on their visual right edge and - * they progress visually to the left. - * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the - * sum of appropriate LRM/RLM_BEFORE/AFTER flags. - * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the - * negative number of BiDi control characters within this run. - */ -U_CFUNC UBool -ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { - /* - * This method returns immediately if the runs are already set. This - * includes the case of length==0 (handled in setPara).. - */ - if (pBiDi->runCount>=0) { - return TRUE; - } - - if(pBiDi->direction!=UBIDI_MIXED) { - /* simple, single-run case - this covers length==0 */ - /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */ - getSingleRun(pBiDi, pBiDi->paraLevel); - } else /* UBIDI_MIXED, length>0 */ { - /* mixed directionality */ - int32_t length=pBiDi->length, limit; - UBiDiLevel *levels=pBiDi->levels; - int32_t i, runCount; - UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */ - /* - * If there are WS characters at the end of the line - * and the run preceding them has a level different from - * paraLevel, then they will form their own run at paraLevel (L1). - * Count them separately. - * We need some special treatment for this in order to not - * modify the levels array which a line UBiDi object shares - * with its paragraph parent and its other line siblings. - * In other words, for the trailing WS, it may be - * levels[]!=paraLevel but we have to treat it like it were so. - */ - limit=pBiDi->trailingWSStart; - /* count the runs, there is at least one non-WS run, and limit>0 */ - runCount=0; - for(i=0; i1 || limit1 */ - if(getRunsMemory(pBiDi, runCount)) { - runs=pBiDi->runsMemory; - } else { - return FALSE; - } - - /* set the runs */ - /* FOOD FOR THOUGHT: this could be optimized, e.g.: - * 464->444, 484->444, 575->555, 595->555 - * However, that would take longer. 
Check also how it would - * interact with BiDi control removal and inserting Marks. - */ - runIndex=0; - - /* search for the run limits and initialize visualLimit values with the run lengths */ - i=0; - do { - /* prepare this run */ - start=i; - level=levels[i]; - if(levelmaxLevel) { - maxLevel=level; - } - - /* look for the run limit */ - while(++iparaLevel is ok even - if contextual multiple paragraphs. */ - if(pBiDi->paraLevelparaLevel; - } - } - - /* set the object fields */ - pBiDi->runs=runs; - pBiDi->runCount=runCount; - - reorderLine(pBiDi, minLevel, maxLevel); - - /* now add the direction flags and adjust the visualLimit's to be just that */ - /* this loop will also handle the trailing WS run */ - limit=0; - for(i=0; iparaLevel is ok even if - contextual multiple paragraphs. */ - if(runIndexparaLevel & 1) != 0)? 0 : runIndex; - - ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel); - } - } - } - - /* handle insert LRM/RLM BEFORE/AFTER run */ - if(pBiDi->insertPoints.size>0) { - Point *point, *start=pBiDi->insertPoints.points, - *limit=start+pBiDi->insertPoints.size; - int32_t runIndex; - for(point=start; pointpos, pErrorCode); - pBiDi->runs[runIndex].insertRemove|=point->flag; - } - } - - /* handle remove BiDi control characters */ - if(pBiDi->controlCount>0) { - int32_t runIndex; - const UChar *start=pBiDi->text, *limit=start+pBiDi->length, *pu; - for(pu=start; puruns[runIndex].insertRemove--; - } - } - } - - return TRUE; -} - -static UBool -prepareReorder(const UBiDiLevel *levels, int32_t length, - int32_t *indexMap, - UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) { - int32_t start; - UBiDiLevel level, minLevel, maxLevel; - - if(levels==NULL || length<=0) { - return FALSE; - } - - /* determine minLevel and maxLevel */ - minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1; - maxLevel=0; - for(start=length; start>0;) { - level=levels[--start]; - if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) { - return FALSE; - } - if(levelmaxLevel) { - maxLevel=level; - } - } 
- *pMinLevel=minLevel; - *pMaxLevel=maxLevel; - - /* initialize the index map */ - for(start=length; start>0;) { - --start; - indexMap[start]=start; - } - - return TRUE; -} - -/* reorder a line based on a levels array (L2) ------------------------------ */ - -U_CAPI void U_EXPORT2 -ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { - int32_t start, limit, sumOfSosEos; - UBiDiLevel minLevel = 0, maxLevel = 0; - - if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { - return; - } - - /* nothing to do? */ - if(minLevel==maxLevel && (minLevel&1)==0) { - return; - } - - /* reorder only down to the lowest odd level */ - minLevel|=1; - - /* loop maxLevel..minLevel */ - do { - start=0; - - /* loop for all sequences of levels to reorder at the current maxLevel */ - for(;;) { - /* look for a sequence of levels that are all at >=maxLevel */ - /* look for the first index of such a sequence */ - while(start=length) { - break; /* no more such sequences */ - } - - /* look for the limit of such a sequence (the index behind it) */ - for(limit=start; ++limit=maxLevel;) {} - - /* - * sos=start of sequence, eos=end of sequence - * - * The closed (inclusive) interval from sos to eos includes all the logical - * and visual indexes within this sequence. They are logically and - * visually contiguous and in the same range. 
- * - * For each run, the new visual index=sos+eos-old visual index; - * we pre-add sos+eos into sumOfSosEos -> - * new visual index=sumOfSosEos-old visual index; - */ - sumOfSosEos=start+limit-1; - - /* reorder each index in the sequence */ - do { - indexMap[start]=sumOfSosEos-indexMap[start]; - } while(++start=minLevel); -} - -U_CAPI void U_EXPORT2 -ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { - int32_t start, end, limit, temp; - UBiDiLevel minLevel = 0, maxLevel = 0; - - if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { - return; - } - - /* nothing to do? */ - if(minLevel==maxLevel && (minLevel&1)==0) { - return; - } - - /* reorder only down to the lowest odd level */ - minLevel|=1; - - /* loop maxLevel..minLevel */ - do { - start=0; - - /* loop for all sequences of levels to reorder at the current maxLevel */ - for(;;) { - /* look for a sequence of levels that are all at >=maxLevel */ - /* look for the first index of such a sequence */ - while(start=length) { - break; /* no more such runs */ - } - - /* look for the limit of such a sequence (the index behind it) */ - for(limit=start; ++limit=maxLevel;) {} - - /* - * Swap the entire interval of indexes from start to limit-1. - * We don't need to swap the levels for the purpose of this - * algorithm: the sequence of levels that we look at does not - * move anyway. 
- */ - end=limit-1; - while(start=minLevel); -} - -/* API functions for logical<->visual mapping ------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { - int32_t visualIndex=UBIDI_MAP_NOWHERE; - RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); - RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); - RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1); - - /* we can do the trivial cases without the runs array */ - switch(pBiDi->direction) { - case UBIDI_LTR: - visualIndex=logicalIndex; - break; - case UBIDI_RTL: - visualIndex=pBiDi->length-logicalIndex-1; - break; - default: - if(!ubidi_getRuns(pBiDi, pErrorCode)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return -1; - } else { - Run *runs=pBiDi->runs; - int32_t i, visualStart=0, offset, length; - - /* linear search for the run, search on the visual runs */ - for(i=0; irunCount; ++i) { - length=runs[i].visualLimit-visualStart; - offset=logicalIndex-GET_INDEX(runs[i].logicalStart); - if(offset>=0 && offset=pBiDi->runCount) { - return UBIDI_MAP_NOWHERE; - } - } - } - - if(pBiDi->insertPoints.size>0) { - /* add the number of added marks until the calculated visual index */ - Run *runs=pBiDi->runs; - int32_t i, length, insertRemove; - int32_t visualStart=0, markFound=0; - for(i=0; ; i++, visualStart+=length) { - length=runs[i].visualLimit-visualStart; - insertRemove=runs[i].insertRemove; - if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) { - markFound++; - } - /* is it the run containing the visual index? */ - if(visualIndexcontrolCount>0) { - /* subtract the number of controls until the calculated visual index */ - Run *runs=pBiDi->runs; - int32_t i, j, start, limit, length, insertRemove; - int32_t visualStart=0, controlFound=0; - UChar uchar=pBiDi->text[logicalIndex]; - /* is the logical index pointing to a control ? 
*/ - if(IS_BIDI_CONTROL_CHAR(uchar)) { - return UBIDI_MAP_NOWHERE; - } - /* loop on runs */ - for(i=0; ; i++, visualStart+=length) { - length=runs[i].visualLimit-visualStart; - insertRemove=runs[i].insertRemove; - /* calculated visual index is beyond this run? */ - if(visualIndex>=runs[i].visualLimit) { - controlFound-=insertRemove; - continue; - } - /* calculated visual index must be within current run */ - if(insertRemove==0) { - return visualIndex-controlFound; - } - if(IS_EVEN_RUN(runs[i].logicalStart)) { - /* LTR: check from run start to logical index */ - start=runs[i].logicalStart; - limit=logicalIndex; - } else { - /* RTL: check from logical index to run end */ - start=logicalIndex+1; - limit=GET_INDEX(runs[i].logicalStart)+length; - } - for(j=start; jtext[j]; - if(IS_BIDI_CONTROL_CHAR(uchar)) { - controlFound++; - } - } - return visualIndex-controlFound; - } - } - - return visualIndex; -} - -U_CAPI int32_t U_EXPORT2 -ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) { - Run *runs; - int32_t i, runCount, start; - RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); - RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); - RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1); - /* we can do the trivial cases without the runs array */ - if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) { - if(pBiDi->direction==UBIDI_LTR) { - return visualIndex; - } - else if(pBiDi->direction==UBIDI_RTL) { - return pBiDi->length-visualIndex-1; - } - } - if(!ubidi_getRuns(pBiDi, pErrorCode)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return -1; - } - - runs=pBiDi->runs; - runCount=pBiDi->runCount; - if(pBiDi->insertPoints.size>0) { - /* handle inserted LRM/RLM */ - int32_t markFound=0, insertRemove; - int32_t visualStart=0, length; - runs=pBiDi->runs; - /* subtract number of marks until visual index */ - for(i=0; ; i++, visualStart+=length) { - length=runs[i].visualLimit-visualStart; - 
insertRemove=runs[i].insertRemove; - if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { - if(visualIndex<=(visualStart+markFound)) { - return UBIDI_MAP_NOWHERE; - } - markFound++; - } - /* is adjusted visual index within this run? */ - if(visualIndex<(runs[i].visualLimit+markFound)) { - visualIndex-=markFound; - break; - } - if(insertRemove&(LRM_AFTER|RLM_AFTER)) { - if(visualIndex==(visualStart+length+markFound)) { - return UBIDI_MAP_NOWHERE; - } - markFound++; - } - } - } - else if(pBiDi->controlCount>0) { - /* handle removed BiDi control characters */ - int32_t controlFound=0, insertRemove, length; - int32_t logicalStart, logicalEnd, visualStart=0, j, k; - UChar uchar; - UBool evenRun; - /* add number of controls until visual index */ - for(i=0; ; i++, visualStart+=length) { - length=runs[i].visualLimit-visualStart; - insertRemove=runs[i].insertRemove; - /* is adjusted visual index beyond current run? */ - if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) { - controlFound-=insertRemove; - continue; - } - /* adjusted visual index is within current run */ - if(insertRemove==0) { - visualIndex+=controlFound; - break; - } - /* count non-control chars until visualIndex */ - logicalStart=runs[i].logicalStart; - evenRun=IS_EVEN_RUN(logicalStart); - REMOVE_ODD_BIT(logicalStart); - logicalEnd=logicalStart+length-1; - for(j=0; jtext[k]; - if(IS_BIDI_CONTROL_CHAR(uchar)) { - controlFound++; - } - if((visualIndex+controlFound)==(visualStart+j)) { - break; - } - } - visualIndex+=controlFound; - break; - } - } - /* handle all cases */ - if(runCount<=10) { - /* linear search for the run */ - for(i=0; visualIndex>=runs[i].visualLimit; ++i) {} - } else { - /* binary search for the run */ - int32_t begin=0, limit=runCount; - - /* the middle if() is guaranteed to find the run, we don't need a loop limit */ - for(;;) { - i=(begin+limit)/2; - if(visualIndex>=runs[i].visualLimit) { - begin=i+1; - } else if(i==0 || visualIndex>=runs[i-1].visualLimit) { - break; - } else { - 
limit=i; - } - } - } - - start=runs[i].logicalStart; - if(IS_EVEN_RUN(start)) { - /* LTR */ - /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */ - if(i>0) { - visualIndex-=runs[i-1].visualLimit; - } - return start+visualIndex; - } else { - /* RTL */ - return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1; - } -} - -U_CAPI void U_EXPORT2 -ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ - ubidi_countRuns(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - /* no op */ - } else if(indexMap==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } else { - /* fill a logical-to-visual index map using the runs[] */ - int32_t visualStart, visualLimit, i, j, k; - int32_t logicalStart, logicalLimit; - Run *runs=pBiDi->runs; - if (pBiDi->length<=0) { - return; - } - if (pBiDi->length>pBiDi->resultLength) { - uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t)); - } - - visualStart=0; - for(j=0; jrunCount; ++j) { - logicalStart=GET_INDEX(runs[j].logicalStart); - visualLimit=runs[j].visualLimit; - if(IS_EVEN_RUN(runs[j].logicalStart)) { - do { /* LTR */ - indexMap[logicalStart++]=visualStart++; - } while(visualStartinsertPoints.size>0) { - int32_t markFound=0, runCount=pBiDi->runCount; - int32_t length, insertRemove; - visualStart=0; - /* add number of marks found until each index */ - for(i=0; i0) { - logicalStart=GET_INDEX(runs[i].logicalStart); - logicalLimit=logicalStart+length; - for(j=logicalStart; jcontrolCount>0) { - int32_t controlFound=0, runCount=pBiDi->runCount; - int32_t length, insertRemove; - UBool evenRun; - UChar uchar; - visualStart=0; - /* subtract number of controls found until each index */ - for(i=0; itext[k]; - if(IS_BIDI_CONTROL_CHAR(uchar)) { - controlFound++; - indexMap[k]=UBIDI_MAP_NOWHERE; - continue; - } - indexMap[k]-=controlFound; - } - } - } - } -} - -U_CAPI void U_EXPORT2 
-ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { - RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); - if(indexMap==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ - ubidi_countRuns(pBiDi, pErrorCode); - if(U_SUCCESS(*pErrorCode)) { - /* fill a visual-to-logical index map using the runs[] */ - Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount; - int32_t logicalStart, visualStart, visualLimit, *pi=indexMap; - - if (pBiDi->resultLength<=0) { - return; - } - visualStart=0; - for(; runslogicalStart; - visualLimit=runs->visualLimit; - if(IS_EVEN_RUN(logicalStart)) { - do { /* LTR */ - *pi++ = logicalStart++; - } while(++visualStartinsertPoints.size>0) { - int32_t markFound=0, runCount=pBiDi->runCount; - int32_t insertRemove, i, j, k; - runs=pBiDi->runs; - /* count all inserted marks */ - for(i=0; iresultLength; - for(i=runCount-1; i>=0 && markFound>0; i--) { - insertRemove=runs[i].insertRemove; - if(insertRemove&(LRM_AFTER|RLM_AFTER)) { - indexMap[--k]= UBIDI_MAP_NOWHERE; - markFound--; - } - visualStart= i>0 ? 
runs[i-1].visualLimit : 0; - for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) { - indexMap[--k]=indexMap[j]; - } - if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { - indexMap[--k]= UBIDI_MAP_NOWHERE; - markFound--; - } - } - } - else if(pBiDi->controlCount>0) { - int32_t runCount=pBiDi->runCount, logicalEnd; - int32_t insertRemove, length, i, j, k, m; - UChar uchar; - UBool evenRun; - runs=pBiDi->runs; - visualStart=0; - /* move forward indexes by number of preceding controls */ - k=0; - for(i=0; itext[m]; - if(!IS_BIDI_CONTROL_CHAR(uchar)) { - indexMap[k++]=m; - } - } - } - } - } -} - -U_CAPI void U_EXPORT2 -ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) { - if(srcMap!=NULL && destMap!=NULL && length>0) { - const int32_t *pi; - int32_t destLength=-1, count=0; - /* find highest value and count positive indexes in srcMap */ - pi=srcMap+length; - while(pi>srcMap) { - if(*--pi>destLength) { - destLength=*pi; - } - if(*pi>=0) { - count++; - } - } - destLength++; /* add 1 for origin 0 */ - if(count0) { - if(*--pi>=0) { - destMap[*pi]=--length; - } else { - --length; - } - } - } -} diff --git a/deps/node/deps/icu-small/source/common/ubiditransform.cpp b/deps/node/deps/icu-small/source/common/ubiditransform.cpp deleted file mode 100644 index 394df609..00000000 --- a/deps/node/deps/icu-small/source/common/ubiditransform.cpp +++ /dev/null @@ -1,530 +0,0 @@ -/* -****************************************************************************** -* -* © 2016 and later: Unicode, Inc. and others. 
-* License & terms of use: http://www.unicode.org/copyright.html -* -****************************************************************************** -* file name: ubiditransform.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2016jul24 -* created by: Lina Kemmel -* -*/ - -#include "cmemory.h" -#include "unicode/ubidi.h" -#include "unicode/ustring.h" -#include "unicode/ushape.h" -#include "unicode/utf16.h" -#include "ustr_imp.h" -#include "unicode/ubiditransform.h" - -/* Some convenience defines */ -#define LTR UBIDI_LTR -#define RTL UBIDI_RTL -#define LOGICAL UBIDI_LOGICAL -#define VISUAL UBIDI_VISUAL -#define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL -#define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR - -#define CHECK_LEN(STR, LEN, ERROR) { \ - if (LEN == 0) return 0; \ - if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \ - if (LEN == -1) LEN = u_strlen(STR); \ - } - -#define MAX_ACTIONS 7 - -/** - * Typedef for a pointer to a function, which performs some operation (such as - * reordering, setting "inverse" mode, character mirroring, etc.). Return value - * indicates whether the text was changed in the course of this operation or - * not. - */ -typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *); - -/** - * Structure that holds a predefined reordering scheme, including the following - * information: - *

    - *
  • an input base direction,
  • - *
  • an input order,
  • - *
  • an output base direction,
  • - *
  • an output order,
  • - *
  • a digit shaping direction,
  • - *
  • a letter shaping direction,
  • - *
  • a base direction that should be applied when the reordering engine is - * invoked (which can not always be derived from the caller-defined - * options),
  • - *
  • an array of pointers to functions that accomplish the bidi layout - * transformation.
  • - *
- */ -typedef struct { - UBiDiLevel inLevel; /* input level */ - UBiDiOrder inOrder; /* input order */ - UBiDiLevel outLevel; /* output level */ - UBiDiOrder outOrder; /* output order */ - uint32_t digitsDir; /* digit shaping direction */ - uint32_t lettersDir; /* letter shaping direction */ - UBiDiLevel baseLevel; /* paragraph level to be used with setPara */ - const UBiDiAction actions[MAX_ACTIONS]; /* array of pointers to functions carrying out the transformation */ -} ReorderingScheme; - -struct UBiDiTransform { - UBiDi *pBidi; /* pointer to a UBiDi object */ - const ReorderingScheme *pActiveScheme; /* effective reordering scheme */ - UChar *src; /* input text */ - UChar *dest; /* output text */ - uint32_t srcLength; /* input text length - not really needed as we are zero-terminated and can u_strlen */ - uint32_t srcSize; /* input text capacity excluding the trailing zero */ - uint32_t destSize; /* output text capacity */ - uint32_t *pDestLength; /* number of UChars written to dest */ - uint32_t reorderingOptions; /* reordering options - currently only suppot DO_MIRRORING */ - uint32_t digits; /* digit option for ArabicShaping */ - uint32_t letters; /* letter option for ArabicShaping */ -}; - -U_DRAFT UBiDiTransform* U_EXPORT2 -ubiditransform_open(UErrorCode *pErrorCode) -{ - UBiDiTransform *pBiDiTransform = NULL; - if (U_SUCCESS(*pErrorCode)) { - pBiDiTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform)); - if (pBiDiTransform == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - } - } - return pBiDiTransform; -} - -U_DRAFT void U_EXPORT2 -ubiditransform_close(UBiDiTransform *pBiDiTransform) -{ - if (pBiDiTransform != NULL) { - if (pBiDiTransform->pBidi != NULL) { - ubidi_close(pBiDiTransform->pBidi); - } - if (pBiDiTransform->src != NULL) { - uprv_free(pBiDiTransform->src); - } - uprv_free(pBiDiTransform); - } -} - -/** - * Performs Bidi resolution of text. - * - * @param pTransform Pointer to the UBiDiTransform structure. 
- * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength, - pTransform->pActiveScheme->baseLevel, NULL, pErrorCode); - return FALSE; -} - -/** - * Performs basic reordering of text (Logical -> Visual LTR). - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize, - static_cast(pTransform->reorderingOptions), pErrorCode); - - *pTransform->pDestLength = pTransform->srcLength; - pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; - return TRUE; -} - -/** - * Sets "inverse" mode on the UBiDi object. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - (void)pErrorCode; - ubidi_setInverse(pTransform->pBidi, TRUE); - ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT); - return FALSE; -} - -/** - * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL - * transformation. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. 
- * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - (void)pErrorCode; - ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY); - return FALSE; -} - -/** - * Performs string reverse. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - ubidi_writeReverse(pTransform->src, pTransform->srcLength, - pTransform->dest, pTransform->destSize, - UBIDI_REORDER_DEFAULT, pErrorCode); - *pTransform->pDestLength = pTransform->srcLength; - return TRUE; -} - -/** - * Applies a new value to the text that serves as input at the current - * processing step. This value is identical to the original one when we begin - * the processing, but usually changes as the transformation progresses. - * - * @param pTransform A pointer to the UBiDiTransform structure. - * @param newSrc A pointer whose value is to be used as input text. - * @param newLength A length of the new text in UChars. - * @param newSize A new source capacity in UChars. - * @param pErrorCode Pointer to the error code value. 
- */ -static void -updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength, - uint32_t newSize, UErrorCode *pErrorCode) -{ - if (newSize < newLength) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - return; - } - if (newSize > pTransform->srcSize) { - newSize += 50; // allocate slightly more than needed right now - if (pTransform->src != NULL) { - uprv_free(pTransform->src); - pTransform->src = NULL; - } - pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar)); - if (pTransform->src == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - //pTransform->srcLength = pTransform->srcSize = 0; - return; - } - pTransform->srcSize = newSize; - } - u_strncpy(pTransform->src, newSrc, newLength); - pTransform->srcLength = u_terminateUChars(pTransform->src, - pTransform->srcSize, newLength, pErrorCode); -} - -/** - * Calls a lower level shaping function. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param options Shaping options. - * @param pErrorCode Pointer to the error code value. - */ -static void -doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode) -{ - *pTransform->pDestLength = u_shapeArabic(pTransform->src, - pTransform->srcLength, pTransform->dest, pTransform->destSize, - options, pErrorCode); -} - -/** - * Performs digit and letter shaping. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). 
- */ -static UBool -action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - if ((pTransform->letters | pTransform->digits) == 0) { - return FALSE; - } - if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) { - doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir, - pErrorCode); - } else { - doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode); - if (U_SUCCESS(*pErrorCode)) { - updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength, - *pTransform->pDestLength, pErrorCode); - doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir, - pErrorCode); - } - } - return TRUE; -} - -/** - * Performs character mirroring. - * - * @param pTransform Pointer to the UBiDiTransform structure. - * @param pErrorCode Pointer to the error code value. - * - * @return Whether or not this function modifies the text. Besides the return - * value, the caller should also check U_SUCCESS(*pErrorCode). - */ -static UBool -action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode) -{ - UChar32 c; - uint32_t i = 0, j = 0; - if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) { - return FALSE; - } - if (pTransform->destSize < pTransform->srcLength) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - return FALSE; - } - do { - UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1; - U16_NEXT(pTransform->src, i, pTransform->srcLength, c); - U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c); - } while (i < pTransform->srcLength); - - *pTransform->pDestLength = pTransform->srcLength; - pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; - return TRUE; -} - -/** - * All possible reordering schemes. 
- * - */ -static const ReorderingScheme Schemes[] = -{ - /* 0: Logical LTR => Visual LTR */ - {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_shapeArabic, action_resolve, action_reorder, NULL}}, - /* 1: Logical RTL => Visual LTR */ - {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, - {action_resolve, action_reorder, action_shapeArabic, NULL}}, - /* 2: Logical LTR => Visual RTL */ - {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}}, - /* 3: Logical RTL => Visual RTL */ - {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, - {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}}, - /* 4: Visual LTR => Logical RTL */ - {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, - {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, - /* 5: Visual RTL => Logical RTL */ - {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, - {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, - /* 6: Visual LTR => Logical LTR */ - {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, - /* 7: Visual RTL => Logical LTR */ - {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, - /* 8: Logical LTR => Logical RTL */ - {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}}, - /* 9: Logical RTL => Logical LTR */ - {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL, - {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}}, - /* 10: Visual LTR => Visual RTL */ - {LTR, VISUAL, 
RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, - {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}}, - /* 11: Visual RTL => Visual LTR */ - {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, - {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}}, - /* 12: Logical LTR => Logical LTR */ - {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, - {action_resolve, action_mirror, action_shapeArabic, NULL}}, - /* 13: Logical RTL => Logical RTL */ - {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL, - {action_resolve, action_mirror, action_shapeArabic, NULL}}, - /* 14: Visual LTR => Visual LTR */ - {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, - {action_resolve, action_mirror, action_shapeArabic, NULL}}, - /* 15: Visual RTL => Visual RTL */ - {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, - {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}} -}; - -static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes); - -/** - * When the direction option is UBIDI_DEFAULT_LTR or - * UBIDI_DEFAULT_RTL, resolve the base direction according to that - * of the first strong bidi character. - */ -static void -resolveBaseDirection(const UChar *text, uint32_t length, - UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel) -{ - switch (*pInLevel) { - case UBIDI_DEFAULT_LTR: - case UBIDI_DEFAULT_RTL: { - UBiDiLevel level = static_cast(ubidi_getBaseDirection(text, length)); - *pInLevel = static_cast(level != UBIDI_NEUTRAL) ? level - : *pInLevel == UBIDI_DEFAULT_RTL ? static_cast(RTL) : static_cast(LTR); - break; - } - default: - *pInLevel &= 1; - break; - } - switch (*pOutLevel) { - case UBIDI_DEFAULT_LTR: - case UBIDI_DEFAULT_RTL: - *pOutLevel = *pInLevel; - break; - default: - *pOutLevel &= 1; - break; - } -} - -/** - * Finds a valid ReorderingScheme matching the - * caller-defined scheme. 
- * - * @return A valid ReorderingScheme object or NULL - */ -static const ReorderingScheme* -findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel, - UBiDiOrder inOrder, UBiDiOrder outOrder) -{ - uint32_t i; - for (i = 0; i < nSchemes; i++) { - const ReorderingScheme *pScheme = Schemes + i; - if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel - && inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) { - return pScheme; - } - } - return NULL; -} - -U_DRAFT uint32_t U_EXPORT2 -ubiditransform_transform(UBiDiTransform *pBiDiTransform, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destSize, - UBiDiLevel inParaLevel, UBiDiOrder inOrder, - UBiDiLevel outParaLevel, UBiDiOrder outOrder, - UBiDiMirroring doMirroring, uint32_t shapingOptions, - UErrorCode *pErrorCode) -{ - uint32_t destLength = 0; - UBool textChanged = FALSE; - const UBiDiTransform *pOrigTransform = pBiDiTransform; - const UBiDiAction *action = NULL; - - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (src == NULL || dest == NULL) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - CHECK_LEN(src, srcLength, pErrorCode); - CHECK_LEN(dest, destSize, pErrorCode); - - if (pBiDiTransform == NULL) { - pBiDiTransform = ubiditransform_open(pErrorCode); - if (U_FAILURE(*pErrorCode)) { - return 0; - } - } - /* Current limitation: in multiple paragraphs will be resolved according - to the 1st paragraph */ - resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel); - - pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel, - inOrder, outOrder); - if (pBiDiTransform->pActiveScheme == NULL) { - goto cleanup; - } - pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING - : UBIDI_REORDER_DEFAULT; - - /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text - scheme at the time shaping is invoked. 
*/ - shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK; - pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK; - pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK; - - updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode); - if (U_FAILURE(*pErrorCode)) { - goto cleanup; - } - if (pBiDiTransform->pBidi == NULL) { - pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode); - if (U_FAILURE(*pErrorCode)) { - goto cleanup; - } - } - pBiDiTransform->dest = dest; - pBiDiTransform->destSize = destSize; - pBiDiTransform->pDestLength = &destLength; - - /* Checking for U_SUCCESS() within the loop to bail out on first failure. */ - for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) { - if ((*action)(pBiDiTransform, pErrorCode)) { - if (action + 1) { - updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength, - *pBiDiTransform->pDestLength, pErrorCode); - } - textChanged = TRUE; - } - } - ubidi_setInverse(pBiDiTransform->pBidi, FALSE); - - if (!textChanged && U_SUCCESS(*pErrorCode)) { - /* Text was not changed - just copy src to dest */ - if (destSize < srcLength) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - } else { - u_strncpy(dest, src, srcLength); - destLength = srcLength; - } - } -cleanup: - if (pOrigTransform != pBiDiTransform) { - ubiditransform_close(pBiDiTransform); - } else { - pBiDiTransform->dest = NULL; - pBiDiTransform->pDestLength = NULL; - pBiDiTransform->srcLength = 0; - pBiDiTransform->destSize = 0; - } - return U_FAILURE(*pErrorCode) ? 0 : destLength; -} diff --git a/deps/node/deps/icu-small/source/common/ubidiwrt.cpp b/deps/node/deps/icu-small/source/common/ubidiwrt.cpp deleted file mode 100644 index a89099da..00000000 --- a/deps/node/deps/icu-small/source/common/ubidiwrt.cpp +++ /dev/null @@ -1,640 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2000-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ubidiwrt.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999aug06 -* created by: Markus W. Scherer, updated by Matitiahu Allouche -* -* This file contains implementations for BiDi functions that use -* the core algorithm and core API to write reordered text. -*/ - -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "unicode/uchar.h" -#include "unicode/ubidi.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "ustr_imp.h" -#include "ubidiimp.h" - -/* - * The function implementations in this file are designed - * for UTF-16 and UTF-32, not for UTF-8. - * - * Assumptions that are not true for UTF-8: - * - Any code point always needs the same number of code units - * ("minimum-length-problem" of UTF-8) - * - The BiDi control characters need only one code unit each - * - * Further assumptions for all UTFs: - * - u_charMirror(c) needs the same number of code units as c - */ -#if UTF_SIZE==8 -# error reimplement ubidi_writeReordered() for UTF-8, see comment above -#endif - -#define IS_COMBINING(type) ((1UL<<(type))&(1UL<0); - return srcLength; - } - case UBIDI_DO_MIRRORING: { - /* do mirroring */ - int32_t i=0, j=0; - UChar32 c; - - if(destSize0) { - c=*src++; - if(!IS_BIDI_CONTROL_CHAR(c)) { - --remaining; - } - } - return destSize-remaining; - } - *dest++=c; - } - } while(--srcLength>0); - return destSize-remaining; - } - default: { - /* remove BiDi control characters and do mirroring */ - int32_t remaining=destSize; - int32_t i, j=0; - UChar32 c; - do { - i=0; - U16_NEXT(src, i, srcLength, c); - src+=i; - srcLength-=i; - if(!IS_BIDI_CONTROL_CHAR(c)) { - 
remaining-=i; - if(remaining<0) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - - /* preflight the length */ - while(srcLength>0) { - c=*src++; - if(!IS_BIDI_CONTROL_CHAR(c)) { - --remaining; - } - --srcLength; - } - return destSize-remaining; - } - c=u_charMirror(c); - U16_APPEND_UNSAFE(dest, j, c); - } - } while(srcLength>0); - return j; - } - } /* end of switch */ -} - -static int32_t -doWriteReverse(const UChar *src, int32_t srcLength, - UChar *dest, int32_t destSize, - uint16_t options, - UErrorCode *pErrorCode) { - /* - * RTL run - - * - * RTL runs need to be copied to the destination in reverse order - * of code points, not code units, to keep Unicode characters intact. - * - * The general strategy for this is to read the source text - * in backward order, collect all code units for a code point - * (and optionally following combining characters, see below), - * and copy all these code units in ascending order - * to the destination for this run. - * - * Several options request whether combining characters - * should be kept after their base characters, - * whether BiDi control characters should be removed, and - * whether characters should be replaced by their mirror-image - * equivalent Unicode characters. - */ - int32_t i, j; - UChar32 c; - - /* optimize for several combinations of options */ - switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) { - case 0: - /* - * With none of the "complicated" options set, the destination - * run will have the same length as the source run, - * and there is no mirroring and no keeping combining characters - * with their base characters. - */ - if(destSize0); - break; - case UBIDI_KEEP_BASE_COMBINING: - /* - * Here, too, the destination - * run will have the same length as the source run, - * and there is no mirroring. - * We do need to keep combining characters with their base characters. 
- */ - if(destSize0 && IS_COMBINING(u_charType(c))); - - /* copy this "user character" */ - j=srcLength; - do { - *dest++=src[j++]; - } while(j0); - break; - default: - /* - * With several "complicated" options set, this is the most - * general and the slowest copying of an RTL run. - * We will do mirroring, remove BiDi controls, and - * keep combining characters with their base characters - * as requested. - */ - if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) { - i=srcLength; - } else { - /* we need to find out the destination length of the run, - which will not include the BiDi control characters */ - int32_t length=srcLength; - UChar ch; - - i=0; - do { - ch=*src++; - if(!IS_BIDI_CONTROL_CHAR(ch)) { - ++i; - } - } while(--length>0); - src-=srcLength; - } - - if(destSize0 && IS_COMBINING(u_charType(c))) { - U16_PREV(src, 0, srcLength, c); - } - } - - if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) { - /* do not copy this BiDi control character */ - continue; - } - - /* copy this "user character" */ - j=srcLength; - if(options&UBIDI_DO_MIRRORING) { - /* mirror only the base character */ - int32_t k=0; - c=u_charMirror(c); - U16_APPEND_UNSAFE(dest, k, c); - dest+=k; - j+=k; - } - while(j0); - break; - } /* end of switch */ - - return destSize; -} - -U_CAPI int32_t U_EXPORT2 -ubidi_writeReverse(const UChar *src, int32_t srcLength, - UChar *dest, int32_t destSize, - uint16_t options, - UErrorCode *pErrorCode) { - int32_t destLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* more error checking */ - if( src==NULL || srcLength<-1 || - destSize<0 || (destSize>0 && dest==NULL)) - { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* do input and output overlap? 
*/ - if( dest!=NULL && - ((src>=dest && src=src && dest0) { - destLength=doWriteReverse(src, srcLength, dest, destSize, options, pErrorCode); - } else { - /* nothing to do */ - destLength=0; - } - - return u_terminateUChars(dest, destSize, destLength, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -ubidi_writeReordered(UBiDi *pBiDi, - UChar *dest, int32_t destSize, - uint16_t options, - UErrorCode *pErrorCode) { - const UChar *text; - UChar *saveDest; - int32_t length, destCapacity; - int32_t run, runCount, logicalStart, runLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* more error checking */ - if( pBiDi==NULL || - (text=pBiDi->text)==NULL || (length=pBiDi->length)<0 || - destSize<0 || (destSize>0 && dest==NULL)) - { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* do input and output overlap? */ - if( dest!=NULL && - ((text>=dest && text=text && destoriginalLength))) - { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(length==0) { - /* nothing to do */ - return u_terminateUChars(dest, destSize, 0, pErrorCode); - } - - runCount=ubidi_countRuns(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* destSize shrinks, later destination length=destCapacity-destSize */ - saveDest=dest; - destCapacity=destSize; - - /* - * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the - * reordering mode (checked below) is appropriate. - */ - if(pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { - options|=UBIDI_INSERT_LRM_FOR_NUMERIC; - options&=~UBIDI_REMOVE_BIDI_CONTROLS; - } - /* - * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS - * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC. - */ - if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { - options|=UBIDI_REMOVE_BIDI_CONTROLS; - options&=~UBIDI_INSERT_LRM_FOR_NUMERIC; - } - /* - * If we do not perform the "inverse BiDi" algorithm, then we - * don't need to insert any LRMs, and don't need to test for it. 
- */ - if((pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_NUMBERS_AS_L) && - (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_LIKE_DIRECT) && - (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL) && - (pBiDi->reorderingMode != UBIDI_REORDER_RUNS_ONLY)) { - options&=~UBIDI_INSERT_LRM_FOR_NUMERIC; - } - /* - * Iterate through all visual runs and copy the run text segments to - * the destination, according to the options. - * - * The tests for where to insert LRMs ignore the fact that there may be - * BN codes or non-BMP code points at the beginning and end of a run; - * they may insert LRMs unnecessarily but the tests are faster this way - * (this would have to be improved for UTF-8). - * - * Note that the only errors that are set by doWriteXY() are buffer overflow - * errors. Ignore them until the end, and continue for preflighting. - */ - if(!(options&UBIDI_OUTPUT_REVERSE)) { - /* forward output */ - if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) { - /* do not insert BiDi controls */ - for(run=0; rundirProps; - const UChar *src; - UChar uc; - UBiDiDirection dir; - int32_t markFlag; - - for(run=0; runruns[run].insertRemove; - if(markFlag<0) { /* BiDi controls count */ - markFlag=0; - } - - if(UBIDI_LTR==dir) { - if((pBiDi->isInverse) && - (/*run>0 &&*/ dirProps[logicalStart]!=L)) { - markFlag |= LRM_BEFORE; - } - if (markFlag & LRM_BEFORE) { - uc=LRM_CHAR; - } - else if (markFlag & RLM_BEFORE) { - uc=RLM_CHAR; - } - else uc=0; - if(uc) { - if(destSize>0) { - *dest++=uc; - } - --destSize; - } - - runLength=doWriteForward(src, runLength, - dest, destSize, - (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); - if(dest!=NULL) { - dest+=runLength; - } - destSize-=runLength; - - if((pBiDi->isInverse) && - (/*run0) { - *dest++=uc; - } - --destSize; - } - } else { /* RTL run */ - if((pBiDi->isInverse) && - (/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1])))) { - markFlag |= RLM_BEFORE; - } - if (markFlag & LRM_BEFORE) { - 
uc=LRM_CHAR; - } - else if (markFlag & RLM_BEFORE) { - uc=RLM_CHAR; - } - else uc=0; - if(uc) { - if(destSize>0) { - *dest++=uc; - } - --destSize; - } - - runLength=doWriteReverse(src, runLength, - dest, destSize, - options, pErrorCode); - if(dest!=NULL) { - dest+=runLength; - } - destSize-=runLength; - - if((pBiDi->isInverse) && - (/*run0) { - *dest++=uc; - } - --destSize; - } - } - } - } - } else { - /* reverse output */ - if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) { - /* do not insert BiDi controls */ - for(run=runCount; --run>=0;) { - if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) { - runLength=doWriteReverse(text+logicalStart, runLength, - dest, destSize, - (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); - } else { - runLength=doWriteForward(text+logicalStart, runLength, - dest, destSize, - options, pErrorCode); - } - if(dest!=NULL) { - dest+=runLength; - } - destSize-=runLength; - } - } else { - /* insert BiDi controls for "inverse BiDi" */ - const DirProp *dirProps=pBiDi->dirProps; - const UChar *src; - UBiDiDirection dir; - - for(run=runCount; --run>=0;) { - /* reverse output */ - dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength); - src=text+logicalStart; - - if(UBIDI_LTR==dir) { - if(/*run0) { - *dest++=LRM_CHAR; - } - --destSize; - } - - runLength=doWriteReverse(src, runLength, - dest, destSize, - (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); - if(dest!=NULL) { - dest+=runLength; - } - destSize-=runLength; - - if(/*run>0 &&*/ dirProps[logicalStart]!=L) { - if(destSize>0) { - *dest++=LRM_CHAR; - } - --destSize; - } - } else { - if(/*run0) { - *dest++=RLM_CHAR; - } - --destSize; - } - - runLength=doWriteForward(src, runLength, - dest, destSize, - options, pErrorCode); - if(dest!=NULL) { - dest+=runLength; - } - destSize-=runLength; - - if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) { - if(destSize>0) { - *dest++=RLM_CHAR; - } - --destSize; - } - } - } - } - } - - return 
u_terminateUChars(saveDest, destCapacity, destCapacity-destSize, pErrorCode); -} diff --git a/deps/node/deps/icu-small/source/common/ubrk.cpp b/deps/node/deps/icu-small/source/common/ubrk.cpp deleted file mode 100644 index f8bdf5a6..00000000 --- a/deps/node/deps/icu-small/source/common/ubrk.cpp +++ /dev/null @@ -1,357 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************** -* Copyright (C) 1996-2015, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************** -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/ubrk.h" - -#include "unicode/brkiter.h" -#include "unicode/uloc.h" -#include "unicode/ustring.h" -#include "unicode/uchriter.h" -#include "unicode/rbbi.h" -#include "rbbirb.h" -#include "uassert.h" -#include "cmemory.h" - -U_NAMESPACE_USE - -//------------------------------------------------------------------------------ -// -// ubrk_open Create a canned type of break iterator based on type (word, line, etc.) -// and locale. 
-// -//------------------------------------------------------------------------------ -U_CAPI UBreakIterator* U_EXPORT2 -ubrk_open(UBreakIteratorType type, - const char *locale, - const UChar *text, - int32_t textLength, - UErrorCode *status) -{ - - if(U_FAILURE(*status)) return 0; - - BreakIterator *result = 0; - - switch(type) { - - case UBRK_CHARACTER: - result = BreakIterator::createCharacterInstance(Locale(locale), *status); - break; - - case UBRK_WORD: - result = BreakIterator::createWordInstance(Locale(locale), *status); - break; - - case UBRK_LINE: - result = BreakIterator::createLineInstance(Locale(locale), *status); - break; - - case UBRK_SENTENCE: - result = BreakIterator::createSentenceInstance(Locale(locale), *status); - break; - - case UBRK_TITLE: - result = BreakIterator::createTitleInstance(Locale(locale), *status); - break; - - default: - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - - // check for allocation error - if (U_FAILURE(*status)) { - return 0; - } - if(result == 0) { - *status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - - UBreakIterator *uBI = (UBreakIterator *)result; - if (text != NULL) { - ubrk_setText(uBI, text, textLength, status); - } - return uBI; -} - - - -//------------------------------------------------------------------------------ -// -// ubrk_openRules open a break iterator from a set of break rules. -// Invokes the rule builder. 
-// -//------------------------------------------------------------------------------ -U_CAPI UBreakIterator* U_EXPORT2 -ubrk_openRules( const UChar *rules, - int32_t rulesLength, - const UChar *text, - int32_t textLength, - UParseError *parseErr, - UErrorCode *status) { - - if (status == NULL || U_FAILURE(*status)){ - return 0; - } - - BreakIterator *result = 0; - UnicodeString ruleString(rules, rulesLength); - result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status); - if(U_FAILURE(*status)) { - return 0; - } - - UBreakIterator *uBI = (UBreakIterator *)result; - if (text != NULL) { - ubrk_setText(uBI, text, textLength, status); - } - return uBI; -} - - -U_CAPI UBreakIterator* U_EXPORT2 -ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, - const UChar * text, int32_t textLength, - UErrorCode * status) -{ - if (U_FAILURE(*status)) { - return NULL; - } - if (rulesLength < 0) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - LocalPointer lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status); - if (U_FAILURE(*status)) { - return NULL; - } - UBreakIterator *uBI = reinterpret_cast(lpRBBI.orphan()); - if (text != NULL) { - ubrk_setText(uBI, text, textLength, status); - } - return uBI; -} - - -U_CAPI UBreakIterator * U_EXPORT2 -ubrk_safeClone( - const UBreakIterator *bi, - void * /*stackBuffer*/, - int32_t *pBufferSize, - UErrorCode *status) -{ - if (status == NULL || U_FAILURE(*status)){ - return NULL; - } - if (bi == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - if (pBufferSize != NULL) { - int32_t inputSize = *pBufferSize; - *pBufferSize = 1; - if (inputSize == 0) { - return NULL; // preflighting for deprecated functionality - } - } - BreakIterator *newBI = ((BreakIterator *)bi)->clone(); - if (newBI == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - } else { - *status = U_SAFECLONE_ALLOCATED_WARNING; - } - return (UBreakIterator *)newBI; -} - - - -U_CAPI void 
U_EXPORT2 -ubrk_close(UBreakIterator *bi) -{ - delete (BreakIterator *)bi; -} - -U_CAPI void U_EXPORT2 -ubrk_setText(UBreakIterator* bi, - const UChar* text, - int32_t textLength, - UErrorCode* status) -{ - UText ut = UTEXT_INITIALIZER; - utext_openUChars(&ut, text, textLength, status); - ((BreakIterator*)bi)->setText(&ut, *status); - // A stack allocated UText wrapping a UChar * string - // can be dumped without explicitly closing it. -} - - - -U_CAPI void U_EXPORT2 -ubrk_setUText(UBreakIterator *bi, - UText *text, - UErrorCode *status) -{ - ((BreakIterator*)bi)->setText(text, *status); -} - - - - - -U_CAPI int32_t U_EXPORT2 -ubrk_current(const UBreakIterator *bi) -{ - - return ((BreakIterator*)bi)->current(); -} - -U_CAPI int32_t U_EXPORT2 -ubrk_next(UBreakIterator *bi) -{ - - return ((BreakIterator*)bi)->next(); -} - -U_CAPI int32_t U_EXPORT2 -ubrk_previous(UBreakIterator *bi) -{ - - return ((BreakIterator*)bi)->previous(); -} - -U_CAPI int32_t U_EXPORT2 -ubrk_first(UBreakIterator *bi) -{ - - return ((BreakIterator*)bi)->first(); -} - -U_CAPI int32_t U_EXPORT2 -ubrk_last(UBreakIterator *bi) -{ - - return ((BreakIterator*)bi)->last(); -} - -U_CAPI int32_t U_EXPORT2 -ubrk_preceding(UBreakIterator *bi, - int32_t offset) -{ - - return ((BreakIterator*)bi)->preceding(offset); -} - -U_CAPI int32_t U_EXPORT2 -ubrk_following(UBreakIterator *bi, - int32_t offset) -{ - - return ((BreakIterator*)bi)->following(offset); -} - -U_CAPI const char* U_EXPORT2 -ubrk_getAvailable(int32_t index) -{ - - return uloc_getAvailable(index); -} - -U_CAPI int32_t U_EXPORT2 -ubrk_countAvailable() -{ - - return uloc_countAvailable(); -} - - -U_CAPI UBool U_EXPORT2 -ubrk_isBoundary(UBreakIterator *bi, int32_t offset) -{ - return ((BreakIterator*)bi)->isBoundary(offset); -} - - -U_CAPI int32_t U_EXPORT2 -ubrk_getRuleStatus(UBreakIterator *bi) -{ - return ((BreakIterator*)bi)->getRuleStatus(); -} - -U_CAPI int32_t U_EXPORT2 -ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t 
capacity, UErrorCode *status) -{ - return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status); -} - - -U_CAPI const char* U_EXPORT2 -ubrk_getLocaleByType(const UBreakIterator *bi, - ULocDataLocaleType type, - UErrorCode* status) -{ - if (bi == NULL) { - if (U_SUCCESS(*status)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return NULL; - } - return ((BreakIterator*)bi)->getLocaleID(type, *status); -} - - -U_CAPI void U_EXPORT2 -ubrk_refreshUText(UBreakIterator *bi, - UText *text, - UErrorCode *status) -{ - BreakIterator *bii = reinterpret_cast(bi); - bii->refreshInputText(text, *status); -} - -U_CAPI int32_t U_EXPORT2 -ubrk_getBinaryRules(UBreakIterator *bi, - uint8_t * binaryRules, int32_t rulesCapacity, - UErrorCode * status) -{ - if (U_FAILURE(*status)) { - return 0; - } - if ((binaryRules == NULL && rulesCapacity > 0) || rulesCapacity < 0) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - RuleBasedBreakIterator* rbbi; - if ((rbbi = dynamic_cast(reinterpret_cast(bi))) == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - uint32_t rulesLength; - const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength); - if (rulesLength > INT32_MAX) { - *status = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - if (binaryRules != NULL) { // if not preflighting - // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely - if ((int32_t)rulesLength > rulesCapacity) { - *status = U_BUFFER_OVERFLOW_ERROR; - } else { - uprv_memcpy(binaryRules, returnedRules, rulesLength); - } - } - return (int32_t)rulesLength; -} - - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/deps/node/deps/icu-small/source/common/ubrkimpl.h b/deps/node/deps/icu-small/source/common/ubrkimpl.h deleted file mode 100644 index 8197f663..00000000 --- a/deps/node/deps/icu-small/source/common/ubrkimpl.h +++ /dev/null @@ -1,15 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2006, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ - -#ifndef UBRKIMPL_H -#define UBRKIMPL_H - -#define U_ICUDATA_BRKITR U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "brkitr" - -#endif /*UBRKIMPL_H*/ diff --git a/deps/node/deps/icu-small/source/common/ucase.cpp b/deps/node/deps/icu-small/source/common/ucase.cpp deleted file mode 100644 index 50c8d20c..00000000 --- a/deps/node/deps/icu-small/source/common/ucase.cpp +++ /dev/null @@ -1,1571 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2004-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ucase.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004aug30 -* created by: Markus W. Scherer -* -* Low-level Unicode character/string case mapping code. -* Much code moved here (and modified) from uchar.c. 
-*/ - -#include "unicode/utypes.h" -#include "unicode/unistr.h" -#include "unicode/uset.h" -#include "unicode/udata.h" /* UDataInfo */ -#include "unicode/utf16.h" -#include "ucmndata.h" /* DataHeader */ -#include "udatamem.h" -#include "umutex.h" -#include "uassert.h" -#include "cmemory.h" -#include "utrie2.h" -#include "ucase.h" - -struct UCaseProps { - UDataMemory *mem; - const int32_t *indexes; - const uint16_t *exceptions; - const uint16_t *unfold; - - UTrie2 trie; - uint8_t formatVersion[4]; -}; - -/* ucase_props_data.h is machine-generated by gencase --csource */ -#define INCLUDED_FROM_UCASE_CPP -#include "ucase_props_data.h" - -/* set of property starts for UnicodeSet ------------------------------------ */ - -static UBool U_CALLCONV -_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) { - /* add the start code point to the USet */ - const USetAdder *sa=(const USetAdder *)context; - sa->add(sa->set, start); - return TRUE; -} - -U_CFUNC void U_EXPORT2 -ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* add the start code point of each same-value range of the trie */ - utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa); - - /* add code points with hardcoded properties, plus the ones following them */ - - /* (none right now, see comment below) */ - - /* - * Omit code points with hardcoded specialcasing properties - * because we do not build property UnicodeSets for them right now. 
- */ -} - -/* data access primitives --------------------------------------------------- */ - -U_CFUNC const UTrie2 * U_EXPORT2 -ucase_getTrie() { - return &ucase_props_singleton.trie; -} - -#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT)) - -/* number of bits in an 8-bit integer value */ -static const uint8_t flagsOffset[256]={ - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 -}; - -#define HAS_SLOT(flags, idx) ((flags)&(1<<(idx))) -#define SLOT_OFFSET(flags, idx) flagsOffset[(flags)&((1<<(idx))-1)] - -/* - * Get the value of an optional-value slot where HAS_SLOT(excWord, idx). 
- * - * @param excWord (in) initial exceptions word - * @param idx (in) desired slot index - * @param pExc16 (in/out) const uint16_t * after excWord=*pExc16++; - * moved to the last uint16_t of the value, use +1 for beginning of next slot - * @param value (out) int32_t or uint32_t output if hasSlot, otherwise not modified - */ -#define GET_SLOT_VALUE(excWord, idx, pExc16, value) \ - if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \ - (pExc16)+=SLOT_OFFSET(excWord, idx); \ - (value)=*pExc16; \ - } else { \ - (pExc16)+=2*SLOT_OFFSET(excWord, idx); \ - (value)=*pExc16++; \ - (value)=((value)<<16)|*pExc16; \ - } - -/* simple case mappings ----------------------------------------------------- */ - -U_CAPI UChar32 U_EXPORT2 -ucase_tolower(UChar32 c) { - uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!UCASE_HAS_EXCEPTION(props)) { - if(UCASE_IS_UPPER_OR_TITLE(props)) { - c+=UCASE_GET_DELTA(props); - } - } else { - const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); - uint16_t excWord=*pe++; - if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) { - int32_t delta; - GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); - return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; - } - if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { - GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c); - } - } - return c; -} - -U_CAPI UChar32 U_EXPORT2 -ucase_toupper(UChar32 c) { - uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!UCASE_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)==UCASE_LOWER) { - c+=UCASE_GET_DELTA(props); - } - } else { - const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); - uint16_t excWord=*pe++; - if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) { - int32_t delta; - GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); - return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? 
c+delta : c-delta; - } - if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) { - GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c); - } - } - return c; -} - -U_CAPI UChar32 U_EXPORT2 -ucase_totitle(UChar32 c) { - uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!UCASE_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)==UCASE_LOWER) { - c+=UCASE_GET_DELTA(props); - } - } else { - const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); - uint16_t excWord=*pe++; - if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) { - int32_t delta; - GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); - return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; - } - int32_t idx; - if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) { - idx=UCASE_EXC_TITLE; - } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) { - idx=UCASE_EXC_UPPER; - } else { - return c; - } - GET_SLOT_VALUE(excWord, idx, pe, c); - } - return c; -} - -static const UChar iDot[2] = { 0x69, 0x307 }; -static const UChar jDot[2] = { 0x6a, 0x307 }; -static const UChar iOgonekDot[3] = { 0x12f, 0x307 }; -static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 }; -static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 }; -static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 }; - - -U_CFUNC void U_EXPORT2 -ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { - uint16_t props; - - /* - * Hardcode the case closure of i and its relatives and ignore the - * data file data for these characters. - * The Turkic dotless i and dotted I with their case mapping conditions - * and case folding option make the related characters behave specially. - * This code matches their closure behavior to their case folding behavior. 
- */ - - switch(c) { - case 0x49: - /* regular i and I are in one equivalence class */ - sa->add(sa->set, 0x69); - return; - case 0x69: - sa->add(sa->set, 0x49); - return; - case 0x130: - /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */ - sa->addString(sa->set, iDot, 2); - return; - case 0x131: - /* dotless i is in a class by itself */ - return; - default: - /* otherwise use the data file data */ - break; - } - - props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!UCASE_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)!=UCASE_NONE) { - /* add the one simple case mapping, no matter what type it is */ - int32_t delta=UCASE_GET_DELTA(props); - if(delta!=0) { - sa->add(sa->set, c+delta); - } - } - } else { - /* - * c has exceptions, so there may be multiple simple and/or - * full case mappings. Add them all. - */ - const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); - const UChar *closure; - uint16_t excWord=*pe++; - int32_t idx, closureLength, fullLength, length; - - pe0=pe; - - /* add all simple case mappings */ - for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) { - if(HAS_SLOT(excWord, idx)) { - pe=pe0; - GET_SLOT_VALUE(excWord, idx, pe, c); - sa->add(sa->set, c); - } - } - if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) { - pe=pe0; - int32_t delta; - GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); - sa->add(sa->set, (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? 
c+delta : c-delta); - } - - /* get the closure string pointer & length */ - if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) { - pe=pe0; - GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength); - closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */ - closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */ - } else { - closureLength=0; - closure=NULL; - } - - /* add the full case folding */ - if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { - pe=pe0; - GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength); - - /* start of full case mapping strings */ - ++pe; - - fullLength&=0xffff; /* bits 16 and higher are reserved */ - - /* skip the lowercase result string */ - pe+=fullLength&UCASE_FULL_LOWER; - fullLength>>=4; - - /* add the full case folding string */ - length=fullLength&0xf; - if(length!=0) { - sa->addString(sa->set, (const UChar *)pe, length); - pe+=length; - } - - /* skip the uppercase and titlecase strings */ - fullLength>>=4; - pe+=fullLength&0xf; - fullLength>>=4; - pe+=fullLength; - - closure=(const UChar *)pe; /* behind full case mappings */ - } - - /* add each code point in the closure string */ - for(idx=0; idxadd(sa->set, c); - } - } -} - -/* - * compare s, which has a length, with t, which has a maximum length or is NUL-terminated - * must be length>0 and max>0 and length<=max - */ -static inline int32_t -strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) { - int32_t c1, c2; - - max-=length; /* we require length<=max, so no need to decrement max in the loop */ - do { - c1=*s++; - c2=*t++; - if(c2==0) { - return 1; /* reached the end of t but not of s */ - } - c1-=c2; - if(c1!=0) { - return c1; /* return difference result */ - } - } while(--length>0); - /* ends with length==0 */ - - if(max==0 || *t==0) { - return 0; /* equal to length of both strings */ - } else { - return -max; /* return lengh difference */ - } -} - -U_CFUNC UBool U_EXPORT2 
-ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) { - int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth; - - if(ucase_props_singleton.unfold==NULL || s==NULL) { - return FALSE; /* no reverse case folding data, or no string */ - } - if(length<=1) { - /* the string is too short to find any match */ - /* - * more precise would be: - * if(!u_strHasMoreChar32Than(s, length, 1)) - * but this does not make much practical difference because - * a single supplementary code point would just not be found - */ - return FALSE; - } - - const uint16_t *unfold=ucase_props_singleton.unfold; - unfoldRows=unfold[UCASE_UNFOLD_ROWS]; - unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH]; - unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH]; - unfold+=unfoldRowWidth; - - if(length>unfoldStringWidth) { - /* the string is too long to find any match */ - return FALSE; - } - - /* do a binary search for the string */ - start=0; - limit=unfoldRows; - while(start(unfold+(i*unfoldRowWidth)); - result=strcmpMax(s, length, p, unfoldStringWidth); - - if(result==0) { - /* found the string: add each code point, and its case closure */ - UChar32 c; - - for(i=unfoldStringWidth; iadd(sa->set, c); - ucase_addCaseClosure(c, sa); - } - return TRUE; - } else if(result<0) { - limit=i; - } else /* result>0 */ { - start=i+1; - } - } - - return FALSE; /* string not found */ -} - -U_NAMESPACE_BEGIN - -FullCaseFoldingIterator::FullCaseFoldingIterator() - : unfold(reinterpret_cast(ucase_props_singleton.unfold)), - unfoldRows(unfold[UCASE_UNFOLD_ROWS]), - unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]), - unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]), - currentRow(0), - rowCpIndex(unfoldStringWidth) { - unfold+=unfoldRowWidth; -} - -UChar32 -FullCaseFoldingIterator::next(UnicodeString &full) { - // Advance past the last-delivered code point. 
- const UChar *p=unfold+(currentRow*unfoldRowWidth); - if(rowCpIndex>=unfoldRowWidth || p[rowCpIndex]==0) { - ++currentRow; - p+=unfoldRowWidth; - rowCpIndex=unfoldStringWidth; - } - if(currentRow>=unfoldRows) { return U_SENTINEL; } - // Set "full" to the NUL-terminated string in the first unfold column. - int32_t length=unfoldStringWidth; - while(length>0 && p[length-1]==0) { --length; } - full.setTo(FALSE, p, length); - // Return the code point. - UChar32 c; - U16_NEXT_UNSAFE(p, rowCpIndex, c); - return c; -} - -namespace LatinCase { - -const int8_t TO_LOWER_NORMAL[LIMIT] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, - EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, - - 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC -}; - -const int8_t TO_LOWER_TR_LT[LIMIT] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0, - EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, - - 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, - 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC -}; - -const int8_t TO_UPPER_NORMAL[LIMIT] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, - -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
EXC, - -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, - -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121, - - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, - 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, - - -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1, - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, - 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC -}; - -const int8_t TO_UPPER_TR[LIMIT] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32, - -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC, - -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, - -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121, - - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, - 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, - - -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1, - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, - 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 
-1, 0, -1, 0, -1, - 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC -}; - -} // namespace LatinCase - -U_NAMESPACE_END - -/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ -U_CAPI int32_t U_EXPORT2 -ucase_getType(UChar32 c) { - uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - return UCASE_GET_TYPE(props); -} - -/** @return same as ucase_getType() and set bit 2 if c is case-ignorable */ -U_CAPI int32_t U_EXPORT2 -ucase_getTypeOrIgnorable(UChar32 c) { - uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - return UCASE_GET_TYPE_AND_IGNORABLE(props); -} - -/** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */ -static inline int32_t -getDotType(UChar32 c) { - uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!UCASE_HAS_EXCEPTION(props)) { - return props&UCASE_DOT_MASK; - } else { - const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); - return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK; - } -} - -U_CAPI UBool U_EXPORT2 -ucase_isSoftDotted(UChar32 c) { - return (UBool)(getDotType(c)==UCASE_SOFT_DOTTED); -} - -U_CAPI UBool U_EXPORT2 -ucase_isCaseSensitive(UChar32 c) { - uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!UCASE_HAS_EXCEPTION(props)) { - return (UBool)((props&UCASE_SENSITIVE)!=0); - } else { - const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); - return (UBool)((*pe&UCASE_EXC_SENSITIVE)!=0); - } -} - -/* string casing ------------------------------------------------------------ */ - -/* - * These internal functions form the core of string case mappings. - * They map single code points to result code points or strings and take - * all necessary conditions (context, locale ID, options) into account. - * - * They do not iterate over the source or write to the destination - * so that the same functions are useful for non-standard string storage, - * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc. 
- * For the same reason, the "surrounding text" context is passed in as a - * UCaseContextIterator which does not make any assumptions about - * the underlying storage. - * - * This section contains helper functions that check for conditions - * in the input text surrounding the current code point - * according to SpecialCasing.txt. - * - * Each helper function gets the index - * - after the current code point if it looks at following text - * - before the current code point if it looks at preceding text - * - * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows: - * - * Final_Sigma - * C is preceded by a sequence consisting of - * a cased letter and a case-ignorable sequence, - * and C is not followed by a sequence consisting of - * an ignorable sequence and then a cased letter. - * - * More_Above - * C is followed by one or more characters of combining class 230 (ABOVE) - * in the combining character sequence. - * - * After_Soft_Dotted - * The last preceding character with combining class of zero before C - * was Soft_Dotted, - * and there is no intervening combining character class 230 (ABOVE). - * - * Before_Dot - * C is followed by combining dot above (U+0307). - * Any sequence of characters with a combining class that is neither 0 nor 230 - * may intervene between the current character and the combining dot above. - * - * The erratum from 2002-10-31 adds the condition - * - * After_I - * The last preceding base character was an uppercase I, and there is no - * intervening combining character class 230 (ABOVE). - * - * (See Jitterbug 2344 and the comments on After_I below.) - * - * Helper definitions in Unicode 3.2 UAX 21: - * - * D1. 
A character C is defined to be cased - * if it meets any of the following criteria: - * - * - The general category of C is Titlecase Letter (Lt) - * - In [CoreProps], C has one of the properties Uppercase, or Lowercase - * - Given D = NFD(C), then it is not the case that: - * D = UCD_lower(D) = UCD_upper(D) = UCD_title(D) - * (This third criterium does not add any characters to the list - * for Unicode 3.2. Ignored.) - * - * D2. A character C is defined to be case-ignorable - * if it meets either of the following criteria: - * - * - The general category of C is - * Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or - * Letter Modifier (Lm), or Symbol Modifier (Sk) - * - C is one of the following characters - * U+0027 APOSTROPHE - * U+00AD SOFT HYPHEN (SHY) - * U+2019 RIGHT SINGLE QUOTATION MARK - * (the preferred character for apostrophe) - * - * D3. A case-ignorable sequence is a sequence of - * zero or more case-ignorable characters. - */ - -#define is_d(c) ((c)=='d' || (c)=='D') -#define is_e(c) ((c)=='e' || (c)=='E') -#define is_i(c) ((c)=='i' || (c)=='I') -#define is_l(c) ((c)=='l' || (c)=='L') -#define is_r(c) ((c)=='r' || (c)=='R') -#define is_t(c) ((c)=='t' || (c)=='T') -#define is_u(c) ((c)=='u' || (c)=='U') -#define is_z(c) ((c)=='z' || (c)=='Z') - -/* separator? */ -#define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0) - -/** - * Requires non-NULL locale ID but otherwise does the equivalent of - * checking for language codes as if uloc_getLanguage() were called: - * Accepts both 2- and 3-letter codes and accepts case variants. - */ -U_CFUNC int32_t -ucase_getCaseLocale(const char *locale) { - /* - * This function used to use uloc_getLanguage(), but the current code - * removes the dependency of this low-level code on uloc implementation code - * and is faster because not the whole locale ID has to be - * examined and copied/transformed. 
- * - * Because this code does not want to depend on uloc, the caller must - * pass in a non-NULL locale, i.e., may need to call uloc_getDefault(). - */ - char c=*locale++; - // Fastpath for English "en" which is often used for default (=root locale) case mappings, - // and for Chinese "zh": Very common but no special case mapping behavior. - // Then check lowercase vs. uppercase to reduce the number of comparisons - // for other locales without special behavior. - if(c=='e') { - /* el or ell? */ - c=*locale++; - if(is_l(c)) { - c=*locale++; - if(is_l(c)) { - c=*locale; - } - if(is_sep(c)) { - return UCASE_LOC_GREEK; - } - } - // en, es, ... -> root - } else if(c=='z') { - return UCASE_LOC_ROOT; -#if U_CHARSET_FAMILY==U_ASCII_FAMILY - } else if(c>='a') { // ASCII a-z = 0x61..0x7a, after A-Z -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY - } else if(c<='z') { // EBCDIC a-z = 0x81..0xa9 with two gaps, before A-Z -#else -# error Unknown charset family! -#endif - // lowercase c - if(c=='t') { - /* tr or tur? */ - c=*locale++; - if(is_u(c)) { - c=*locale++; - } - if(is_r(c)) { - c=*locale; - if(is_sep(c)) { - return UCASE_LOC_TURKISH; - } - } - } else if(c=='a') { - /* az or aze? */ - c=*locale++; - if(is_z(c)) { - c=*locale++; - if(is_e(c)) { - c=*locale; - } - if(is_sep(c)) { - return UCASE_LOC_TURKISH; - } - } - } else if(c=='l') { - /* lt or lit? */ - c=*locale++; - if(is_i(c)) { - c=*locale++; - } - if(is_t(c)) { - c=*locale; - if(is_sep(c)) { - return UCASE_LOC_LITHUANIAN; - } - } - } else if(c=='n') { - /* nl or nld? */ - c=*locale++; - if(is_l(c)) { - c=*locale++; - if(is_d(c)) { - c=*locale; - } - if(is_sep(c)) { - return UCASE_LOC_DUTCH; - } - } - } - } else { - // uppercase c - // Same code as for lowercase c but also check for 'E'. - if(c=='T') { - /* tr or tur? */ - c=*locale++; - if(is_u(c)) { - c=*locale++; - } - if(is_r(c)) { - c=*locale; - if(is_sep(c)) { - return UCASE_LOC_TURKISH; - } - } - } else if(c=='A') { - /* az or aze? 
*/ - c=*locale++; - if(is_z(c)) { - c=*locale++; - if(is_e(c)) { - c=*locale; - } - if(is_sep(c)) { - return UCASE_LOC_TURKISH; - } - } - } else if(c=='L') { - /* lt or lit? */ - c=*locale++; - if(is_i(c)) { - c=*locale++; - } - if(is_t(c)) { - c=*locale; - if(is_sep(c)) { - return UCASE_LOC_LITHUANIAN; - } - } - } else if(c=='E') { - /* el or ell? */ - c=*locale++; - if(is_l(c)) { - c=*locale++; - if(is_l(c)) { - c=*locale; - } - if(is_sep(c)) { - return UCASE_LOC_GREEK; - } - } - } else if(c=='N') { - /* nl or nld? */ - c=*locale++; - if(is_l(c)) { - c=*locale++; - if(is_d(c)) { - c=*locale; - } - if(is_sep(c)) { - return UCASE_LOC_DUTCH; - } - } - } - } - return UCASE_LOC_ROOT; -} - -/* - * Is followed by - * {case-ignorable}* cased - * ? - * (dir determines looking forward/backward) - * If a character is case-ignorable, it is skipped regardless of whether - * it is also cased or not. - */ -static UBool -isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) { - UChar32 c; - - if(iter==NULL) { - return FALSE; - } - - for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) { - int32_t type=ucase_getTypeOrIgnorable(c); - if(type&4) { - /* case-ignorable, continue with the loop */ - } else if(type!=UCASE_NONE) { - return TRUE; /* followed by cased letter */ - } else { - return FALSE; /* uncased and not case-ignorable */ - } - } - - return FALSE; /* not followed by cased letter */ -} - -/* Is preceded by Soft_Dotted character with no intervening cc=230 ? 
*/ -static UBool -isPrecededBySoftDotted(UCaseContextIterator *iter, void *context) { - UChar32 c; - int32_t dotType; - int8_t dir; - - if(iter==NULL) { - return FALSE; - } - - for(dir=-1; (c=iter(context, dir))>=0; dir=0) { - dotType=getDotType(c); - if(dotType==UCASE_SOFT_DOTTED) { - return TRUE; /* preceded by TYPE_i */ - } else if(dotType!=UCASE_OTHER_ACCENT) { - return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */ - } - } - - return FALSE; /* not preceded by TYPE_i */ -} - -/* - * See Jitterbug 2344: - * The condition After_I for Turkic-lowercasing of U+0307 combining dot above - * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because - * we made those releases compatible with Unicode 3.2 which had not fixed - * a related bug in SpecialCasing.txt. - * - * From the Jitterbug 2344 text: - * ... this bug is listed as a Unicode erratum - * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html - * - * There are two errors in SpecialCasing.txt. - * 1. Missing semicolons on two lines. ... [irrelevant for ICU] - * 2. An incorrect context definition. Correct as follows: - * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE - * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE - * --- - * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE - * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE - * where the context After_I is defined as: - * The last preceding base character was an uppercase I, and there is no - * intervening combining character class 230 (ABOVE). - * - * - * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as: - * - * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. - * # This matches the behavior of the canonically equivalent I-dot_above - * - * See also the description in this place in older versions of uchar.c (revision 1.100). - * - * Markus W. 
Scherer 2003-feb-15 - */ - -/* Is preceded by base character 'I' with no intervening cc=230 ? */ -static UBool -isPrecededBy_I(UCaseContextIterator *iter, void *context) { - UChar32 c; - int32_t dotType; - int8_t dir; - - if(iter==NULL) { - return FALSE; - } - - for(dir=-1; (c=iter(context, dir))>=0; dir=0) { - if(c==0x49) { - return TRUE; /* preceded by I */ - } - dotType=getDotType(c); - if(dotType!=UCASE_OTHER_ACCENT) { - return FALSE; /* preceded by different base character (not I), or intervening cc==230 */ - } - } - - return FALSE; /* not preceded by I */ -} - -/* Is followed by one or more cc==230 ? */ -static UBool -isFollowedByMoreAbove(UCaseContextIterator *iter, void *context) { - UChar32 c; - int32_t dotType; - int8_t dir; - - if(iter==NULL) { - return FALSE; - } - - for(dir=1; (c=iter(context, dir))>=0; dir=0) { - dotType=getDotType(c); - if(dotType==UCASE_ABOVE) { - return TRUE; /* at least one cc==230 following */ - } else if(dotType!=UCASE_OTHER_ACCENT) { - return FALSE; /* next base character, no more cc==230 following */ - } - } - - return FALSE; /* no more cc==230 following */ -} - -/* Is followed by a dot above (without cc==230 in between) ? */ -static UBool -isFollowedByDotAbove(UCaseContextIterator *iter, void *context) { - UChar32 c; - int32_t dotType; - int8_t dir; - - if(iter==NULL) { - return FALSE; - } - - for(dir=1; (c=iter(context, dir))>=0; dir=0) { - if(c==0x307) { - return TRUE; - } - dotType=getDotType(c); - if(dotType!=UCASE_OTHER_ACCENT) { - return FALSE; /* next base character or cc==230 in between */ - } - } - - return FALSE; /* no dot above following */ -} - -U_CAPI int32_t U_EXPORT2 -ucase_toFullLower(UChar32 c, - UCaseContextIterator *iter, void *context, - const UChar **pString, - int32_t loc) { - // The sign of the result has meaning, input must be non-negative so that it can be returned as is. 
- U_ASSERT(c >= 0); - UChar32 result=c; - uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!UCASE_HAS_EXCEPTION(props)) { - if(UCASE_IS_UPPER_OR_TITLE(props)) { - result=c+UCASE_GET_DELTA(props); - } - } else { - const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2; - uint16_t excWord=*pe++; - int32_t full; - - pe2=pe; - - if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) { - /* use hardcoded conditions and mappings */ - - /* - * Test for conditional mappings first - * (otherwise the unconditional default mappings are always taken), - * then test for characters that have unconditional mappings in SpecialCasing.txt, - * then get the UnicodeData.txt mappings. - */ - if( loc==UCASE_LOC_LITHUANIAN && - /* base characters, find accents above */ - (((c==0x49 || c==0x4a || c==0x12e) && - isFollowedByMoreAbove(iter, context)) || - /* precomposed with accent above, no need to find one */ - (c==0xcc || c==0xcd || c==0x128)) - ) { - /* - # Lithuanian - - # Lithuanian retains the dot in a lowercase i when followed by accents. - - # Introduce an explicit dot above when lowercasing capital I's and J's - # whenever there are more accents above. 
- # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) - - 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I - 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J - 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK - 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE - 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE - 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE - */ - switch(c) { - case 0x49: /* LATIN CAPITAL LETTER I */ - *pString=iDot; - return 2; - case 0x4a: /* LATIN CAPITAL LETTER J */ - *pString=jDot; - return 2; - case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */ - *pString=iOgonekDot; - return 2; - case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */ - *pString=iDotGrave; - return 3; - case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */ - *pString=iDotAcute; - return 3; - case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */ - *pString=iDotTilde; - return 3; - default: - return 0; /* will not occur */ - } - /* # Turkish and Azeri */ - } else if(loc==UCASE_LOC_TURKISH && c==0x130) { - /* - # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri - # The following rules handle those cases. - - 0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE - */ - return 0x69; - } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(iter, context)) { - /* - # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. 
- # This matches the behavior of the canonically equivalent I-dot_above - - 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE - 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE - */ - *pString=nullptr; - return 0; /* remove the dot (continue without output) */ - } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) { - /* - # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. - - 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I - 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I - */ - return 0x131; - } else if(c==0x130) { - /* - # Preserve canonical equivalence for I with dot. Turkic is handled below. - - 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE - */ - *pString=iDot; - return 2; - } else if( c==0x3a3 && - !isFollowedByCasedLetter(iter, context, 1) && - isFollowedByCasedLetter(iter, context, -1) /* -1=preceded */ - ) { - /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */ - /* - # Special case for final form of sigma - - 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA - */ - return 0x3c2; /* greek small final sigma */ - } else { - /* no known conditional special case mapping, use a normal mapping */ - } - } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { - GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full); - full&=UCASE_FULL_LOWER; - if(full!=0) { - /* set the output pointer to the lowercase mapping */ - *pString=reinterpret_cast(pe+1); - - /* return the string length */ - return full; - } - } - - if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) { - int32_t delta; - GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta); - return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; - } - if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { - GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result); - } - } - - return (result==c) ? 
~result : result; -} - -/* internal */ -static int32_t -toUpperOrTitle(UChar32 c, - UCaseContextIterator *iter, void *context, - const UChar **pString, - int32_t loc, - UBool upperNotTitle) { - // The sign of the result has meaning, input must be non-negative so that it can be returned as is. - U_ASSERT(c >= 0); - UChar32 result=c; - uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!UCASE_HAS_EXCEPTION(props)) { - if(UCASE_GET_TYPE(props)==UCASE_LOWER) { - result=c+UCASE_GET_DELTA(props); - } - } else { - const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2; - uint16_t excWord=*pe++; - int32_t full, idx; - - pe2=pe; - - if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) { - /* use hardcoded conditions and mappings */ - if(loc==UCASE_LOC_TURKISH && c==0x69) { - /* - # Turkish and Azeri - - # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri - # The following rules handle those cases. - - # When uppercasing, i turns into a dotted capital I - - 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I - 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I - */ - return 0x130; - } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter, context)) { - /* - # Lithuanian - - # Lithuanian retains the dot in a lowercase i when followed by accents. 
- - # Remove DOT ABOVE after "i" with upper or titlecase - - 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE - */ - *pString=nullptr; - return 0; /* remove the dot (continue without output) */ - } else { - /* no known conditional special case mapping, use a normal mapping */ - } - } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { - GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full); - - /* start of full case mapping strings */ - ++pe; - - /* skip the lowercase and case-folding result strings */ - pe+=full&UCASE_FULL_LOWER; - full>>=4; - pe+=full&0xf; - full>>=4; - - if(upperNotTitle) { - full&=0xf; - } else { - /* skip the uppercase result string */ - pe+=full&0xf; - full=(full>>4)&0xf; - } - - if(full!=0) { - /* set the output pointer to the result string */ - *pString=reinterpret_cast(pe); - - /* return the string length */ - return full; - } - } - - if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) { - int32_t delta; - GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta); - return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; - } - if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) { - idx=UCASE_EXC_TITLE; - } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) { - /* here, titlecase is same as uppercase */ - idx=UCASE_EXC_UPPER; - } else { - return ~c; - } - GET_SLOT_VALUE(excWord, idx, pe2, result); - } - - return (result==c) ? 
~result : result; -} - -U_CAPI int32_t U_EXPORT2 -ucase_toFullUpper(UChar32 c, - UCaseContextIterator *iter, void *context, - const UChar **pString, - int32_t caseLocale) { - return toUpperOrTitle(c, iter, context, pString, caseLocale, TRUE); -} - -U_CAPI int32_t U_EXPORT2 -ucase_toFullTitle(UChar32 c, - UCaseContextIterator *iter, void *context, - const UChar **pString, - int32_t caseLocale) { - return toUpperOrTitle(c, iter, context, pString, caseLocale, FALSE); -} - -/* case folding ------------------------------------------------------------- */ - -/* - * Case folding is similar to lowercasing. - * The result may be a simple mapping, i.e., a single code point, or - * a full mapping, i.e., a string. - * If the case folding for a code point is the same as its simple (1:1) lowercase mapping, - * then only the lowercase mapping is stored. - * - * Some special cases are hardcoded because their conditions cannot be - * parsed and processed from CaseFolding.txt. - * - * Unicode 3.2 CaseFolding.txt specifies for its status field: - -# C: common case folding, common mappings shared by both simple and full mappings. -# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. -# S: simple case folding, mappings to single characters where different from F. -# T: special case for uppercase I and dotted uppercase I -# - For non-Turkic languages, this mapping is normally not used. -# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. -# -# Usage: -# A. To do a simple case folding, use the mappings with status C + S. -# B. To do a full case folding, use the mappings with status C + F. -# -# The mappings with status T can be used or omitted depending on the desired case-folding -# behavior. (The default option is to exclude them.) 
- - * Unicode 3.2 has 'T' mappings as follows: - -0049; T; 0131; # LATIN CAPITAL LETTER I -0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE - - * while the default mappings for these code points are: - -0049; C; 0069; # LATIN CAPITAL LETTER I -0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE - - * U+0130 has no simple case folding (simple-case-folds to itself). - */ - -/* return the simple case folding mapping for c */ -U_CAPI UChar32 U_EXPORT2 -ucase_fold(UChar32 c, uint32_t options) { - uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!UCASE_HAS_EXCEPTION(props)) { - if(UCASE_IS_UPPER_OR_TITLE(props)) { - c+=UCASE_GET_DELTA(props); - } - } else { - const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); - uint16_t excWord=*pe++; - int32_t idx; - if(excWord&UCASE_EXC_CONDITIONAL_FOLD) { - /* special case folding mappings, hardcoded */ - if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) { - /* default mappings */ - if(c==0x49) { - /* 0049; C; 0069; # LATIN CAPITAL LETTER I */ - return 0x69; - } else if(c==0x130) { - /* no simple case folding for U+0130 */ - return c; - } - } else { - /* Turkic mappings */ - if(c==0x49) { - /* 0049; T; 0131; # LATIN CAPITAL LETTER I */ - return 0x131; - } else if(c==0x130) { - /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ - return 0x69; - } - } - } - if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) { - return c; - } - if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) { - int32_t delta; - GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); - return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? 
c+delta : c-delta; - } - if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) { - idx=UCASE_EXC_FOLD; - } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { - idx=UCASE_EXC_LOWER; - } else { - return c; - } - GET_SLOT_VALUE(excWord, idx, pe, c); - } - return c; -} - -/* - * Issue for canonical caseless match (UAX #21): - * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve - * canonical equivalence, unlike default-option casefolding. - * For example, I-grave and I + grave fold to strings that are not canonically - * equivalent. - * For more details, see the comment in unorm_compare() in unorm.cpp - * and the intermediate prototype changes for Jitterbug 2021. - * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.) - * - * This did not get fixed because it appears that it is not possible to fix - * it for uppercase and lowercase characters (I-grave vs. i-grave) - * together in a way that they still fold to common result strings. - */ - -U_CAPI int32_t U_EXPORT2 -ucase_toFullFolding(UChar32 c, - const UChar **pString, - uint32_t options) { - // The sign of the result has meaning, input must be non-negative so that it can be returned as is. 
- U_ASSERT(c >= 0); - UChar32 result=c; - uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); - if(!UCASE_HAS_EXCEPTION(props)) { - if(UCASE_IS_UPPER_OR_TITLE(props)) { - result=c+UCASE_GET_DELTA(props); - } - } else { - const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2; - uint16_t excWord=*pe++; - int32_t full, idx; - - pe2=pe; - - if(excWord&UCASE_EXC_CONDITIONAL_FOLD) { - /* use hardcoded conditions and mappings */ - if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) { - /* default mappings */ - if(c==0x49) { - /* 0049; C; 0069; # LATIN CAPITAL LETTER I */ - return 0x69; - } else if(c==0x130) { - /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ - *pString=iDot; - return 2; - } - } else { - /* Turkic mappings */ - if(c==0x49) { - /* 0049; T; 0131; # LATIN CAPITAL LETTER I */ - return 0x131; - } else if(c==0x130) { - /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ - return 0x69; - } - } - } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { - GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full); - - /* start of full case mapping strings */ - ++pe; - - /* skip the lowercase result string */ - pe+=full&UCASE_FULL_LOWER; - full=(full>>4)&0xf; - - if(full!=0) { - /* set the output pointer to the result string */ - *pString=reinterpret_cast(pe); - - /* return the string length */ - return full; - } - } - - if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) { - return ~c; - } - if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) { - int32_t delta; - GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta); - return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; - } - if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) { - idx=UCASE_EXC_FOLD; - } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { - idx=UCASE_EXC_LOWER; - } else { - return ~c; - } - GET_SLOT_VALUE(excWord, idx, pe2, result); - } - - return (result==c) ? 
~result : result; -} - -/* case mapping properties API ---------------------------------------------- */ - -/* public API (see uchar.h) */ - -U_CAPI UBool U_EXPORT2 -u_isULowercase(UChar32 c) { - return (UBool)(UCASE_LOWER==ucase_getType(c)); -} - -U_CAPI UBool U_EXPORT2 -u_isUUppercase(UChar32 c) { - return (UBool)(UCASE_UPPER==ucase_getType(c)); -} - -/* Transforms the Unicode character to its lower case equivalent.*/ -U_CAPI UChar32 U_EXPORT2 -u_tolower(UChar32 c) { - return ucase_tolower(c); -} - -/* Transforms the Unicode character to its upper case equivalent.*/ -U_CAPI UChar32 U_EXPORT2 -u_toupper(UChar32 c) { - return ucase_toupper(c); -} - -/* Transforms the Unicode character to its title case equivalent.*/ -U_CAPI UChar32 U_EXPORT2 -u_totitle(UChar32 c) { - return ucase_totitle(c); -} - -/* return the simple case folding mapping for c */ -U_CAPI UChar32 U_EXPORT2 -u_foldCase(UChar32 c, uint32_t options) { - return ucase_fold(c, options); -} - -U_CFUNC int32_t U_EXPORT2 -ucase_hasBinaryProperty(UChar32 c, UProperty which) { - /* case mapping properties */ - const UChar *resultString; - switch(which) { - case UCHAR_LOWERCASE: - return (UBool)(UCASE_LOWER==ucase_getType(c)); - case UCHAR_UPPERCASE: - return (UBool)(UCASE_UPPER==ucase_getType(c)); - case UCHAR_SOFT_DOTTED: - return ucase_isSoftDotted(c); - case UCHAR_CASE_SENSITIVE: - return ucase_isCaseSensitive(c); - case UCHAR_CASED: - return (UBool)(UCASE_NONE!=ucase_getType(c)); - case UCHAR_CASE_IGNORABLE: - return (UBool)(ucase_getTypeOrIgnorable(c)>>2); - /* - * Note: The following Changes_When_Xyz are defined as testing whether - * the NFD form of the input changes when Xyz-case-mapped. - * However, this simpler implementation of these properties, - * ignoring NFD, passes the tests. - * The implementation needs to be changed if the tests start failing. 
- * When that happens, optimizations should be used to work with the - * per-single-code point ucase_toFullXyz() functions unless - * the NFD form has more than one code point, - * and the property starts set needs to be the union of the - * start sets for normalization and case mappings. - */ - case UCHAR_CHANGES_WHEN_LOWERCASED: - return (UBool)(ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); - case UCHAR_CHANGES_WHEN_UPPERCASED: - return (UBool)(ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); - case UCHAR_CHANGES_WHEN_TITLECASED: - return (UBool)(ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); - /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */ - case UCHAR_CHANGES_WHEN_CASEMAPPED: - return (UBool)( - ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 || - ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 || - ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); - default: - return FALSE; - } -} diff --git a/deps/node/deps/icu-small/source/common/ucase.h b/deps/node/deps/icu-small/source/common/ucase.h deleted file mode 100644 index b0a453b8..00000000 --- a/deps/node/deps/icu-small/source/common/ucase.h +++ /dev/null @@ -1,444 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2004-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ucase.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004aug30 -* created by: Markus W. Scherer -* -* Low-level Unicode character/string case mapping code. 
-*/ - -#ifndef __UCASE_H__ -#define __UCASE_H__ - -#include "unicode/utypes.h" -#include "unicode/uset.h" -#include "putilimp.h" -#include "uset_imp.h" -#include "udataswp.h" -#include "utrie2.h" - -#ifdef __cplusplus -U_NAMESPACE_BEGIN - -class UnicodeString; - -U_NAMESPACE_END -#endif - -/* library API -------------------------------------------------------------- */ - -U_CFUNC void U_EXPORT2 -ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); - -/** - * Requires non-NULL locale ID but otherwise does the equivalent of - * checking for language codes as if uloc_getLanguage() were called: - * Accepts both 2- and 3-letter codes and accepts case variants. - */ -U_CFUNC int32_t -ucase_getCaseLocale(const char *locale); - -/* Casing locale types for ucase_getCaseLocale */ -enum { - UCASE_LOC_UNKNOWN, - UCASE_LOC_ROOT, - UCASE_LOC_TURKISH, - UCASE_LOC_LITHUANIAN, - UCASE_LOC_GREEK, - UCASE_LOC_DUTCH -}; - -/** - * Bit mask for getting just the options from a string compare options word - * that are relevant for case-insensitive string comparison. - * See stringoptions.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER. - * @internal - */ -#define _STRCASECMP_OPTIONS_MASK 0xffff - -/** - * Bit mask for getting just the options from a string compare options word - * that are relevant for case folding (of a single string or code point). - * - * Currently only bit 0 for U_FOLD_CASE_EXCLUDE_SPECIAL_I. - * It is conceivable that at some point we might use one more bit for using uppercase sharp s. - * It is conceivable that at some point we might want the option to use only simple case foldings - * when operating on strings. - * - * See stringoptions.h. 
- * @internal - */ -#define _FOLD_CASE_OPTIONS_MASK 7 - -/* single-code point functions */ - -U_CAPI UChar32 U_EXPORT2 -ucase_tolower(UChar32 c); - -U_CAPI UChar32 U_EXPORT2 -ucase_toupper(UChar32 c); - -U_CAPI UChar32 U_EXPORT2 -ucase_totitle(UChar32 c); - -U_CAPI UChar32 U_EXPORT2 -ucase_fold(UChar32 c, uint32_t options); - -/** - * Adds all simple case mappings and the full case folding for c to sa, - * and also adds special case closure mappings. - * c itself is not added. - * For example, the mappings - * - for s include long s - * - for sharp s include ss - * - for k include the Kelvin sign - */ -U_CFUNC void U_EXPORT2 -ucase_addCaseClosure(UChar32 c, const USetAdder *sa); - -/** - * Maps the string to single code points and adds the associated case closure - * mappings. - * The string is mapped to code points if it is their full case folding string. - * In other words, this performs a reverse full case folding and then - * adds the case closure items of the resulting code points. - * If the string is found and its closure applied, then - * the string itself is added as well as part of its code points' closure. - * It must be length>=0. - * - * @return TRUE if the string was found - */ -U_CFUNC UBool U_EXPORT2 -ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa); - -#ifdef __cplusplus -U_NAMESPACE_BEGIN - -/** - * Iterator over characters with more than one code point in the full default Case_Folding. - */ -class U_COMMON_API FullCaseFoldingIterator { -public: - /** Constructor. */ - FullCaseFoldingIterator(); - /** - * Returns the next (cp, full) pair where "full" is cp's full default Case_Folding. - * Returns a negative cp value at the end of the iteration. 
- */ - UChar32 next(UnicodeString &full); -private: - FullCaseFoldingIterator(const FullCaseFoldingIterator &); // no copy - FullCaseFoldingIterator &operator=(const FullCaseFoldingIterator &); // no assignment - - const UChar *unfold; - int32_t unfoldRows; - int32_t unfoldRowWidth; - int32_t unfoldStringWidth; - int32_t currentRow; - int32_t rowCpIndex; -}; - -/** - * Fast case mapping data for ASCII/Latin. - * Linear arrays of delta bytes: 0=no mapping; EXC=exception. - * Deltas must not cross the ASCII boundary, or else they cannot be easily used - * in simple UTF-8 code. - */ -namespace LatinCase { - -/** Case mapping/folding data for code points up to U+017F. */ -constexpr UChar LIMIT = 0x180; -/** U+017F case-folds and uppercases crossing the ASCII boundary. */ -constexpr UChar LONG_S = 0x17f; -/** Exception: Complex mapping, or too-large delta. */ -constexpr int8_t EXC = -0x80; - -/** Deltas for lowercasing for most locales, and default case folding. */ -extern const int8_t TO_LOWER_NORMAL[LIMIT]; -/** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */ -extern const int8_t TO_LOWER_TR_LT[LIMIT]; - -/** Deltas for uppercasing for most locales. */ -extern const int8_t TO_UPPER_NORMAL[LIMIT]; -/** Deltas for uppercasing for tr/az. */ -extern const int8_t TO_UPPER_TR[LIMIT]; - -} // namespace LatinCase - -U_NAMESPACE_END -#endif - -/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ -U_CAPI int32_t U_EXPORT2 -ucase_getType(UChar32 c); - -/** @return like ucase_getType() but also sets UCASE_IGNORABLE if c is case-ignorable */ -U_CAPI int32_t U_EXPORT2 -ucase_getTypeOrIgnorable(UChar32 c); - -U_CAPI UBool U_EXPORT2 -ucase_isSoftDotted(UChar32 c); - -U_CAPI UBool U_EXPORT2 -ucase_isCaseSensitive(UChar32 c); - -/* string case mapping functions */ - -U_CDECL_BEGIN - -/** - * Iterator function for string case mappings, which need to look at the - * context (surrounding text) of a given character for conditional mappings. 
- * - * The iterator only needs to go backward or forward away from the - * character in question. It does not use any indexes on this interface. - * It does not support random access or an arbitrary change of - * iteration direction. - * - * The code point being case-mapped itself is never returned by - * this iterator. - * - * @param context A pointer to the iterator's working data. - * @param dir If <0 then start iterating backward from the character; - * if >0 then start iterating forward from the character; - * if 0 then continue iterating in the current direction. - * @return Next code point, or <0 when the iteration is done. - */ -typedef UChar32 U_CALLCONV -UCaseContextIterator(void *context, int8_t dir); - -/** - * Sample struct which may be used by some implementations of - * UCaseContextIterator. - */ -struct UCaseContext { - void *p; - int32_t start, index, limit; - int32_t cpStart, cpLimit; - int8_t dir; - int8_t b1, b2, b3; -}; -typedef struct UCaseContext UCaseContext; - -U_CDECL_END - -#define UCASECONTEXT_INITIALIZER { NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -enum { - /** - * For string case mappings, a single character (a code point) is mapped - * either to itself (in which case in-place mapping functions do nothing), - * or to another single code point, or to a string. - * Aside from the string contents, these are indicated with a single int32_t - * value as follows: - * - * Mapping to self: Negative values (~self instead of -self to support U+0000) - * - * Mapping to another code point: Positive values >UCASE_MAX_STRING_LENGTH - * - * Mapping to a string: The string length (0..UCASE_MAX_STRING_LENGTH) is - * returned. Note that the string result may indeed have zero length. - */ - UCASE_MAX_STRING_LENGTH=0x1f -}; - -/** - * Get the full lowercase mapping for c. - * - * @param csp Case mapping properties. - * @param c Character to be mapped. - * @param iter Character iterator, used for context-sensitive mappings. 
- * See UCaseContextIterator for details. - * If iter==NULL then a context-independent result is returned. - * @param context Pointer to be passed into iter. - * @param pString If the mapping result is a string, then the pointer is - * written to *pString. - * @param caseLocale Case locale value from ucase_getCaseLocale(). - * @return Output code point or string length, see UCASE_MAX_STRING_LENGTH. - * - * @see UCaseContextIterator - * @see UCASE_MAX_STRING_LENGTH - * @internal - */ -U_CAPI int32_t U_EXPORT2 -ucase_toFullLower(UChar32 c, - UCaseContextIterator *iter, void *context, - const UChar **pString, - int32_t caseLocale); - -U_CAPI int32_t U_EXPORT2 -ucase_toFullUpper(UChar32 c, - UCaseContextIterator *iter, void *context, - const UChar **pString, - int32_t caseLocale); - -U_CAPI int32_t U_EXPORT2 -ucase_toFullTitle(UChar32 c, - UCaseContextIterator *iter, void *context, - const UChar **pString, - int32_t caseLocale); - -U_CAPI int32_t U_EXPORT2 -ucase_toFullFolding(UChar32 c, - const UChar **pString, - uint32_t options); - -U_CFUNC int32_t U_EXPORT2 -ucase_hasBinaryProperty(UChar32 c, UProperty which); - - -U_CDECL_BEGIN - -/** - * @internal - */ -typedef int32_t U_CALLCONV -UCaseMapFull(UChar32 c, - UCaseContextIterator *iter, void *context, - const UChar **pString, - int32_t caseLocale); - -U_CDECL_END - -/* file definitions --------------------------------------------------------- */ - -#define UCASE_DATA_NAME "ucase" -#define UCASE_DATA_TYPE "icu" - -/* format "cAsE" */ -#define UCASE_FMT_0 0x63 -#define UCASE_FMT_1 0x41 -#define UCASE_FMT_2 0x53 -#define UCASE_FMT_3 0x45 - -/* indexes into indexes[] */ -enum { - UCASE_IX_INDEX_TOP, - UCASE_IX_LENGTH, - UCASE_IX_TRIE_SIZE, - UCASE_IX_EXC_LENGTH, - UCASE_IX_UNFOLD_LENGTH, - - UCASE_IX_MAX_FULL_LENGTH=15, - UCASE_IX_TOP=16 -}; - -/* definitions for 16-bit case properties word ------------------------------ */ - -U_CFUNC const UTrie2 * U_EXPORT2 -ucase_getTrie(); - -/* 2-bit constants for types of cased 
characters */ -#define UCASE_TYPE_MASK 3 -enum { - UCASE_NONE, - UCASE_LOWER, - UCASE_UPPER, - UCASE_TITLE -}; - -#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK) -#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7) - -#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2) - -#define UCASE_IGNORABLE 4 -#define UCASE_EXCEPTION 8 -#define UCASE_SENSITIVE 0x10 - -#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION) - -#define UCASE_DOT_MASK 0x60 -enum { - UCASE_NO_DOT=0, /* normal characters with cc=0 */ - UCASE_SOFT_DOTTED=0x20, /* soft-dotted characters with cc=0 */ - UCASE_ABOVE=0x40, /* "above" accents with cc=230 */ - UCASE_OTHER_ACCENT=0x60 /* other accent character (0>UCASE_DELTA_SHIFT) -#else -# define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT)) -#endif - -/* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */ -#define UCASE_EXC_SHIFT 4 -#define UCASE_EXC_MASK 0xfff0 -#define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1) - -/* definitions for 16-bit main exceptions word ------------------------------ */ - -/* first 8 bits indicate values in optional slots */ -enum { - UCASE_EXC_LOWER, - UCASE_EXC_FOLD, - UCASE_EXC_UPPER, - UCASE_EXC_TITLE, - UCASE_EXC_DELTA, - UCASE_EXC_5, /* reserved */ - UCASE_EXC_CLOSURE, - UCASE_EXC_FULL_MAPPINGS, - UCASE_EXC_ALL_SLOTS /* one past the last slot */ -}; - -/* each slot is 2 uint16_t instead of 1 */ -#define UCASE_EXC_DOUBLE_SLOTS 0x100 - -enum { - UCASE_EXC_NO_SIMPLE_CASE_FOLDING=0x200, - UCASE_EXC_DELTA_IS_NEGATIVE=0x400, - UCASE_EXC_SENSITIVE=0x800 -}; - -/* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<locale; -} - -U_CAPI uint32_t U_EXPORT2 -ucasemap_getOptions(const UCaseMap *csm) { - return csm->options; -} - -U_CAPI void U_EXPORT2 -ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return; - } - if (locale != NULL && *locale == 
0) { - csm->locale[0] = 0; - csm->caseLocale = UCASE_LOC_ROOT; - return; - } - - int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) { - *pErrorCode=U_ZERO_ERROR; - /* we only really need the language code for case mappings */ - length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); - } - if(length==sizeof(csm->locale)) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - if(U_SUCCESS(*pErrorCode)) { - csm->caseLocale=UCASE_LOC_UNKNOWN; - csm->caseLocale = ucase_getCaseLocale(csm->locale); - } else { - csm->locale[0]=0; - csm->caseLocale = UCASE_LOC_ROOT; - } -} - -U_CAPI void U_EXPORT2 -ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return; - } - csm->options=options; -} - -/* UTF-8 string case mappings ----------------------------------------------- */ - -/* TODO(markus): Move to a new, separate utf8case.cpp file. */ - -namespace { - -/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ -inline UBool -appendResult(int32_t cpLength, int32_t result, const UChar *s, - ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) { - U_ASSERT(U_SUCCESS(errorCode)); - - /* decode the result */ - if(result<0) { - /* (not) original code point */ - if(edits!=NULL) { - edits->addUnchanged(cpLength); - } - if((options & U_OMIT_UNCHANGED_TEXT) == 0) { - ByteSinkUtil::appendCodePoint(cpLength, ~result, sink); - } - } else { - if(result<=UCASE_MAX_STRING_LENGTH) { - // string: "result" is the UTF-16 length - return ByteSinkUtil::appendChange(cpLength, s, result, sink, edits, errorCode); - } else { - ByteSinkUtil::appendCodePoint(cpLength, result, sink, edits); - } - } - return TRUE; -} - -// See unicode/utf8.h U8_APPEND_UNSAFE(). 
-inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); } -inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } - -UChar32 U_CALLCONV -utf8_caseContextIterator(void *context, int8_t dir) { - UCaseContext *csc=(UCaseContext *)context; - UChar32 c; - - if(dir<0) { - /* reset for backward iteration */ - csc->index=csc->cpStart; - csc->dir=dir; - } else if(dir>0) { - /* reset for forward iteration */ - csc->index=csc->cpLimit; - csc->dir=dir; - } else { - /* continue current iteration direction */ - dir=csc->dir; - } - - if(dir<0) { - if(csc->startindex) { - U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c); - return c; - } - } else { - if(csc->indexlimit) { - U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c); - return c; - } - } - return U_SENTINEL; -} - -/** - * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account. - * caseLocale < 0: Case-folds [srcStart..srcLimit[. - */ -void toLower(int32_t caseLocale, uint32_t options, - const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, - icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) { - const int8_t *latinToLower; - if (caseLocale == UCASE_LOC_ROOT || - (caseLocale >= 0 ? 
- !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) : - (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) { - latinToLower = LatinCase::TO_LOWER_NORMAL; - } else { - latinToLower = LatinCase::TO_LOWER_TR_LT; - } - const UTrie2 *trie = ucase_getTrie(); - int32_t prev = srcStart; - int32_t srcIndex = srcStart; - for (;;) { - // fast path for simple cases - int32_t cpStart; - UChar32 c; - for (;;) { - if (U_FAILURE(errorCode) || srcIndex >= srcLimit) { - c = U_SENTINEL; - break; - } - uint8_t lead = src[srcIndex++]; - if (lead <= 0x7f) { - int8_t d = latinToLower[lead]; - if (d == LatinCase::EXC) { - cpStart = srcIndex - 1; - c = lead; - break; - } - if (d == 0) { continue; } - ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev, - sink, options, edits, errorCode); - char ascii = (char)(lead + d); - sink.Append(&ascii, 1); - if (edits != nullptr) { - edits->addReplace(1, 1); - } - prev = srcIndex; - continue; - } else if (lead < 0xe3) { - uint8_t t; - if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit && - (t = src[srcIndex] - 0x80) <= 0x3f) { - // U+0080..U+017F - ++srcIndex; - c = ((lead - 0xc0) << 6) | t; - int8_t d = latinToLower[c]; - if (d == LatinCase::EXC) { - cpStart = srcIndex - 2; - break; - } - if (d == 0) { continue; } - ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev, - sink, options, edits, errorCode); - ByteSinkUtil::appendTwoBytes(c + d, sink); - if (edits != nullptr) { - edits->addReplace(2, 2); - } - prev = srcIndex; - continue; - } - } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) && - (srcIndex + 2) <= srcLimit && - U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) { - // most of CJK: no case mappings - srcIndex += 2; - continue; - } - cpStart = --srcIndex; - U8_NEXT(src, srcIndex, srcLimit, c); - if (c < 0) { - // ill-formed UTF-8 - continue; - } - uint16_t props = UTRIE2_GET16(trie, c); - if (UCASE_HAS_EXCEPTION(props)) { break; } - int32_t delta; - if 
(!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) { - continue; - } - ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, - sink, options, edits, errorCode); - ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits); - prev = srcIndex; - } - if (c < 0) { - break; - } - // slow path - const UChar *s; - if (caseLocale >= 0) { - csc->cpStart = cpStart; - csc->cpLimit = srcIndex; - c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale); - } else { - c = ucase_toFullFolding(c, &s, options); - } - if (c >= 0) { - ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, - sink, options, edits, errorCode); - appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); - prev = srcIndex; - } - } - ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev, - sink, options, edits, errorCode); -} - -void toUpper(int32_t caseLocale, uint32_t options, - const uint8_t *src, UCaseContext *csc, int32_t srcLength, - icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) { - const int8_t *latinToUpper; - if (caseLocale == UCASE_LOC_TURKISH) { - latinToUpper = LatinCase::TO_UPPER_TR; - } else { - latinToUpper = LatinCase::TO_UPPER_NORMAL; - } - const UTrie2 *trie = ucase_getTrie(); - int32_t prev = 0; - int32_t srcIndex = 0; - for (;;) { - // fast path for simple cases - int32_t cpStart; - UChar32 c; - for (;;) { - if (U_FAILURE(errorCode) || srcIndex >= srcLength) { - c = U_SENTINEL; - break; - } - uint8_t lead = src[srcIndex++]; - if (lead <= 0x7f) { - int8_t d = latinToUpper[lead]; - if (d == LatinCase::EXC) { - cpStart = srcIndex - 1; - c = lead; - break; - } - if (d == 0) { continue; } - ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev, - sink, options, edits, errorCode); - char ascii = (char)(lead + d); - sink.Append(&ascii, 1); - if (edits != nullptr) { - edits->addReplace(1, 1); - } - prev = srcIndex; - continue; - } else if (lead < 0xe3) { - uint8_t t; - if (0xc2 <= lead && 
lead <= 0xc5 && srcIndex < srcLength && - (t = src[srcIndex] - 0x80) <= 0x3f) { - // U+0080..U+017F - ++srcIndex; - c = ((lead - 0xc0) << 6) | t; - int8_t d = latinToUpper[c]; - if (d == LatinCase::EXC) { - cpStart = srcIndex - 2; - break; - } - if (d == 0) { continue; } - ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev, - sink, options, edits, errorCode); - ByteSinkUtil::appendTwoBytes(c + d, sink); - if (edits != nullptr) { - edits->addReplace(2, 2); - } - prev = srcIndex; - continue; - } - } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) && - (srcIndex + 2) <= srcLength && - U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) { - // most of CJK: no case mappings - srcIndex += 2; - continue; - } - cpStart = --srcIndex; - U8_NEXT(src, srcIndex, srcLength, c); - if (c < 0) { - // ill-formed UTF-8 - continue; - } - uint16_t props = UTRIE2_GET16(trie, c); - if (UCASE_HAS_EXCEPTION(props)) { break; } - int32_t delta; - if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) { - continue; - } - ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, - sink, options, edits, errorCode); - ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits); - prev = srcIndex; - } - if (c < 0) { - break; - } - // slow path - csc->cpStart = cpStart; - csc->cpLimit = srcIndex; - const UChar *s; - c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale); - if (c >= 0) { - ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, - sink, options, edits, errorCode); - appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); - prev = srcIndex; - } - } - ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev, - sink, options, edits, errorCode); -} - -} // namespace - -#if !UCONFIG_NO_BREAK_ITERATION - -U_CFUNC void U_CALLCONV -ucasemap_internalUTF8ToTitle( - int32_t caseLocale, uint32_t options, BreakIterator *iter, - const uint8_t *src, int32_t srcLength, - ByteSink &sink, 
icu::Edits *edits, - UErrorCode &errorCode) { - if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) { - return; - } - - /* set up local variables */ - UCaseContext csc=UCASECONTEXT_INITIALIZER; - csc.p=(void *)src; - csc.limit=srcLength; - int32_t prev=0; - UBool isFirstIndex=TRUE; - - /* titlecasing loop */ - while(prevfirst(); - } else { - index=iter->next(); - } - if(index==UBRK_DONE || index>srcLength) { - index=srcLength; - } - - /* - * Segment [prev..index[ into 3 parts: - * a) skipped characters (copy as-is) [prev..titleStart[ - * b) first letter (titlecase) [titleStart..titleLimit[ - * c) subsequent characters (lowercase) [titleLimit..index[ - */ - if(prev=0) { - csc.cpStart=titleStart; - csc.cpLimit=titleLimit; - const UChar *s; - c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale); - if (!appendResult(titleLimit-titleStart, c, s, sink, options, edits, errorCode)) { - return; - } - } else { - // Malformed UTF-8. - if (!ByteSinkUtil::appendUnchanged(src+titleStart, titleLimit-titleStart, - sink, options, edits, errorCode)) { - return; - } - } - - /* Special case Dutch IJ titlecasing */ - if (titleStart+1 < index && - caseLocale == UCASE_LOC_DUTCH && - (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) { - if (src[titleStart+1] == 0x006A) { - ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits); - titleLimit++; - } else if (src[titleStart+1] == 0x004A) { - // Keep the capital J from getting lowercased. - if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1, - sink, options, edits, errorCode)) { - return; - } - titleLimit++; - } - } - - /* lowercase [titleLimit..index[ */ - if(titleLimit 0) { - uint32_t upper = data & UPPER_MASK; - // Add a dialytika to this iota or ypsilon vowel - // if we removed a tonos from the previous vowel, - // and that previous vowel did not also have (or gain) a dialytika. 
- // Adding one only to the final vowel in a longer sequence - // (which does not occur in normal writing) would require lookahead. - // Set the same flag as for preserving an existing dialytika. - if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 && - (upper == 0x399 || upper == 0x3A5)) { - data |= HAS_DIALYTIKA; - } - int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota. - if ((data & HAS_YPOGEGRAMMENI) != 0) { - numYpogegrammeni = 1; - } - // Skip combining diacritics after this Greek letter. - int32_t nextNextIndex = nextIndex; - while (nextIndex < srcLength) { - UChar32 c2; - U8_NEXT(src, nextNextIndex, srcLength, c2); - uint32_t diacriticData = getDiacriticData(c2); - if (diacriticData != 0) { - data |= diacriticData; - if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) { - ++numYpogegrammeni; - } - nextIndex = nextNextIndex; - } else { - break; // not a Greek diacritic - } - } - if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { - nextState |= AFTER_VOWEL_WITH_ACCENT; - } - // Map according to Greek rules. - UBool addTonos = FALSE; - if (upper == 0x397 && - (data & HAS_ACCENT) != 0 && - numYpogegrammeni == 0 && - (state & AFTER_CASED) == 0 && - !isFollowedByCasedLetter(src, nextIndex, srcLength)) { - // Keep disjunctive "or" with (only) a tonos. - // We use the same "word boundary" conditions as for the Final_Sigma test. - if (i == nextIndex) { - upper = 0x389; // Preserve the precomposed form. - } else { - addTonos = TRUE; - } - } else if ((data & HAS_DIALYTIKA) != 0) { - // Preserve a vowel with dialytika in precomposed form if it exists. - if (upper == 0x399) { - upper = 0x3AA; - data &= ~HAS_EITHER_DIALYTIKA; - } else if (upper == 0x3A5) { - upper = 0x3AB; - data &= ~HAS_EITHER_DIALYTIKA; - } - } - - UBool change; - if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) { - change = TRUE; // common, simple usage - } else { - // Find out first whether we are changing the text. 
- U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block - change = (i + 2) > nextIndex || - src[i] != getTwoByteLead(upper) || src[i + 1] != getTwoByteTrail(upper) || - numYpogegrammeni > 0; - int32_t i2 = i + 2; - if ((data & HAS_EITHER_DIALYTIKA) != 0) { - change |= (i2 + 2) > nextIndex || - src[i2] != (uint8_t)u8"\u0308"[0] || - src[i2 + 1] != (uint8_t)u8"\u0308"[1]; - i2 += 2; - } - if (addTonos) { - change |= (i2 + 2) > nextIndex || - src[i2] != (uint8_t)u8"\u0301"[0] || - src[i2 + 1] != (uint8_t)u8"\u0301"[1]; - i2 += 2; - } - int32_t oldLength = nextIndex - i; - int32_t newLength = (i2 - i) + numYpogegrammeni * 2; // 2 bytes per U+0399 - change |= oldLength != newLength; - if (change) { - if (edits != NULL) { - edits->addReplace(oldLength, newLength); - } - } else { - if (edits != NULL) { - edits->addUnchanged(oldLength); - } - // Write unchanged text? - change = (options & U_OMIT_UNCHANGED_TEXT) == 0; - } - } - - if (change) { - ByteSinkUtil::appendTwoBytes(upper, sink); - if ((data & HAS_EITHER_DIALYTIKA) != 0) { - sink.Append(u8"\u0308", 2); // restore or add a dialytika - } - if (addTonos) { - sink.Append(u8"\u0301", 2); - } - while (numYpogegrammeni > 0) { - sink.Append(u8"\u0399", 2); - --numYpogegrammeni; - } - } - } else if(c>=0) { - const UChar *s; - c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK); - if (!appendResult(nextIndex - i, c, s, sink, options, edits, errorCode)) { - return; - } - } else { - // Malformed UTF-8. 
- if (!ByteSinkUtil::appendUnchanged(src+i, nextIndex-i, - sink, options, edits, errorCode)) { - return; - } - } - i = nextIndex; - state = nextState; - } -} - -} // namespace GreekUpper -U_NAMESPACE_END - -static void U_CALLCONV -ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED - const uint8_t *src, int32_t srcLength, - icu::ByteSink &sink, icu::Edits *edits, - UErrorCode &errorCode) { - UCaseContext csc=UCASECONTEXT_INITIALIZER; - csc.p=(void *)src; - csc.limit=srcLength; - toLower( - caseLocale, options, - src, &csc, 0, srcLength, - sink, edits, errorCode); -} - -static void U_CALLCONV -ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED - const uint8_t *src, int32_t srcLength, - icu::ByteSink &sink, icu::Edits *edits, - UErrorCode &errorCode) { - if (caseLocale == UCASE_LOC_GREEK) { - GreekUpper::toUpper(options, src, srcLength, sink, edits, errorCode); - } else { - UCaseContext csc=UCASECONTEXT_INITIALIZER; - csc.p=(void *)src; - csc.limit=srcLength; - toUpper( - caseLocale, options, - src, &csc, srcLength, - sink, edits, errorCode); - } -} - -static void U_CALLCONV -ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED - const uint8_t *src, int32_t srcLength, - icu::ByteSink &sink, icu::Edits *edits, - UErrorCode &errorCode) { - toLower( - -1, options, - src, nullptr, 0, srcLength, - sink, edits, errorCode); -} - -void -ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM - const char *src, int32_t srcLength, - UTF8CaseMapper *stringCaseMapper, - icu::ByteSink &sink, icu::Edits *edits, - UErrorCode &errorCode) { - /* check argument values */ - if (U_FAILURE(errorCode)) { - return; - } - if ((src == nullptr && srcLength != 0) || srcLength < -1) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - // Get the string length. 
- if (srcLength == -1) { - srcLength = (int32_t)uprv_strlen((const char *)src); - } - - if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { - edits->reset(); - } - stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR - (const uint8_t *)src, srcLength, sink, edits, errorCode); - sink.Flush(); - if (U_SUCCESS(errorCode)) { - if (edits != nullptr) { - edits->copyErrorTo(errorCode); - } - } -} - -int32_t -ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM - char *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UTF8CaseMapper *stringCaseMapper, - icu::Edits *edits, - UErrorCode &errorCode) { - /* check argument values */ - if(U_FAILURE(errorCode)) { - return 0; - } - if( destCapacity<0 || - (dest==NULL && destCapacity>0) || - (src==NULL && srcLength!=0) || srcLength<-1 - ) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* get the string length */ - if(srcLength==-1) { - srcLength=(int32_t)uprv_strlen((const char *)src); - } - - /* check for overlapping source and destination */ - if( dest!=NULL && - ((src>=dest && src<(dest+destCapacity)) || - (dest>=src && dest<(src+srcLength))) - ) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - CheckedArrayByteSink sink(dest, destCapacity); - if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { - edits->reset(); - } - stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR - (const uint8_t *)src, srcLength, sink, edits, errorCode); - sink.Flush(); - if (U_SUCCESS(errorCode)) { - if (sink.Overflowed()) { - errorCode = U_BUFFER_OVERFLOW_ERROR; - } else if (edits != nullptr) { - edits->copyErrorTo(errorCode); - } - } - return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode); -} - -/* public API functions */ - -U_CAPI int32_t U_EXPORT2 -ucasemap_utf8ToLower(const UCaseMap *csm, - char *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode) { - return 
ucasemap_mapUTF8( - csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ucasemap_internalUTF8ToLower, NULL, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -ucasemap_utf8ToUpper(const UCaseMap *csm, - char *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode) { - return ucasemap_mapUTF8( - csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ucasemap_internalUTF8ToUpper, NULL, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -ucasemap_utf8FoldCase(const UCaseMap *csm, - char *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode) { - return ucasemap_mapUTF8( - UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ucasemap_internalUTF8Fold, NULL, *pErrorCode); -} - -U_NAMESPACE_BEGIN - -void CaseMap::utf8ToLower( - const char *locale, uint32_t options, - StringPiece src, ByteSink &sink, Edits *edits, - UErrorCode &errorCode) { - ucasemap_mapUTF8( - ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL - src.data(), src.length(), - ucasemap_internalUTF8ToLower, sink, edits, errorCode); -} - -void CaseMap::utf8ToUpper( - const char *locale, uint32_t options, - StringPiece src, ByteSink &sink, Edits *edits, - UErrorCode &errorCode) { - ucasemap_mapUTF8( - ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL - src.data(), src.length(), - ucasemap_internalUTF8ToUpper, sink, edits, errorCode); -} - -void CaseMap::utf8Fold( - uint32_t options, - StringPiece src, ByteSink &sink, Edits *edits, - UErrorCode &errorCode) { - ucasemap_mapUTF8( - UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL - src.data(), src.length(), - ucasemap_internalUTF8Fold, sink, edits, errorCode); -} - -int32_t CaseMap::utf8ToLower( - const char *locale, uint32_t options, - const char *src, int32_t srcLength, - char *dest, int32_t destCapacity, Edits 
*edits, - UErrorCode &errorCode) { - return ucasemap_mapUTF8( - ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ucasemap_internalUTF8ToLower, edits, errorCode); -} - -int32_t CaseMap::utf8ToUpper( - const char *locale, uint32_t options, - const char *src, int32_t srcLength, - char *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode) { - return ucasemap_mapUTF8( - ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ucasemap_internalUTF8ToUpper, edits, errorCode); -} - -int32_t CaseMap::utf8Fold( - uint32_t options, - const char *src, int32_t srcLength, - char *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode) { - return ucasemap_mapUTF8( - UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ucasemap_internalUTF8Fold, edits, errorCode); -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/ucasemap_imp.h b/deps/node/deps/icu-small/source/common/ucasemap_imp.h deleted file mode 100644 index 7788fd93..00000000 --- a/deps/node/deps/icu-small/source/common/ucasemap_imp.h +++ /dev/null @@ -1,282 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// ucasemap_imp.h -// created: 2017feb08 Markus W. Scherer - -#ifndef __UCASEMAP_IMP_H__ -#define __UCASEMAP_IMP_H__ - -#include "unicode/utypes.h" -#include "unicode/ucasemap.h" -#include "unicode/uchar.h" -#include "ucase.h" - -/** - * Bit mask for the titlecasing iterator options bit field. - * Currently only 3 out of 8 values are used: - * 0 (words), U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. - * See stringoptions.h. - * @internal - */ -#define U_TITLECASE_ITERATOR_MASK 0xe0 - -/** - * Bit mask for the titlecasing index adjustment options bit set. 
- * Currently two bits are defined: - * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED. - * See stringoptions.h. - * @internal - */ -#define U_TITLECASE_ADJUSTMENT_MASK 0x600 - -/** - * Internal API, used by u_strcasecmp() etc. - * Compare strings case-insensitively, - * in code point order or code unit order. - */ -U_CFUNC int32_t -u_strcmpFold(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode); - -/** - * Internal API, used for detecting length of - * shared prefix case-insensitively. - * @param s1 input string 1 - * @param length1 length of string 1, or -1 (NULL terminated) - * @param s2 input string 2 - * @param length2 length of string 2, or -1 (NULL terminated) - * @param options compare options - * @param matchLen1 (output) length of partial prefix match in s1 - * @param matchLen2 (output) length of partial prefix match in s2 - * @param pErrorCode receives error status - */ -U_CAPI void -u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - int32_t *matchLen1, int32_t *matchLen2, - UErrorCode *pErrorCode); - -#ifdef __cplusplus - -U_NAMESPACE_BEGIN - -class BreakIterator; // unicode/brkiter.h -class ByteSink; -class Locale; // unicode/locid.h - -/** Returns TRUE if the options are valid. Otherwise FALSE, and sets an error. */ -inline UBool ustrcase_checkTitleAdjustmentOptions(uint32_t options, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return FALSE; } - if ((options & U_TITLECASE_ADJUSTMENT_MASK) == U_TITLECASE_ADJUSTMENT_MASK) { - // Both options together. - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - return TRUE; -} - -inline UBool ustrcase_isLNS(UChar32 c) { - // Letter, number, symbol, - // or a private use code point because those are typically used as letters or numbers. - // Consider modifier letters only if they are cased. 
- const uint32_t LNS = (U_GC_L_MASK|U_GC_N_MASK|U_GC_S_MASK|U_GC_CO_MASK) & ~U_GC_LM_MASK; - int gc = u_charType(c); - return (U_MASK(gc) & LNS) != 0 || (gc == U_MODIFIER_LETTER && ucase_getType(c) != UCASE_NONE); -} - -#if !UCONFIG_NO_BREAK_ITERATION - -/** Returns nullptr if error. Pass in either locale or locID, not both. */ -U_CFUNC -BreakIterator *ustrcase_getTitleBreakIterator( - const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter, - LocalPointer &ownedIter, UErrorCode &errorCode); - -#endif - -U_NAMESPACE_END - -#include "unicode/unistr.h" // for UStringCaseMapper - -/* - * Internal string casing functions implementing - * ustring.h/ustrcase.cpp and UnicodeString case mapping functions. - */ - -struct UCaseMap : public icu::UMemory { - /** Implements most of ucasemap_open(). */ - UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode); - ~UCaseMap(); - -#if !UCONFIG_NO_BREAK_ITERATION - icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */ -#endif - char locale[32]; - int32_t caseLocale; - uint32_t options; -}; - -#if UCONFIG_NO_BREAK_ITERATION -# define UCASEMAP_BREAK_ITERATOR_PARAM -# define UCASEMAP_BREAK_ITERATOR_UNUSED -# define UCASEMAP_BREAK_ITERATOR -# define UCASEMAP_BREAK_ITERATOR_NULL -#else -# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter, -# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *, -# define UCASEMAP_BREAK_ITERATOR iter, -# define UCASEMAP_BREAK_ITERATOR_NULL NULL, -#endif - -U_CFUNC int32_t -ustrcase_getCaseLocale(const char *locale); - -// TODO: swap src / dest if approved for new public api -/** Implements UStringCaseMapper. */ -U_CFUNC int32_t U_CALLCONV -ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode); - -/** Implements UStringCaseMapper. 
*/ -U_CFUNC int32_t U_CALLCONV -ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode); - -#if !UCONFIG_NO_BREAK_ITERATION - -/** Implements UStringCaseMapper. */ -U_CFUNC int32_t U_CALLCONV -ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, - icu::BreakIterator *iter, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode); - -#endif - -/** Implements UStringCaseMapper. */ -U_CFUNC int32_t U_CALLCONV -ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode); - -/** - * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz(). - * Implements argument checking. - */ -U_CFUNC int32_t -ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UStringCaseMapper *stringCaseMapper, - icu::Edits *edits, - UErrorCode &errorCode); - -/** - * Common string case mapping implementation for old-fashioned u_strToXyz() functions - * that allow the source string to overlap the destination buffer. - * Implements argument checking and internally works with an intermediate buffer if necessary. - */ -U_CFUNC int32_t -ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UStringCaseMapper *stringCaseMapper, - UErrorCode &errorCode); - -/** - * UTF-8 string case mapping function type, used by ucasemap_mapUTF8(). - * UTF-8 version of UStringCaseMapper. - * All error checking must be done. 
- * The UCaseMap must be fully initialized, with locale and/or iter set as needed. - */ -typedef void U_CALLCONV -UTF8CaseMapper(int32_t caseLocale, uint32_t options, -#if !UCONFIG_NO_BREAK_ITERATION - icu::BreakIterator *iter, -#endif - const uint8_t *src, int32_t srcLength, - icu::ByteSink &sink, icu::Edits *edits, - UErrorCode &errorCode); - -#if !UCONFIG_NO_BREAK_ITERATION - -/** Implements UTF8CaseMapper. */ -U_CFUNC void U_CALLCONV -ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options, - icu::BreakIterator *iter, - const uint8_t *src, int32_t srcLength, - icu::ByteSink &sink, icu::Edits *edits, - UErrorCode &errorCode); - -#endif - -void -ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM - const char *src, int32_t srcLength, - UTF8CaseMapper *stringCaseMapper, - icu::ByteSink &sink, icu::Edits *edits, - UErrorCode &errorCode); - -/** - * Implements argument checking and buffer handling - * for UTF-8 string case mapping as a common function. - */ -int32_t -ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM - char *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UTF8CaseMapper *stringCaseMapper, - icu::Edits *edits, - UErrorCode &errorCode); - -U_NAMESPACE_BEGIN -namespace GreekUpper { - -// Data bits. -static const uint32_t UPPER_MASK = 0x3ff; -static const uint32_t HAS_VOWEL = 0x1000; -static const uint32_t HAS_YPOGEGRAMMENI = 0x2000; -static const uint32_t HAS_ACCENT = 0x4000; -static const uint32_t HAS_DIALYTIKA = 0x8000; -// Further bits during data building and processing, not stored in the data map. 
-static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000; -static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000; - -static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT; -static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA = - HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA; -static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA; - -// State bits. -static const uint32_t AFTER_CASED = 1; -static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2; - -uint32_t getLetterData(UChar32 c); - -/** - * Returns a non-zero value for each of the Greek combining diacritics - * listed in The Unicode Standard, version 8, chapter 7.2 Greek, - * plus some perispomeni look-alikes. - */ -uint32_t getDiacriticData(UChar32 c); - -} // namespace GreekUpper -U_NAMESPACE_END - -#endif // __cplusplus - -#endif // __UCASEMAP_IMP_H__ diff --git a/deps/node/deps/icu-small/source/common/ucasemap_titlecase_brkiter.cpp b/deps/node/deps/icu-small/source/common/ucasemap_titlecase_brkiter.cpp deleted file mode 100644 index c21dfb76..00000000 --- a/deps/node/deps/icu-small/source/common/ucasemap_titlecase_brkiter.cpp +++ /dev/null @@ -1,134 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: ucasemap_titlecase_brkiter.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011jun02 -* created by: Markus W. Scherer -* -* Titlecasing functions that are based on BreakIterator -* were moved here to break dependency cycles among parts of the common library. 
-*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/brkiter.h" -#include "unicode/ubrk.h" -#include "unicode/casemap.h" -#include "unicode/ucasemap.h" -#include "cmemory.h" -#include "ucase.h" -#include "ucasemap_imp.h" - -U_NAMESPACE_BEGIN - -void CaseMap::utf8ToTitle( - const char *locale, uint32_t options, BreakIterator *iter, - StringPiece src, ByteSink &sink, Edits *edits, - UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { - return; - } - UText utext = UTEXT_INITIALIZER; - utext_openUTF8(&utext, src.data(), src.length(), &errorCode); - LocalPointer ownedIter; - iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode); - if (iter == nullptr) { - utext_close(&utext); - return; - } - iter->setText(&utext, errorCode); - ucasemap_mapUTF8( - ustrcase_getCaseLocale(locale), options, iter, - src.data(), src.length(), - ucasemap_internalUTF8ToTitle, sink, edits, errorCode); - utext_close(&utext); -} - -int32_t CaseMap::utf8ToTitle( - const char *locale, uint32_t options, BreakIterator *iter, - const char *src, int32_t srcLength, - char *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { - return 0; - } - UText utext=UTEXT_INITIALIZER; - utext_openUTF8(&utext, src, srcLength, &errorCode); - LocalPointer ownedIter; - iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode); - if(iter==NULL) { - utext_close(&utext); - return 0; - } - iter->setText(&utext, errorCode); - int32_t length=ucasemap_mapUTF8( - ustrcase_getCaseLocale(locale), options, iter, - dest, destCapacity, - src, srcLength, - ucasemap_internalUTF8ToTitle, edits, errorCode); - utext_close(&utext); - return length; -} - -U_NAMESPACE_END - -U_NAMESPACE_USE - -U_CAPI const UBreakIterator * U_EXPORT2 -ucasemap_getBreakIterator(const UCaseMap *csm) { - return reinterpret_cast(csm->iter); -} - -U_CAPI void U_EXPORT2 -ucasemap_setBreakIterator(UCaseMap 
*csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return; - } - delete csm->iter; - csm->iter=reinterpret_cast(iterToAdopt); -} - -U_CAPI int32_t U_EXPORT2 -ucasemap_utf8ToTitle(UCaseMap *csm, - char *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - UText utext=UTEXT_INITIALIZER; - utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode); - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if(csm->iter==NULL) { - LocalPointer ownedIter; - BreakIterator *iter = ustrcase_getTitleBreakIterator( - nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode); - if (iter == nullptr) { - utext_close(&utext); - return 0; - } - csm->iter = ownedIter.orphan(); - } - csm->iter->setText(&utext, *pErrorCode); - int32_t length=ucasemap_mapUTF8( - csm->caseLocale, csm->options, csm->iter, - dest, destCapacity, - src, srcLength, - ucasemap_internalUTF8ToTitle, NULL, *pErrorCode); - utext_close(&utext); - return length; -} - -#endif // !UCONFIG_NO_BREAK_ITERATION diff --git a/deps/node/deps/icu-small/source/common/ucat.cpp b/deps/node/deps/icu-small/source/common/ucat.cpp deleted file mode 100644 index dac56eeb..00000000 --- a/deps/node/deps/icu-small/source/common/ucat.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2003, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* Author: Alan Liu -* Created: March 19 2003 -* Since: ICU 2.6 -********************************************************************** -*/ -#include "unicode/ucat.h" -#include "unicode/ustring.h" -#include "cstring.h" -#include "uassert.h" - -/* Separator between set_num and msg_num */ -static const char SEPARATOR = '%'; - -/* Maximum length of a set_num/msg_num key, incl. terminating zero. - * Longest possible key is "-2147483648%-2147483648" */ -#define MAX_KEY_LEN (24) - -/** - * Fill in buffer with a set_num/msg_num key string, given the numeric - * values. Numeric values must be >= 0. Buffer must be of length - * MAX_KEY_LEN or more. - */ -static char* -_catkey(char* buffer, int32_t set_num, int32_t msg_num) { - int32_t i = 0; - i = T_CString_integerToString(buffer, set_num, 10); - buffer[i++] = SEPARATOR; - T_CString_integerToString(buffer+i, msg_num, 10); - return buffer; -} - -U_CAPI u_nl_catd U_EXPORT2 -u_catopen(const char* name, const char* locale, UErrorCode* ec) { - return (u_nl_catd) ures_open(name, locale, ec); -} - -U_CAPI void U_EXPORT2 -u_catclose(u_nl_catd catd) { - ures_close((UResourceBundle*) catd); /* may be NULL */ -} - -U_CAPI const UChar* U_EXPORT2 -u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num, - const UChar* s, - int32_t* len, UErrorCode* ec) { - - char key[MAX_KEY_LEN]; - const UChar* result; - - if (ec == NULL || U_FAILURE(*ec)) { - goto ERROR; - } - - result = ures_getStringByKey((const UResourceBundle*) catd, - _catkey(key, set_num, msg_num), - len, ec); - if (U_FAILURE(*ec)) { - goto ERROR; - } - - return result; - - ERROR: - /* In case of any failure, return s */ - if (len != NULL) { - *len = u_strlen(s); - } - return s; -} - -/*eof*/ diff --git a/deps/node/deps/icu-small/source/common/uchar.cpp b/deps/node/deps/icu-small/source/common/uchar.cpp deleted file mode 100644 index 996c3fdc..00000000 --- 
a/deps/node/deps/icu-small/source/common/uchar.cpp +++ /dev/null @@ -1,725 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************** -* Copyright (C) 1996-2016, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************** -* -* File UCHAR.C -* -* Modification History: -* -* Date Name Description -* 04/02/97 aliu Creation. -* 4/15/99 Madhu Updated all the function definitions for C Implementation -* 5/20/99 Madhu Added the function u_getVersion() -* 8/19/1999 srl Upgraded scripts to Unicode3.0 -* 11/11/1999 weiv added u_isalnum(), cleaned comments -* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion. -* 06/20/2000 helena OS/400 port changes; mostly typecast. -****************************************************************************** -*/ - -#include "unicode/utypes.h" -#include "unicode/uchar.h" -#include "unicode/uscript.h" -#include "unicode/udata.h" -#include "uassert.h" -#include "cmemory.h" -#include "ucln_cmn.h" -#include "utrie2.h" -#include "udataswp.h" -#include "uprops.h" -#include "ustr_imp.h" - -/* uchar_props_data.h is machine-generated by genprops --csource */ -#define INCLUDED_FROM_UCHAR_C -#include "uchar_props_data.h" - -/* constants and macros for access to the data ------------------------------ */ - -/* getting a uint32_t properties word from the data */ -#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)); - -/* API functions ------------------------------------------------------------ */ - -/* Gets the Unicode character's general category.*/ -U_CAPI int8_t U_EXPORT2 -u_charType(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (int8_t)GET_CATEGORY(props); -} - -/* Enumerate all code points with their general categories. 
*/ -struct _EnumTypeCallback { - UCharEnumTypeRange *enumRange; - const void *context; -}; - -static uint32_t U_CALLCONV -_enumTypeValue(const void *context, uint32_t value) { - (void)context; - return GET_CATEGORY(value); -} - -static UBool U_CALLCONV -_enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { - /* just cast the value to UCharCategory */ - return ((struct _EnumTypeCallback *)context)-> - enumRange(((struct _EnumTypeCallback *)context)->context, - start, end+1, (UCharCategory)value); -} - -U_CAPI void U_EXPORT2 -u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) { - struct _EnumTypeCallback callback; - - if(enumRange==NULL) { - return; - } - - callback.enumRange=enumRange; - callback.context=context; - utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback); -} - -/* Checks if ch is a lower case letter.*/ -U_CAPI UBool U_EXPORT2 -u_islower(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER); -} - -/* Checks if ch is an upper case letter.*/ -U_CAPI UBool U_EXPORT2 -u_isupper(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER); -} - -/* Checks if ch is a title case letter; usually upper case letters.*/ -U_CAPI UBool U_EXPORT2 -u_istitle(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER); -} - -/* Checks if ch is a decimal digit. 
*/ -U_CAPI UBool U_EXPORT2 -u_isdigit(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); -} - -U_CAPI UBool U_EXPORT2 -u_isxdigit(UChar32 c) { - uint32_t props; - - /* check ASCII and Fullwidth ASCII a-fA-F */ - if( - (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) || - (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41)) - ) { - return TRUE; - } - - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); -} - -/* Checks if the Unicode character is a letter.*/ -U_CAPI UBool U_EXPORT2 -u_isalpha(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0); -} - -U_CAPI UBool U_EXPORT2 -u_isUAlphabetic(UChar32 c) { - return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0; -} - -/* Checks if c is a letter or a decimal digit */ -U_CAPI UBool U_EXPORT2 -u_isalnum(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0); -} - -/** - * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM. 
- * @internal - */ -U_CFUNC UBool -u_isalnumPOSIX(UChar32 c) { - return (UBool)(u_isUAlphabetic(c) || u_isdigit(c)); -} - -/* Checks if ch is a unicode character with assigned character type.*/ -U_CAPI UBool U_EXPORT2 -u_isdefined(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)!=0); -} - -/* Checks if the Unicode character is a base form character that can take a diacritic.*/ -U_CAPI UBool U_EXPORT2 -u_isbase(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0); -} - -/* Checks if the Unicode character is a control character.*/ -U_CAPI UBool U_EXPORT2 -u_iscntrl(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0); -} - -U_CAPI UBool U_EXPORT2 -u_isISOControl(UChar32 c) { - return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f); -} - -/* Some control characters that are used as space. */ -#define IS_THAT_CONTROL_SPACE(c) \ - (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL)) - -/* Java has decided that U+0085 New Line is not whitespace any more. 
*/ -#define IS_THAT_ASCII_CONTROL_SPACE(c) \ - (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c)) - -/* Checks if the Unicode character is a space character.*/ -U_CAPI UBool U_EXPORT2 -u_isspace(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c)); -} - -U_CAPI UBool U_EXPORT2 -u_isJavaSpaceChar(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0); -} - -/* Checks if the Unicode character is a whitespace character.*/ -U_CAPI UBool U_EXPORT2 -u_isWhitespace(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)( - ((CAT_MASK(props)&U_GC_Z_MASK)!=0 && - c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */ - IS_THAT_ASCII_CONTROL_SPACE(c) - ); -} - -U_CAPI UBool U_EXPORT2 -u_isblank(UChar32 c) { - if((uint32_t)c<=0x9f) { - return c==9 || c==0x20; /* TAB or SPACE */ - } else { - /* Zs */ - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR); - } -} - -U_CAPI UBool U_EXPORT2 -u_isUWhiteSpace(UChar32 c) { - return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0; -} - -/* Checks if the Unicode character is printable.*/ -U_CAPI UBool U_EXPORT2 -u_isprint(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - /* comparing ==0 returns FALSE for the categories mentioned */ - return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0); -} - -/** - * Checks if c is in \p{graph}\p{blank} - \p{cntrl}. - * Implements UCHAR_POSIX_PRINT. - * @internal - */ -U_CFUNC UBool -u_isprintPOSIX(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - /* - * The only cntrl character in graph+blank is TAB (in blank). - * Here we implement (blank-TAB)=Zs instead of calling u_isblank(). 
- */ - return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c)); -} - -U_CAPI UBool U_EXPORT2 -u_isgraph(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - /* comparing ==0 returns FALSE for the categories mentioned */ - return (UBool)((CAT_MASK(props)& - (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) - ==0); -} - -/** - * Checks if c is in - * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] - * with space=\p{Whitespace} and Control=Cc. - * Implements UCHAR_POSIX_GRAPH. - * @internal - */ -U_CFUNC UBool -u_isgraphPOSIX(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ - /* comparing ==0 returns FALSE for the categories mentioned */ - return (UBool)((CAT_MASK(props)& - (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) - ==0); -} - -U_CAPI UBool U_EXPORT2 -u_ispunct(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0); -} - -/* Checks if the Unicode character can start a Unicode identifier.*/ -U_CAPI UBool U_EXPORT2 -u_isIDStart(UChar32 c) { - /* same as u_isalpha() */ - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0); -} - -/* Checks if the Unicode character can be a Unicode identifier part other than starting the - identifier.*/ -U_CAPI UBool U_EXPORT2 -u_isIDPart(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)( - (CAT_MASK(props)& - (U_GC_ND_MASK|U_GC_NL_MASK| - U_GC_L_MASK| - U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK) - )!=0 || - u_isIDIgnorable(c)); -} - -/*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/ -U_CAPI UBool U_EXPORT2 -u_isIDIgnorable(UChar32 c) { - if(c<=0x9f) { - return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c); - } else { - uint32_t props; - GET_PROPS(c, props); - return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR); - } -} - -/*Checks if the Unicode character 
can start a Java identifier.*/ -U_CAPI UBool U_EXPORT2 -u_isJavaIDStart(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0); -} - -/*Checks if the Unicode character can be a Java identifier part other than starting the - * identifier. - */ -U_CAPI UBool U_EXPORT2 -u_isJavaIDPart(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return (UBool)( - (CAT_MASK(props)& - (U_GC_ND_MASK|U_GC_NL_MASK| - U_GC_L_MASK| - U_GC_SC_MASK|U_GC_PC_MASK| - U_GC_MC_MASK|U_GC_MN_MASK) - )!=0 || - u_isIDIgnorable(c)); -} - -U_CAPI int32_t U_EXPORT2 -u_charDigitValue(UChar32 c) { - uint32_t props; - int32_t value; - GET_PROPS(c, props); - value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START; - if(value<=9) { - return value; - } else { - return -1; - } -} - -U_CAPI double U_EXPORT2 -u_getNumericValue(UChar32 c) { - uint32_t props; - int32_t ntv; - GET_PROPS(c, props); - ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props); - - if(ntv==UPROPS_NTV_NONE) { - return U_NO_NUMERIC_VALUE; - } else if(ntv>4)-12; - int32_t denominator=(ntv&0xf)+1; - return (double)numerator/denominator; - } else if(ntv>5)-14; - int32_t exp=(ntv&0x1f)+2; - numValue=mant; - - /* multiply by 10^exp without math.h */ - while(exp>=4) { - numValue*=10000.; - exp-=4; - } - switch(exp) { - case 3: - numValue*=1000.; - break; - case 2: - numValue*=100.; - break; - case 1: - numValue*=10.; - break; - case 0: - default: - break; - } - - return numValue; - } else if(ntv>2)-0xbf; - int32_t exp=(ntv&3)+1; - - switch(exp) { - case 4: - numValue*=60*60*60*60; - break; - case 3: - numValue*=60*60*60; - break; - case 2: - numValue*=60*60; - break; - case 1: - numValue*=60; - break; - case 0: - default: - break; - } - - return numValue; - } else if(ntv>2); - return (double)numerator/denominator; - } else { - /* reserved */ - return U_NO_NUMERIC_VALUE; - } -} - -U_CAPI int32_t U_EXPORT2 -u_digit(UChar32 ch, int8_t radix) { - int8_t value; - 
if((uint8_t)(radix-2)<=(36-2)) { - value=(int8_t)u_charDigitValue(ch); - if(value<0) { - /* ch is not a decimal digit, try latin letters */ - if(ch>=0x61 && ch<=0x7A) { - value=(int8_t)(ch-0x57); /* ch - 'a' + 10 */ - } else if(ch>=0x41 && ch<=0x5A) { - value=(int8_t)(ch-0x37); /* ch - 'A' + 10 */ - } else if(ch>=0xFF41 && ch<=0xFF5A) { - value=(int8_t)(ch-0xFF37); /* fullwidth ASCII a-z */ - } else if(ch>=0xFF21 && ch<=0xFF3A) { - value=(int8_t)(ch-0xFF17); /* fullwidth ASCII A-Z */ - } - } - } else { - value=-1; /* invalid radix */ - } - return (int8_t)((value(36-2) || (uint32_t)digit>=(uint32_t)radix) { - return 0; - } else if(digit<10) { - return (UChar32)(0x30+digit); - } else { - return (UChar32)((0x61-10)+digit); - } -} - -/* miscellaneous, and support for uprops.cpp -------------------------------- */ - -U_CAPI void U_EXPORT2 -u_getUnicodeVersion(UVersionInfo versionArray) { - if(versionArray!=NULL) { - uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH); - } -} - -U_CFUNC uint32_t -u_getMainProperties(UChar32 c) { - uint32_t props; - GET_PROPS(c, props); - return props; -} - -U_CFUNC uint32_t -u_getUnicodeProperties(UChar32 c, int32_t column) { - U_ASSERT(column>=0); - if(column>=propsVectorsColumns) { - return 0; - } else { - uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c); - return propsVectors[vecIndex+column]; - } -} - -U_CFUNC int32_t -uprv_getMaxValues(int32_t column) { - switch(column) { - case 0: - return indexes[UPROPS_MAX_VALUES_INDEX]; - case 2: - return indexes[UPROPS_MAX_VALUES_2_INDEX]; - default: - return 0; - } -} - -U_CAPI void U_EXPORT2 -u_charAge(UChar32 c, UVersionInfo versionArray) { - if(versionArray!=NULL) { - uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT; - versionArray[0]=(uint8_t)(version>>4); - versionArray[1]=(uint8_t)(version&0xf); - versionArray[2]=versionArray[3]=0; - } -} - -U_CAPI UScriptCode U_EXPORT2 -uscript_getScript(UChar32 c, UErrorCode *pErrorCode) { - uint32_t scriptX; - 
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return USCRIPT_INVALID_CODE; - } - if((uint32_t)c>0x10ffff) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return USCRIPT_INVALID_CODE; - } - scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; - if(scriptX=UPROPS_SCRIPT_X_WITH_OTHER) { - scx=scriptExtensions+scx[1]; - } - if(sc>=USCRIPT_CODE_LIMIT) { - /* Guard against bogus input that would make us go past the Script_Extensions terminator. */ - return FALSE; - } - while(sc>*scx) { - ++scx; - } - return sc==(*scx&0x7fff); -} - -U_CAPI int32_t U_EXPORT2 -uscript_getScriptExtensions(UChar32 c, - UScriptCode *scripts, int32_t capacity, - UErrorCode *pErrorCode) { - uint32_t scriptX; - int32_t length; - const uint16_t *scx; - uint16_t sx; - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(capacity<0 || (capacity>0 && scripts==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; - if(scriptX=UPROPS_SCRIPT_X_WITH_OTHER) { - scx=scriptExtensions+scx[1]; - } - length=0; - do { - sx=*scx++; - if(lengthcapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return length; -} - -U_CAPI UBlockCode U_EXPORT2 -ublock_getCode(UChar32 c) { - return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT); -} - -/* property starts for UnicodeSet ------------------------------------------- */ - -static UBool U_CALLCONV -_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { - /* add the start code point to the USet */ - const USetAdder *sa=(const USetAdder *)context; - sa->add(sa->set, start); - (void)end; - (void)value; - return TRUE; -} - -#define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1) - -U_CFUNC void U_EXPORT2 -uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* add the start code point of each same-value range of 
the main trie */ - utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa); - - /* add code points with hardcoded properties, plus the ones following them */ - - /* add for u_isblank() */ - USET_ADD_CP_AND_NEXT(sa, TAB); - - /* add for IS_THAT_CONTROL_SPACE() */ - sa->add(sa->set, CR+1); /* range TAB..CR */ - sa->add(sa->set, 0x1c); - sa->add(sa->set, 0x1f+1); - USET_ADD_CP_AND_NEXT(sa, NL); - - /* add for u_isIDIgnorable() what was not added above */ - sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */ - sa->add(sa->set, HAIRSP); - sa->add(sa->set, RLM+1); - sa->add(sa->set, INHSWAP); - sa->add(sa->set, NOMDIG+1); - USET_ADD_CP_AND_NEXT(sa, ZWNBSP); - - /* add no-break spaces for u_isWhitespace() what was not added above */ - USET_ADD_CP_AND_NEXT(sa, NBSP); - USET_ADD_CP_AND_NEXT(sa, FIGURESP); - USET_ADD_CP_AND_NEXT(sa, NNBSP); - - /* add for u_digit() */ - sa->add(sa->set, U_a); - sa->add(sa->set, U_z+1); - sa->add(sa->set, U_A); - sa->add(sa->set, U_Z+1); - sa->add(sa->set, U_FW_a); - sa->add(sa->set, U_FW_z+1); - sa->add(sa->set, U_FW_A); - sa->add(sa->set, U_FW_Z+1); - - /* add for u_isxdigit() */ - sa->add(sa->set, U_f+1); - sa->add(sa->set, U_F+1); - sa->add(sa->set, U_FW_f+1); - sa->add(sa->set, U_FW_F+1); - - /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ - sa->add(sa->set, WJ); /* range WJ..NOMDIG */ - sa->add(sa->set, 0xfff0); - sa->add(sa->set, 0xfffb+1); - sa->add(sa->set, 0xe0000); - sa->add(sa->set, 0xe0fff+1); - - /* add for UCHAR_GRAPHEME_BASE and others */ - USET_ADD_CP_AND_NEXT(sa, CGJ); -} - -U_CFUNC void U_EXPORT2 -upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return; - } - - /* add the start code point of each same-value range of the properties vectors trie */ - utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa); -} diff --git a/deps/node/deps/icu-small/source/common/uchar_props_data.h 
b/deps/node/deps/icu-small/source/common/uchar_props_data.h deleted file mode 100644 index e9c23190..00000000 --- a/deps/node/deps/icu-small/source/common/uchar_props_data.h +++ /dev/null @@ -1,3707 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// -// Copyright (C) 1999-2016, International Business Machines -// Corporation and others. All Rights Reserved. -// -// file name: uchar_props_data.h -// -// machine-generated by: icu/tools/unicode/c/genprops/corepropsbuilder.cpp - - -#ifdef INCLUDED_FROM_UCHAR_C - -static const UVersionInfo dataVersion={0xb,0,0,0}; - -static const uint16_t propsTrie_index[21452]={ -0x45d,0x465,0x46d,0x475,0x48d,0x495,0x49d,0x4a5,0x4ad,0x4b5,0x4bb,0x4c3,0x4cb,0x4d3,0x4db,0x4e3, -0x4e9,0x4f1,0x4f9,0x501,0x504,0x50c,0x514,0x51c,0x524,0x52c,0x528,0x530,0x538,0x540,0x545,0x54d, -0x555,0x55d,0x561,0x569,0x571,0x579,0x581,0x589,0x585,0x58d,0x592,0x59a,0x5a0,0x5a8,0x5b0,0x5b8, -0x5c0,0x5c8,0x5d0,0x5d8,0x5dd,0x5e5,0x5e8,0x5f0,0x5f8,0x600,0x606,0x60e,0x60d,0x615,0x61d,0x625, -0x635,0x62d,0x63d,0x645,0x47d,0x655,0x65d,0x64d,0x66d,0x66f,0x677,0x665,0x687,0x68d,0x695,0x67f, -0x6a5,0x6ab,0x6b3,0x69d,0x6c3,0x6c9,0x6d1,0x6bb,0x6e1,0x6e7,0x6ef,0x6d9,0x6ff,0x707,0x70f,0x6f7, -0x71f,0x725,0x72d,0x717,0x73d,0x743,0x74b,0x735,0x75b,0x760,0x768,0x753,0x778,0x77f,0x787,0x770, -0x609,0x78f,0x797,0x47d,0x79f,0x7a7,0x7af,0x47d,0x7b7,0x7bf,0x7c7,0x7cc,0x7d4,0x7db,0x7e3,0x47d, -0x5c8,0x7eb,0x7f3,0x7fb,0x803,0x555,0x813,0x80b,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x81b,0x5c8,0x823,0x827,0x82f,0x5c8,0x835,0x5c8,0x83b,0x843,0x84b,0x555,0x555,0x853, -0x85b,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x860,0x868,0x5c8,0x5c8,0x870,0x878,0x880,0x888,0x890,0x5c8,0x898,0x8a0,0x8a8, -0x8b8,0x5c8,0x8c0,0x8c2,0x8ca,0x8b0,0x5c8,0x8cd,0x8e1,0x8d5,0x8dd,0x8e9,0x5c8,0x8f1,0x8f7,0x8ff, 
-0x907,0x5c8,0x917,0x91f,0x927,0x90f,0x47d,0x47d,0x937,0x93a,0x942,0x92f,0x952,0x94a,0x5c8,0x959, -0x5c8,0x968,0x961,0x970,0x978,0x97c,0x984,0x98c,0x4fd,0x994,0x997,0x99d,0x9a4,0x997,0x524,0x9ac, -0x4ad,0x4ad,0x4ad,0x4ad,0x9b4,0x4ad,0x4ad,0x4ad,0x9c4,0x9cc,0x9d4,0x9dc,0x9e4,0x9e8,0x9f0,0x9bc, -0xa08,0xa10,0x9f8,0xa00,0xa18,0xa20,0xa28,0xa30,0xa48,0xa38,0xa40,0xa50,0xa58,0xa67,0xa6c,0xa5f, -0xa74,0xa74,0xa74,0xa74,0xa74,0xa74,0xa74,0xa74,0xa7c,0xa84,0x8ff,0xa87,0xa8f,0xa96,0xa9b,0xaa3, -0x8ff,0xaaa,0xaa9,0xaba,0xabd,0x8ff,0x8ff,0xab2,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0xacc,0xad4,0xac4, -0x8ff,0x8ff,0x8ff,0xad9,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0xadf,0xae7,0x8ff,0xaef,0xaf6, -0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0xa74,0xa74,0xa74,0xa74,0xafe,0xa74,0xb05,0xb0c, -0xa74,0xa74,0xa74,0xa74,0xa74,0xa74,0xa74,0xa74,0x8ff,0xb14,0xb1b,0xb1f,0xb25,0x8ff,0xb2b,0xaa4, -0x555,0xb3b,0xb33,0xb43,0x4ad,0x4ad,0x4ad,0xb4b,0x4fd,0xb53,0x5c8,0xb59,0xb69,0xb61,0xb61,0x524, -0xb71,0xb79,0xb81,0x47d,0xb89,0x8ff,0x8ff,0xb90,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0xb98,0xb9e, -0xbae,0xba6,0x609,0x5c8,0xbb6,0x85b,0x5c8,0xbbe,0xbc6,0xbca,0x5c8,0x5c8,0xbcf,0xbd7,0x8ff,0xbdf, -0xaa4,0xbe7,0xbed,0x8ff,0xbe7,0xbf5,0x8ff,0xaa4,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff, -0xbfd,0x5c8,0x5c8,0x5c8,0xc05,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0xc0b,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xc10,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, 
-0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x8cd,0x8ff,0x8ff, -0xc18,0x5c8,0xc1b,0x5c8,0xc23,0xc29,0xc31,0xc39,0xc3e,0x5c8,0x5c8,0xc42,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xc49,0x5c8,0xc50,0xc56,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xc5e,0x5c8,0x5c8,0x5c8,0xc66,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xc68,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xc6f,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0xc76,0x5c8,0x5c8,0x5c8,0xc7d,0xc85,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xc8a,0x5c8,0x5c8,0xc92,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xc96,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, 
-0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xc99,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xc9c,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0xca2,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0xcaa,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0xcaf,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xcb4,0x5c8,0x5c8,0x5c8,0xcb9,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, 
-0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0xcc1,0xcc8,0xccc,0x5c8,0x5c8,0x5c8,0xcd3,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xcd9, -0xce9,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0xce1,0x8ff,0xcf1,0x970,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0xcf6,0xcfe,0x4ad,0xd0e,0xd06,0x5c8,0x5c8,0xd16,0xd1e,0xd2e,0x4ad,0xd33,0xd3b,0xd41,0x47d,0xd26, -0xd49,0xd51,0x5c8,0xd59,0xd69,0xd6c,0xd61,0xd74,0x61d,0xd7c,0xd83,0x8c1,0x66d,0xd93,0xd8b,0xd9b, -0x5c8,0xda3,0xdab,0xdb3,0x5c8,0xdbb,0xdc3,0xdcb,0xdd3,0xddb,0xddf,0xde7,0x4fd,0x4fd,0x5c8,0xdef, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, 
-0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xdf7,0xe03,0xdfb, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b, -0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, 
-0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0x5c8,0x5c8,0x5c8,0xe1b,0x5c8,0xcd4,0xe22,0xe27, -0x5c8,0x5c8,0x5c8,0xe2f,0x5c8,0x5c8,0x8cc,0x47d,0xe45,0xe35,0xe3d,0x5c8,0x5c8,0xe4d,0xe55,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xe5a,0xe62,0x5c8,0xe66,0x5c8,0xe6c,0xe70, -0xe78,0xe80,0xe87,0xe8f,0x5c8,0x5c8,0x5c8,0xe95,0xead,0x46d,0xeb5,0xebd,0xec2,0x8e1,0xe9d,0xea5, -0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b, -0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b,0xe0b, -0x11f4,0x11f4,0x1234,0x1274,0x12b4,0x12ec,0x132c,0x136c,0x13a4,0x13e4,0x1410,0x1450,0x1490,0x14a0,0x14e0,0x1514, -0x1554,0x1584,0x15c4,0x1604,0x1614,0x1648,0x1680,0x16c0,0x1700,0x1740,0x1774,0x17a0,0x17e0,0x1818,0x1834,0x1874, -0xa80,0xac0,0xb00,0xb40,0xb80,0xa40,0xbc0,0xa40,0xbe2,0xa40,0xa40,0xa40,0xa40,0xc22,0x1db,0x1db, 
-0xc62,0xca2,0xa40,0xa40,0xa40,0xa40,0xce2,0xd02,0xa40,0xa40,0xd42,0xd82,0xdc2,0xe02,0xe42,0xe82, -0xec2,0xef9,0x1db,0x1db,0xf1d,0xf51,0x1db,0xf79,0x1db,0x1db,0x1db,0x1db,0xfa6,0x1db,0x1db,0x1db, -0x1db,0x1db,0x1db,0x1db,0xfba,0x1db,0xff2,0x1032,0x1db,0x103d,0x1db,0x1db,0x1db,0x1073,0xa40,0x10b3, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, 
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0x10f3,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700, -0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x1133, -0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700, -0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x1133, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0xeca,0xed1,0xed9,0x47d,0x5c8,0x5c8,0x5c8,0xbd7,0xee9,0xee1,0xf00,0xef1,0xef8,0xf08,0xb85,0xf10, -0x47d,0x47d,0x47d,0x47d,0x8c1,0x5c8,0xf18,0xf20,0x5c8,0xf28,0xf30,0xf34,0xf3c,0x5c8,0xf44,0x47d, -0x555,0x55f,0xf4c,0x5c8,0xf50,0xf58,0xf68,0xf60,0x5c8,0xf70,0x5c8,0xf77,0x47d,0x47d,0x47d,0x47d, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xb69,0x8cd,0xe6c,0x47d,0x47d,0x47d,0x47d, -0xf87,0xf7f,0xf8a,0xf92,0x8e1,0xf9a,0x47d,0xfa2,0xfaa,0xfb2,0x47d,0x47d,0x5c8,0xfc2,0xfca,0xfba, 
-0xfda,0xfe1,0xfd2,0xfe9,0xff1,0x47d,0x1001,0xff9,0x5c8,0x1004,0x100c,0x1014,0x101c,0x1024,0x47d,0x47d, -0x5c8,0x5c8,0x102c,0x47d,0x555,0x1034,0x4fd,0x103c,0x5c8,0x1044,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x104c,0x47d,0x47d,0x47d,0x47d,0x1054,0x105c,0x1063,0x47d,0x47d,0x47d,0x47d,0x47d, -0x1073,0x5fe,0x107b,0x106b,0x952,0x1083,0x108b,0x1091,0x10a9,0x1099,0x10a1,0x10ad,0x952,0x10bd,0x10b5,0x10c5, -0x10d5,0x10cd,0x47d,0x47d,0x10dc,0x10e4,0x620,0x10ec,0x10fc,0x1102,0x110a,0x10f4,0x47d,0x47d,0x47d,0x47d, -0x5c8,0x1112,0x111a,0x47d,0x5c8,0x1122,0x112a,0x47d,0x47d,0x47d,0x47d,0x47d,0x5c8,0x1132,0x113a,0x47d, -0x5c8,0x1142,0x114a,0x1152,0x5c8,0x1162,0x115a,0x47d,0x83b,0x116a,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x5c8,0x1172,0x47d,0x47d,0x47d,0x555,0x4fd,0x117a,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x1192,0x1182,0x118a,0x5c8,0x11a2,0x119a,0x5c8,0x8c2,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x11b8,0x11bd,0x11aa,0x11b2,0x11cd,0x11c5,0x47d,0x47d,0x11dc,0x11e0,0x11d4,0x11f0,0x11e8,0x115a,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x11f4,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x8cc,0x47d,0x47d,0x47d, -0x1204,0x120c,0x1214,0x11fc,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x121c,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x1224,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, 
-0x47d,0x47d,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x1226,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x8c2,0x8e1,0x122e,0x47d,0x47d,0xe62,0x1236,0x5c8,0x123e,0x1246,0x124e,0xcd9,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x555,0x4fd,0x1256,0x47d,0x47d,0x47d,0x5c8,0x5c8,0x125e,0x1263,0x126b,0x47d, -0x47d,0x1273,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x127b,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x1283,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x8e1,0x47d,0x47d,0xe62,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xdfb,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x5c8,0x5c8,0x5c8,0x128b,0x1290,0x1298,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, 
-0x47d,0x47d,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0xb98,0x8ff,0x12a0,0x8ff,0x12a7,0x12af,0x12b5, -0x8ff,0x12bb,0x8ff,0x8ff,0x12c3,0x47d,0x47d,0x47d,0x47d,0x12cb,0x8ff,0x8ff,0xaa6,0x12d3,0x47d,0x47d, -0x47d,0x47d,0x12e3,0x12ea,0x12ef,0x12f5,0x12fd,0x1305,0x130d,0x12e7,0x1315,0x131d,0x1325,0x132a,0x12fc,0x12e3, -0x12ea,0x12e6,0x12f5,0x1332,0x12e4,0x1335,0x12e7,0x133d,0x1345,0x134d,0x1354,0x1340,0x1348,0x1350,0x1357,0x1343, -0x135f,0x12db,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff, -0x8ff,0x8ff,0x524,0x136f,0x524,0x1376,0x137d,0x1367,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x1384,0x138c,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x1394,0x47d,0x555,0x13a4,0x139c,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x13ac,0x13bc,0x13b4,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x13cc,0x13d4,0x13dc,0x13e4,0x13ec,0x13f4,0x47d,0x13c4,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x8ff,0x13fc,0x8ff,0x8ff,0xb90,0x1401,0x1405,0xb98,0x140d,0x8ff,0x8ff,0x13fc,0x8ff,0x12ba, -0x47d,0x1415,0x141d,0x1421,0x1429,0x1431,0x47d,0x47d,0x47d,0x47d,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff, -0x8ff,0x1439,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff, 
-0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x8ff,0x1441,0x1449,0x8ff,0x8ff,0x8ff,0xb90,0x8ff,0x8ff, -0x1451,0x47d,0x13fc,0x8ff,0x1459,0x8ff,0x1461,0xb9a,0x47d,0x47d,0x13fc,0xaa4,0x8ff,0x1465,0x8ff,0x146d, -0x141d,0x8ff,0x47d,0x47d,0x47d,0xb9a,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x1475,0x5c8,0x5c8,0x147c,0x5c8,0x5c8,0x5c8,0x1484,0x5c8,0x148c,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0xc7a,0x5c8,0x5c8,0x1494,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x149c,0x14a4,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0xcb9,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x14ab,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x14b2,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, 
-0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x14b9,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0xb69,0x47d,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x14bd,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0xf50,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x127f,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x14c2,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x5c8,0x5c8,0x5c8,0x5c8,0x14ca,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8,0x5c8, -0x5c8,0x5c8,0x5c8,0xf50,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, 
-0x47d,0x47d,0x47d,0x14da,0x14d2,0x14d2,0x14d2,0x47d,0x47d,0x47d,0x47d,0x524,0x524,0x524,0x524,0x524, -0x524,0x524,0x14e2,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d,0x47d, -0x47d,0x47d,0x47d,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13,0xe13, -0xe13,0xe13,0x14ea,0x45c,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, -0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, -0xf,0xf,0xf,0xf,0xc,0x17,0x17,0x17,0x19,0x17,0x17,0x17,0x14,0x15,0x17,0x18, -0x17,0x13,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17, -0x18,0x18,0x18,0x17,0x17,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x14, -0x17,0x15,0x1a,0x16,0x1a,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0x14, -0x18,0x15,0x18,0xf,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, -0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, -0xf,0xf,0xf,0xf,0xc,0x17,0x19,0x19,0x19,0x19,0x1b,0x17,0x1a,0x1b,5,0x1c, -0x18,0x10,0x1b,0x1a,0x1b,0x18,0x34b,0x38b,0x1a,2,0x17,0x17,0x1a,0x30b,5,0x1d, -0x34cb,0x344b,0x3ccb,0x17,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0x18,1,1,1,1, -1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,0x18,2,2,2,2, -2,2,2,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,2,1,2,1, 
-2,1,2,1,2,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,1,2,1,2,1,2,2,2,1,1,2, -1,2,1,1,2,1,1,1,2,2,1,1,1,1,2,1, -1,2,1,1,1,2,2,2,1,1,2,1,1,2,1,2, -1,2,1,1,2,1,2,2,1,2,1,1,2,1,1,1, -2,1,2,1,1,2,2,5,1,2,2,2,5,5,5,5, -1,3,2,1,3,2,1,3,2,1,2,1,2,1,2,1, -2,1,2,1,2,1,2,1,2,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,2,1,3,2, -1,2,1,1,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,2,2,2,2,2,2,1,1, -2,1,1,2,2,1,2,1,1,1,1,2,1,2,1,2, -1,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,5,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, -4,4,0x1a,0x1a,0x1a,0x1a,4,4,4,4,4,4,4,4,4,4, -4,4,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a, -4,4,4,4,4,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,4,0x1a,4,0x1a, -0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -1,2,1,2,4,0x1a,1,2,0,0,4,2,2,2,0x17,1, -0,0,0,0,0x1a,0x1a,1,0x17,1,1,1,0,1,0,1,1, -2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0,1,1,1,1,1,1,1,1,1,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,1,2,2,1,1,1,2,2,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,2,2,2,2,1,2,0x18,1,2,1,1,2, -2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,0x1b,6,6,6,6,6,7,7,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,1,2,1,2,1,2,1,2,1,2,1, -2,1,2,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,0,1,1,1,1,1,1,1,1,1,1,1, 
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0, -0,4,0x17,0x17,0x17,0x17,0x17,0x17,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,0x17,0x13,0,0,0x1b,0x1b,0x19, -0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,0x13,6, -0x17,6,6,0x17,6,6,0x17,6,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,5, -5,5,5,0x17,0x17,0,0,0,0,0,0,0,0,0,0,0, -0x10,0x10,0x10,0x10,0x10,0x10,0x18,0x18,0x18,0x17,0x17,0x19,0x17,0x17,0x1b,0x1b, -6,6,6,6,6,6,6,6,6,6,6,0x17,0x10,0,0x17,0x17, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,5,5, -6,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0x17,5,6,6,6,6,6,6,6,0x10,0x1b,6, -6,6,6,6,6,4,4,6,6,0x1b,6,6,6,6,5,5, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,0x1b,0x1b,5, -0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0x10, -5,6,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,0,0,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6, -6,6,6,6,6,5,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6, -6,6,6,6,6,6,6,6,4,4,0x1b,0x17,0x17,0x17,4,0, -0,6,0x19,0x19,6,6,6,6,4,6,6,6,4,6,6,6, -6,6,0,0,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, -0x17,0x17,0x17,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,6,6,6,6,4,6, -6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6, -0,0,0x17,0,5,5,5,5,5,5,5,5,5,5,5,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,6,6,0x10,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 
-6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5, -5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,6,6,6,6,6,6,6,6,6, -6,6,6,6,5,5,6,6,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189, -0x1c9,0x209,0x249,0x289,0x17,4,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,6,6,6,8,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,6,8,6,5,8,8,8,6,6,6, -6,6,6,6,6,8,8,8,8,6,8,8,5,6,6,6, -6,6,6,6,5,5,5,5,5,5,5,5,5,5,6,6, -0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,0x19,0x19, -0x37cb,0x35cb,0x3fcb,0x34cb,0x3ccb,0x94b,0x1b,0x19,5,0x17,6,0,5,6,8,8, -0,5,5,5,5,5,5,5,5,0,0,5,5,0,0,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5, -5,5,5,5,5,0,5,0,0,0,5,5,5,5,0,0, -6,5,8,8,8,6,6,6,6,0,0,8,8,0,0,8, -8,6,5,0,0,0,0,0,0,0,0,8,0,0,0,0, -5,5,0,5,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189, -0x1c9,0x209,0x249,0x289,6,6,5,5,5,6,0x17,0,0,0,0,0, -0,0,0,0,0,6,6,8,0,5,5,5,5,5,5,0, -0,0,0,5,5,0,0,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,5, -0,5,5,0,5,5,0,0,6,0,8,8,8,6,6,0, -0,0,0,6,6,0,0,6,6,6,0,0,0,6,0,0, -0,0,0,0,0,5,5,5,5,0,5,0,5,5,6,6, -0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x19,0,0, -0,0,0,0,0,5,6,6,6,6,6,6,0,6,6,8, -0,5,5,5,5,5,5,5,5,5,0,5,5,5,0,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5, -5,5,5,5,5,0,5,5,0,5,5,5,5,5,0,0, -6,5,8,8,8,6,6,6,6,6,0,6,6,8,0,8, -8,6,0,0,5,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189, -0x1c9,0x209,0x249,0x289,0x1b,5,0x34cb,0x344b,0x3ccb,0x37cb,0x35cb,0x3fcb,0,0,0,0, -0,0,0,0,0,6,8,8,0,5,5,5,5,5,5,5, -5,0,0,5,5,0,0,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,5, -0,5,5,5,5,5,0,0,6,5,8,6,8,6,6,6, -6,0,0,8,8,0,0,8,8,6,0,0,0,0,0,0, -0,0,6,8,0,0,0,0,5,5,0,5,0,0,0,0, -0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x7cb,0x1e4b,0x784b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x19,0x1b,0,0,0,0,0,0,0,6,5, -0,5,5,5,5,5,5,0,0,0,5,5,5,0,5,5, -5,5,0,0,0,5,5,0,5,0,5,5,0,0,0,5, -5,0,0,0,5,5,5,0,0,0,5,5,5,5,5,5, -5,5,5,5,5,5,0,0,0,0,8,8,6,8,8,0, -0,0,8,8,8,0,8,8,8,6,0,0,5,0,0,0, 
-0,0,0,8,0,0,0,0,0,0,0,0,5,5,6,6, -0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0, -0,0,0,0,0x54b,0x58b,0x5cb,0x60b,0x58b,0x5cb,0x60b,0x1b,6,8,8,8, -6,5,5,5,5,5,5,5,5,0,5,5,5,0,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0, -0,5,6,6,6,8,8,8,8,0,6,6,6,0,6,6, -6,6,0,0,0,0,0,0,0,6,6,0,5,5,5,0, -0,0,0,0,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189, -0x1c9,0x209,0x249,0x289,0,5,5,0,0,0,0,0,0,0,0,0, -0,0,0,0,5,6,8,8,0x17,5,5,5,5,5,5,5, -5,0,5,5,5,0,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0,5,5,5,5,5,5,5,5,5,5, -0,5,5,5,5,5,0,0,6,5,8,6,8,8,8,8, -8,0,6,8,8,0,8,8,6,6,0,0,0,0,0,0, -0,8,8,0,0,0,0,0,0,0,5,0,5,5,6,6, -0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x7cb,0x1e4b,0x784b,0x34cb, -0x344b,0x3ccb,0x37cb,0x35cb,0x3fcb,0x1b,5,5,5,5,5,5,6,6,8,8, -0,5,5,5,5,5,5,5,5,0,5,5,5,0,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,6,6,5,8,8, -8,6,6,6,6,0,8,8,8,0,8,8,8,6,5,0x1b, -0,0,0,0,5,5,5,8,0xcc0b,0xca0b,0xcb4b,0xc90b,0x364b,0xc94b,0x350b,5, -0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289, -0,0,8,8,0x17,0,0,0,0,0,0,0,0,0,0,0, -0,0,8,8,0,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,0,0,0,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,5, -5,5,5,5,5,5,5,5,0,5,0,0,5,5,5,5, -5,5,5,0,0,0,6,0,0,0,0,8,8,8,6,6, -6,0,6,0,8,8,8,8,8,8,8,8,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,6,5,5, -6,6,6,6,6,6,6,0,0,0,0,0x19,5,5,5,5, -5,5,4,6,6,6,6,6,6,6,6,0x17,0x49,0x89,0xc9,0x109, -0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0,0,0,0,0,5,5,0, -5,0,0,5,5,0,5,0,0,5,0,0,0,0,0,0, -5,5,5,5,0,5,5,5,5,5,5,5,0,5,5,5, -0,5,0,5,0,0,5,5,0,5,5,5,5,6,5,5, -6,6,6,6,6,6,0,6,6,5,0,0,5,5,5,5, -5,0,4,0,6,6,6,6,6,6,0,0,0x49,0x89,0xc9,0x109, -0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,5,5,5,5,5,0x1b,0x1b,0x1b, -0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x1b, -0x17,0x1b,0x1b,0x1b,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x49,0x89,0xc9,0x109, 
-0x149,0x189,0x1c9,0x209,0x249,0x289,0x344b,0x3c4b,0x444b,0x4c4b,0x544b,0x5c4b,0x644b,0x6c4b,0x744b,0x2c4b, -0x1b,6,0x1b,6,0x1b,6,0x14,0x15,0x14,0x15,8,8,5,5,5,5, -5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,0, -0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,8, -6,6,6,6,6,0x17,6,6,5,5,5,5,5,6,6,6, -6,6,6,6,6,6,6,6,0,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x17,0x17,0x17,0x17, -0x17,0x1b,0x1b,0x1b,0x1b,0x17,0x17,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,8,8,6,6,6,6,8,6,6, -6,6,6,6,8,6,6,8,8,6,6,5,0x49,0x89,0xc9,0x109, -0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,0x17,0x17,5,5,5,5, -5,5,8,8,6,6,5,5,5,5,6,6,6,5,8,8, -8,5,5,8,8,8,8,8,8,8,5,5,5,6,6,6, -6,5,5,5,5,5,5,5,5,5,5,5,5,5,6,8, -8,6,6,8,8,8,8,8,8,6,5,8,0x49,0x89,0xc9,0x109, -0x149,0x189,0x1c9,0x209,0x249,0x289,8,8,8,6,0x1b,0x1b,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,0x17,4,2,2,2,1,1,1,1, -1,1,0,1,0,0,0,0,0,1,0,0,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,5,5,5,5, -5,5,5,5,5,0,5,5,5,5,0,0,5,5,5,5, -5,5,5,0,5,0,5,5,5,5,0,0,5,5,5,5, -5,5,5,5,5,0,5,5,5,5,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5, -5,5,0,0,5,5,5,5,5,5,5,0,5,0,5,5, -5,5,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0,5,5,5,5,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,0,0,6,6,6,0x17,0x17,0x17,0x17, -0x17,0x17,0x17,0x17,0x17,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0xa4b, -0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x788b,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0,0,2,2,2,2,2,2,0,0,0x13,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x17,0x17,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0xc,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,0x14,0x15,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,0x17,0x17,0x17,0x98a,0x9ca, 
-0xa0a,5,5,5,5,5,5,5,5,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5, -5,5,6,6,6,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,6,6,6,0x17,0x17,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,6,6,0,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5, -5,0,6,6,0,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,6,6,8,6,6,6,6,6,6,6,8,8, -8,8,8,8,8,8,6,8,8,6,6,6,6,6,6,6, -6,6,6,6,0x17,0x17,0x17,4,0x17,0x17,0x17,0x19,5,6,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, -0x54b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,6,5,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0x17,0x17,0x17,0x17,0x17,0x17,0x13,0x17,0x17,0x17,0x17,6,6,6,0x10,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, -5,5,5,4,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0,0,0,0,0,0,0,5,5,5,5,5,6,6,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0,0,0,0,0,0, -0,0,0,0,6,6,6,8,8,8,8,6,6,8,8,8, -0,0,0,0,8,8,6,8,8,8,8,8,8,6,6,6, -0,0,0,0,0x1b,0,0,0,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189, -0x1c9,0x209,0x249,0x289,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0,0,5,5,5,5,5,0,0,0,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, -0x149,0x189,0x1c9,0x209,0x249,0x289,0x30b,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,6,6,8,8,6,0,0,0x17,0x17,0x17,0x17,0x17,0x17, -0x17,0x17,0x17,4,0x17,0x17,0x17,0x17,0x17,0x17,0,0,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,7,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,8,6,8,6,6,6,6,6,6,6,0,6,8,6,8, -8,6,6,6,6,6,6,6,6,8,8,8,8,8,8,6, -6,6,6,6,6,6,6,6,6,0,0,6,0x49,0x89,0xc9,0x109, 
-0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, -0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,0x17,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,6,6,6,6,6,6,6,6, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,6,6,6,6, -8,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,6,8,6,6, -6,6,6,8,6,8,8,8,8,8,6,8,8,5,5,5, -5,5,5,5,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0x17,0x17,0x17,0x17,0x17,0x17,5,8,6,6,6,6,8,8, -6,6,8,6,6,6,5,5,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,5,5,5,5,5,5,6,6,8,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,6,8,6,6,8,8, -8,6,8,6,6,6,8,8,0,0,0,0,0,0,0,0, -0x17,0x17,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, -0,5,5,5,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5, -5,5,5,5,8,8,8,8,8,8,8,8,6,6,6,6, -6,6,6,6,8,8,6,6,0,0,0,0x17,0x17,0x17,0x17,0x17, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,4,4,4,4,4,4,0x17,0x17, -2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1, -0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0, -6,6,6,0x17,6,6,6,6,6,6,6,6,6,6,6,6, -6,8,6,6,6,6,6,6,6,5,5,5,5,6,5,5, -5,5,8,8,6,5,5,8,6,6,0,0,0,0,0,0, -2,2,2,2,2,2,2,2,2,2,2,2,4,4,4,4, -4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, -4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,2, -2,2,2,2,2,2,2,2,2,2,2,2,4,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,4,4,4,4,4, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,0,6,6,6,6,6, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,2,2,2,2,2,2,2,2,1,2, -2,2,2,2,2,2,2,2,1,1,1,1,1,0x1a,0x1a,0x1a, -0,0,2,2,2,0,2,2,1,1,1,1,3,0x1a,0x1a,0, -2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1, -2,2,2,2,2,2,0,0,1,1,1,1,1,1,0,0, -2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1, -2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1, -2,2,2,2,2,2,0,0,1,1,1,1,1,1,0,0, -2,2,2,2,2,2,2,2,0,1,0,1,0,1,0,1, -2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0, -2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3, 
-2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3, -2,2,2,2,2,0,2,2,1,1,1,1,3,0x1a,2,0x1a, -0x1a,0x1a,2,2,2,0,2,2,1,1,1,1,3,0x1a,0x1a,0x1a, -2,2,2,2,0,0,2,2,1,1,1,1,0,0x1a,0x1a,0x1a, -0x16,0x17,0x17,0x17,0x18,0x14,0x15,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, -0x17,0x17,0x18,0x17,0x16,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0xc, -0x10,0x10,0x10,0x10,0x10,0,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, -0x2cb,4,0,0,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x18,0x18,0x18,0x14,0x15,4, -0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0x10,0x10,0x10,0x10,0x10, -0x13,0x13,0x13,0x13,0x13,0x13,0x17,0x17,0x1c,0x1d,0x14,0x1c,0x1c,0x1d,0x14,0x1c, -0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0xd,0xe,0x10,0x10,0x10,0x10,0x10,0xc, -0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x1c,0x1d,0x17,0x17,0x17,0x17,0x16, -0x2cb,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x18,0x18,0x18,0x14,0x15,0, -4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0, -0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19, -0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7, -7,6,7,7,7,6,6,6,6,6,6,6,6,6,6,6, -6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,1,0x1b,1,0x1b,1,0x1b,1,1,1,1,0x1b,2, -1,1,1,1,2,5,5,5,5,2,0x1b,0x1b,2,2,1,1, -0x18,0x18,0x18,0x18,0x18,1,2,2,2,2,0x1b,0x18,0x1b,0x1b,2,0x1b, -0x358b,0x360b,0x364b,0x348b,0x388b,0x350b,0x390b,0x3d0b,0x410b,0x354b,0x454b,0x35cb,0x3dcb,0x45cb,0x4dcb,0x58b, -0x1b,0x1b,1,0x1b,0x1b,0x1b,0x1b,1,0x1b,0x1b,2,1,1,1,2,2, -1,1,1,2,0x1b,1,0x1b,0x1b,0x18,1,1,1,1,1,0x1b,0x1b, -0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x7ca,0x80a,0x84a,0x11ca,0x1e4a,0x980a,0x784a, -0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x7ca,0x80a,0x84a,0x11ca,0x1e4a,0x980a,0x784a, -0x784a,0x984a,0x788a,1,2,0x6ca,0x11ca,0x988a,0x78ca,0x54b,0x1b,0x1b,0,0,0,0, -0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x1b,0x1b,0x1b,0x1b, 
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x18, -0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18, -0x1b,0x1b,0x18,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x14,0x15,0x14,0x15,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x14,0x15,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x2cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,0xa4b,0x30b,0x34b,0x38b, -0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x2cb,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb, -0x50b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,0xa4b,0x30b,0x34b,0x38b,0x3cb, 
-0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,0xa4b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15, -0x14,0x15,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x30b,0x34b,0x38b,0x3cb, -0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18, -0x18,0x14,0x15,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x14,0x15, -0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x14,0x15,0x14,0x15,0x14, -0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14, -0x15,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x14,0x15,0x14,0x15, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x14,0x15,0x18,0x18, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, 
-0x18,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,0,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,1, -1,2,2,1,2,1,2,1,2,1,1,1,1,2,1,2, -2,1,2,2,2,2,2,2,4,4,1,1,1,2,1,2, -2,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,1,2,1,2,6,6,6,1,2, -0,0,0,0,0,0x17,0x17,0x17,0x17,0x344b,0x17,0x17,2,2,2,2, -2,2,0,2,0,0,0,0,0,2,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0, -0,0,0,4,0x17,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,6,5,5,5,5,5,5,5,0,5,5,5,5, -5,5,5,0,5,5,5,5,5,5,5,0,5,5,5,5, -5,5,5,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,0, -0,0,0,0,0x17,0x17,0x1c,0x1d,0x1c,0x1d,0x17,0x17,0x17,0x1c,0x1d,0x17, -0x1c,0x1d,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x13,0x17,0x17,0x13,0x17, -0x1c,0x1d,0x17,0x17,0x1c,0x1d,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x17,0x17, -0x17,0x17,0x17,4,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x13,0x13, -0x17,0x17,0x17,0x17,0x13,0x17,0x14,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, -0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, 
-0x1b,0x1b,0x1b,0x1b,0,0,0,0,0x1b,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a, -0x74a,0x78a,6,6,6,6,8,8,0x13,4,4,4,4,4,0x1b,0x1b, -0x7ca,0xa4a,0xcca,4,5,0x17,0x1b,0x1b,0xc,0x17,0x17,0x17,0x1b,4,5,0x54a, -0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x1b,0x1b,0x14,0x15,0x14,0x15, -0x14,0x15,0x14,0x15,0x13,0x14,0x15,0x15,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, -0,6,6,0x1a,0x1a,4,4,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0x17,4,4,4,5,0,0,0,0,0,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,0,0x1b,0x1b,0x58b,0x5cb, -0x60b,0x64b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, -0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0x58b,0x5cb,0x60b,0x64b, -0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x7cb,0xa4b,0xccb,0xf4b, -0x11cb,0x144b,0x16cb,0x194b,0x1b,0xa8b,0xacb,0xb0b,0xb4b,0xb8b,0xbcb,0xc0b,0xc4b,0xc8b,0xccb,0xd0b, -0xd4b,0xd8b,0xdcb,0xe0b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0xe4b,0xe8b,0xecb,0xf0b,0xf4b,0xf8b,0xfcb,0x100b,0x104b,0x108b,0x10cb, -0x110b,0x114b,0x118b,0x11cb,5,5,5,5,5,0x685,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,0x5c5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0x685,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x705,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0x585,5,5,0x705,5,5,5,0x7885,5,0x605,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,0x785,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0x5c5,5,5,5,5,5,5,5, -0x685,5,0x645,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0x7985,0x7c5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0x7845,5,5,5,5,5,5,5,5,0x605,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0x685,5,5,5,5,5,5,5,5,5,5, 
-5,5,5,5,5,5,5,5,0x1e45,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0x7985,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0x7a85,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0x5c5,5,0x745,5,0x6c5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,0x7c5,5,0x7845,0xa45,0xcc5,5,5, -5,5,5,5,0xf45,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,0x605,0x605,0x605,0x605,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,0x645,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x585,5,5, -5,5,5,5,5,0x585,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0x585,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0x785,0xa45,5,5,5,5,5,5,5,5,5,5,5,5, -0x585,0x5c5,0x605,5,0x5c5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0x7c5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0x745,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0x705,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0x785,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0x1e45,5,5,5,5,5,5,5,0x645,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0x7885,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x5c5,5, -5,5,5,0x5c5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0x5c5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0x7845,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0x6c5,5,5,5,5,5,0x1e45,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0x6c5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0x545,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,4,5,5,5,5,5,5, -5,5,5,5,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,4,0x17,0x17,0x17,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,5,5,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, 
-1,2,1,2,4,4,6,6,1,2,1,2,1,2,1,2, -1,2,1,2,1,2,5,6,7,7,7,0x17,6,6,6,6, -6,6,6,6,6,6,0x17,4,5,5,5,5,5,5,0x58a,0x5ca, -0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x54a,6,6,0x17,0x17,0x17,0x17,0x17,0x17, -0,0,0,0,0,0,0,0,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a, -0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,4, -4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5, -4,4,2,5,5,5,5,5,0x1a,0x1a,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,2,2,1,2,1,2,1,2, -1,2,1,2,1,2,1,2,1,2,1,2,4,2,2,2, -2,2,2,2,2,1,2,1,2,1,1,2,1,2,1,2, -1,2,1,2,4,0x1a,0x1a,1,2,1,2,5,1,2,1,2, -2,2,1,2,1,2,1,2,1,2,1,2,1,2,1,1, -1,1,1,2,1,1,1,1,1,2,1,2,1,2,0,0, -0,0,0,0,5,5,6,5,5,5,6,5,5,5,5,6, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,8,8,6,6,8,0x1b,0x1b,0x1b,0x1b, -0,0,0,0,0x34cb,0x344b,0x3ccb,0x37cb,0x35cb,0x3fcb,0x1b,0x1b,0x19,0x1b,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0x17,0x17,0x17,0x17,0,0,0,0, -0,0,0,0,8,8,8,8,6,6,0,0,0,0,0,0, -0,0,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, -0,0,0,0,8,8,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,8,8,8,8,8,8,8,8,8,8,8,8, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,5,5,5,5,5,5,0x17,0x17,0x17,5,0x17,5,5,6, -5,5,5,5,5,5,6,6,6,6,6,6,6,6,0x17,0x17, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,6,6,6,6,6,6,6,6,6,6,6,8,8, -0,0,0,0,0,0,0,0,0,0,0,0x17,8,0x17,0x17,0x17, -0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,4,0x49,0x89,0xc9,0x109, -0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0x17,0x17,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6, -8,8,6,6,6,6,8,8,6,8,8,8,5,5,5,5, -5,6,4,5,5,5,5,5,5,5,5,5,0x49,0x89,0xc9,0x109, -0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,5,5,0,5,5,5,5, -5,5,5,5,5,6,6,6,6,6,6,8,8,6,6,8, -8,6,6,0,0,0,0,0,0,0,0,0,5,5,5,6, -5,5,5,5,5,5,5,5,6,8,0,0,0x49,0x89,0xc9,0x109, -0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0x17,0x17,0x17,0x17,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,4,5,5,5, -5,5,5,0x1b,0x1b,0x1b,5,8,6,8,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,6,5,6,6, 
-6,5,5,6,6,5,5,5,5,5,6,6,5,6,5,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,5,5,4,0x17,0x17,5,5,5,5, -5,5,5,5,5,5,5,8,6,6,8,8,0x17,0x17,5,4, -4,8,6,0,0,0,0,0,0,0,0,0,0,5,5,5, -5,5,5,0,0,5,5,5,5,5,5,0,0,5,5,5, -5,5,5,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,0,5,5,5,5,5,5,5,0,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,0x1a,4,4,4,4,2,2,2,2, -2,2,0,0,0,0,0,0,0,0,0,0,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,5,5,5,8, -8,6,8,8,6,8,8,0x17,8,6,0,0,0x49,0x89,0xc9,0x109, -0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,5,5,5,5, -0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0,0,0,0,5,5,5,5, -5,5,5,0,0,0,0,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0x12,0x12,0x12,0x12, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, -0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x11,0x11,0x11,0x11, -0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, -0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,5,5,5,5, -5,5,5,5,5,5,5,0x605,5,5,5,5,5,5,5,0x7c5, -5,5,5,5,0x5c5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,0x6c5,5,0x6c5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,0x7c5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x18,5,5, -5,5,5,5,5,5,5,5,5,5,5,0,5,5,5,5, -5,0,5,0,5,5,0,5,5,0,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,2,2,2,2,2,2,2,0,0,0,0,0, -0,0,0,0,0,0,0,2,2,2,2,2,0,0,0,0, -0,5,6,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a, -0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0x15,0x14,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0,0,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,0x19,0x1b,0,0, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x14,0x15,0x17,0,0,0,0,0,0, 
-6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -0x17,0x13,0x13,0x16,0x16,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14, -0x15,0x17,0x17,0x14,0x15,0x17,0x17,0x17,0x17,0x16,0x16,0x16,0x17,0x17,0x17,0, -0x17,0x17,0x17,0x17,0x13,0x14,0x15,0x14,0x15,0x14,0x15,0x17,0x17,0x17,0x18,0x13, -0x18,0x18,0x18,0,0x17,0x19,0x17,0x17,0,0,0,0,5,5,5,5, -5,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0,0,0x10,0,0,5,5,5,5,5,5,0,0,5,5, -5,5,5,5,0,0,5,5,5,5,5,5,0,0,5,5, -5,0,0,0,0x19,0x19,0x18,0x1a,0x1b,0x19,0x19,0,0x1b,0x18,0x18,0x18, -0x18,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0x10,0x10,0x10, -0x1b,0x1b,0,0,0,0x17,0x17,0x17,0x19,0x17,0x17,0x17,0x14,0x15,0x17,0x18, -0x17,0x13,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17, -0x18,0x18,0x18,0x17,0x1a,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0x14, -0x18,0x15,0x18,0x14,0x15,0x17,0x14,0x15,0x17,0x17,5,5,5,5,5,5, -5,5,5,5,4,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,4,4,5,5,5,5,5,5,5,5, -5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, -5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0,0,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b, -0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x17,0x17,0x17,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b, -0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b, -0x900b,0x980b,0xa00b,0xa80b,0x7ca,0x7ca,0x7ca,0x7ca,0x7ca,0xcca,0x11ca,0x11ca,0x11ca,0x11ca,0x1e4a,0x880a, -0x980a,0x980a,0x980a,0x980a,0x980a,0x784a,0x984a,0x68a,0x11ca,0x344b,0x344b,0x388b,0x3ccb,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x54b,0x34cb,0x1b,0x1b,0x1b,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0, 
-0x34ca,0x344a,0x58a,0x68a,0x11ca,0x980a,0x984a,0x988a,0x68a,0x7ca,0x11ca,0x1e4a,0x980a,0x784a,0x984a,0x68a, -0x7ca,0x11ca,0x1e4a,0x980a,0x784a,0x788a,0x988a,0x7ca,0x58a,0x58a,0x58a,0x5ca,0x5ca,0x5ca,0x5ca,0x68a, -0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -6,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b, -0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0,0,0,0, -0x58b,0x68b,0x7cb,0x11cb,0,0,0,0,0,0,0,0,0,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0x1bca,5,5,5,5,5,5,5,5,0xb80a,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,6,6,6,6,6,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0x17, -5,5,5,5,0,0,0,0,5,5,5,5,5,5,5,5, -0x17,0x58a,0x5ca,0x7ca,0xa4a,0x1e4a,0,0,0,0,0,0,0,0,0,0, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0,0,0,0,2,2,2,2,2,2,2,2, -5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0,0,0,0,0,0,0,0,0,0,0,0x17,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0,5,5,0,0,0,5,0,0,5,5,5,5,5, -5,5,0,0,5,0,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0x17, -0x58b,0x5cb,0x60b,0x7cb,0xa4b,0x1e4b,0x784b,0x788b,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x1b, -0x1b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x7cb,0xa4b,0,0,0,0,0,0,0,0x58b, -0x5cb,0x60b,0x64b,0x64b,0x68b,0x7cb,0xa4b,0x1e4b,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,0,5,5,0,0, 
-0,0,0,0x58b,0x68b,0x7cb,0xa4b,0x1e4b,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x58b,0x7cb, -0xa4b,0x1e4b,0x5cb,0x60b,0,0,0,0x17,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0,0,0,0,0,0x17,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,0x908b, -0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x78cb,0x80cb,0x88cb,0x90cb,0x98cb,0xa0cb,0xa8cb,0xb0cb,0xb8cb,0x36cb,0x354b, -0x34cb,0x348b,0x46cb,0x344b,0x4ecb,0x388b,0x3ccb,0x454b,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0,0,0,0,0x5ecb,0x344b,5,5,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b, -0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0,0,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b, -0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b,0x30b,0x34b,0x38b,0x3cb,0x7cb,0xa4b,0x1e4b,0x784b, -0x344b,0,0,0,0,0,0,0,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, -0x17,0,0,0,0,0,0,0,5,6,6,6,0,6,6,0, -0,0,0,0,6,6,6,6,5,5,5,5,0,5,5,5, -0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0,0,6,6,6,0, -0,0,0,6,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0x58b,0x11cb,0x17,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0x58b,0x7cb,0xa4b,5,5,5,5,5,6,6,0,0,0,0,0x58b, -0x68b,0x7cb,0xa4b,0x1e4b,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,0x1b,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0,0,0,0x17,0x17,0x17,0x17,0x17,0x17,0x17, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0,0,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0x784b, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0x784b, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0,0,0,0,0,0,0,0x17,0x17,0x17,0x17,0,0,0, -0,0,0,0,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,0,0,0,0,0,0,0,0x58b,0x68b,0x7cb,0x11cb,0x1e4b,0x784b, 
-5,5,5,5,6,6,6,6,0,0,0,0,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, -0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb, -0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0x344b,0x34cb,0x348b,0x388b,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x58b,0x5cb,0x60b, -0x64b,0x68b,0x7cb,0xa4b,0xccb,0x1e4b,0x344b,5,0,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,6,6,6,6,6,6,6,6,6,6,6,0x58b,0x7cb,0xa4b, -0x1e4b,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0x144b,0x16cb,0x194b,0x1bcb, -0x1e4b,0x784b,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,6,8,6,8,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6, -6,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0x30b,0x34b, -0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,8,8,8,6, -6,6,6,8,8,6,6,0x17,0x17,0x10,0x17,0x17,0x17,0x17,0,0, -0,0,0,0,0,0,0,0,0,0x10,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0,0,0, -0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, -0,0,0,0,5,5,5,5,5,5,5,6,6,6,6,6, -8,6,6,6,6,6,6,6,6,0,0x49,0x89,0xc9,0x109,0x149,0x189, -0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,5,8,8,0,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,6,6,6,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,6,0x17,0x17,5,0,0,0,0,0, -0,0,0,0,8,5,5,5,5,0x17,0x17,0x17,0x17,6,6,6, -6,0x17,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,0x17, -5,0x17,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,8,8,8,6,6,6,6,6,6, -6,6,6,8,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b, -0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x784b,0,0,0,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -8,8,8,6,6,6,8,8,6,8,6,6,0x17,0x17,0x17,0x17, -0x17,0x17,6,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,5, 
-5,5,5,5,5,5,5,0,5,0,5,5,5,5,0,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,5, -5,5,5,5,5,5,5,5,5,0x17,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -8,8,8,6,6,6,6,6,6,6,6,0,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, -5,5,8,8,0,0,6,6,6,6,6,6,6,0,0,0, -6,6,6,6,6,0,0,0,0,0,0,0,0,0,0,0, -6,6,8,8,0,5,5,5,5,5,5,5,5,0,0,5, -5,0,0,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0,5,5,5,5,5,5,5,0,5,5,0,5,5,5, -5,5,0,6,6,5,8,8,6,8,8,8,8,0,0,8, -8,0,0,8,8,8,0,0,5,0,0,0,0,0,0,8, -0,0,0,0,0,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,8,8,8, -6,6,6,6,6,6,6,6,8,8,6,6,6,8,6,5, -5,5,5,0x17,0x17,0x17,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0,0x17,0,0x17,6,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,8,8,8,6,6,6,6,6, -6,8,6,8,8,8,8,6,6,8,6,6,5,5,0x17,5, -0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,8,8,8,6,6,6,6,0,0, -8,8,8,8,6,6,8,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17, -0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, -5,5,5,5,6,6,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,8,8,8,6,6,6,6,6, -6,6,6,8,8,6,8,6,6,0x17,0x17,0x17,5,0,0,0, -0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0,0,0,0,0,0,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, -0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -5,5,5,6,8,6,8,8,6,6,6,6,6,6,8,6, -0,0,0,0,0,0,0,0,8,8,6,6,6,6,8,6, -6,6,6,6,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0x7cb,0xa4b,0x17,0x17,0x17,0x1b,5,5,5,5,5,5,5,5, -5,5,5,5,8,8,8,6,6,6,6,6,6,6,6,6, -8,6,6,0x17,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0,0,0,0,0, -0,0,0,0,0,0,0,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6, -6,8,5,6,6,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,6, 
-0,0,0,0,0,0,0,0,5,6,6,6,6,6,6,8, -8,6,6,6,5,5,5,5,5,6,6,6,6,6,6,6, -6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0x17,0x17,0x17,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,5,5,5,5,0,0,5,5, -5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,8, -6,6,0x17,0x17,0x17,5,0x17,0x17,5,0x17,0x17,0x17,0x17,0x17,0,0, -0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb, -0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0,0,0,0x17,0x17,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,8,6,6,6,6,6,6,6,0,6,6,6,6, -6,6,8,6,6,6,6,6,6,6,6,6,0,8,6,6, -6,6,6,6,6,8,6,6,8,6,6,0,0,0,0,0, -0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0,0,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,5,6,0,0,0,0,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, -5,5,5,5,5,5,5,0,5,5,0,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,6,6,6,6,6,6,0,0,0,6,0,6,6,0,6, -5,5,5,5,5,5,5,5,5,5,8,8,8,8,8,0, -6,6,0,8,8,6,8,6,5,0,0,0,0,0,0,0, -5,5,5,5,5,5,0,5,5,0,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,6,6,8,8,0x17,0x17,0,0,0,0,0,0,0, -0x34ca,0x354a,0x34ca,0x34ca,0x344a,0x348a,0x388a,0xf4a,0x11ca,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0, -0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,0,0, -0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x64a, -0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca, -0x60a,0x64a,0x68a,0x5ca,0x60a,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a,0x60a, -0x64a,0x68a,0xc08a,0xc18a,0x58a,0x5ca,0x60a,0x60a,0x64a,0x68a,0x60a,0x60a,0x64a,0x64a,0x64a,0x64a, -0x6ca,0x70a,0x70a,0x70a,0x74a,0x74a,0x78a,0x78a,0x78a,0x78a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x58a, -0x5ca,0x60a,0x64a,0x64a,0x68a,0x68a,0x5ca,0x60a,0x58a,0x5ca,0x348a,0x388a,0x454a,0x348a,0x388a,0x35ca, -5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0,0,0,0,0x17,0x17,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,0,0,6,6,6,6,6,0x17,0,0, -0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,0x17, -0x17,0x17,0x17,0x17,0x1b,0x1b,0x1b,0x1b,4,4,4,4,0x17,0x1b,0,0, -0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0,0x7cb,0x1e4b,0x788b,0x790b,0x798b,0x7a0b,0x7a8b,0,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0,0,0,0,0,5,5,5,0x54b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b, -0x74b,0x78b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,0x58b,0x5cb,0x60b,0x17, -0x17,0x17,0x17,0,0,0,0,0,5,5,5,5,5,0,0,0, -0,0,0,0,0,0,0,0,5,8,8,8,8,8,8,8, -8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, -8,8,8,8,8,8,8,8,8,8,8,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,6,6,6,6,4, -4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, -0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,0, -5,5,5,5,5,5,5,5,5,5,0,0,0x1b,6,6,0x17, -0x10,0x10,0x10,0x10,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,8,8,6,6,6,0x1b,0x1b,0x1b,8,8,8,8,8,8,0x10, -0x10,0x10,0x10,0x10,0x10,0x10,0x10,6,6,6,6,6,6,6,6,0x1b, -0x1b,6,6,6,6,6,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,6, -6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,6,6, -6,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x54b,0x58b,0x5cb,0x60b, 
-0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b, -0,0,0,0,0,0,0,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b, -0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x58b,0x5cb, -0x60b,0x64b,0x68b,0x58b,0x68b,0,0,0,0,0,0,0,0x249,0x289,0x49,0x89, -0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, -0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, -2,0,2,2,2,2,2,2,2,2,2,2,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -1,0,1,1,0,0,1,0,0,1,1,0,0,1,1,1, -1,0,1,1,1,1,1,1,1,1,2,2,2,2,0,2, -0,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2, -2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,2,2,2,2,1,1,0,1,1,1,1,0, -0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1, -1,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,1,1,0,1, -1,1,1,0,1,1,1,1,1,0,1,0,0,0,1,1, -1,1,1,1,1,0,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,2,2,2,2,2,2,0,0,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0x18,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0x18, -2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x18, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,0x18,2,2,2,2,2,2,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,0x18, -2,2,2,2,2,2,1,2,0,0,0x49,0x89,0xc9,0x109,0x149,0x189, -0x1c9,0x209,0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,0x1b,0x1b,0x1b,0x1b,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,0x1b,0x1b,0x17,0x17,0x17,0x17,0x17, 
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6, -6,6,6,6,6,6,6,0,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,0,0,6,6,6,6,6, -6,6,0,6,6,0,6,6,6,6,6,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,5,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b, -6,6,6,6,6,6,6,0,0,0,0,0,0,0,0,0, -2,2,2,2,6,6,6,6,6,6,6,0,0,0,0,0, -0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0x17,0x17, -1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b, -0x78cb,0x794b,0x814b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x1b,0x34cb,0x344b,0x3ccb, -0x19,0x58b,0x5cb,0x788b,0x78cb,0,0,0,0,0,0,0,0,0,0,0, -0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b, -0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x78cb,0x80cb, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x18,0x18,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -0,5,5,0,5,0,0,5,0,5,5,5,5,5,5,5, -5,5,5,0,5,5,5,5,0,5,0,5,0,0,0,0, -0,0,5,0,0,0,0,5,0,5,0,5,0,5,5,5, -0,5,5,0,5,0,0,5,0,5,0,5,0,5,0,5, -0,5,5,0,5,0,0,5,5,5,5,0,5,5,5,5, -5,5,5,0,5,5,5,5,0,5,5,5,5,0,5,0, -5,5,5,5,5,5,5,5,5,5,0,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0, -0,5,5,5,0,5,5,5,5,5,0,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x2cb,0x2cb,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x54b, -0x54b,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, 
-0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0, -0,0,0,0,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1a, -0x1a,0x1a,0x1a,0x1a,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0x1b,0x1b,0,0,0,0x1b,0, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0, -0,0,0,0,5,0x705,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0x645,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0x645,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,0x685,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0xcc5,5,5,5,5,5,5,5,5,0xf45,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,0xf45,5,5,5, -5,5,5,5,5,5,5,5,5,5,0x6c5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x605,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,0x605,5,5,5,5,5,5, 
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,0x605,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0x605,5,5,5,5,5,5,5,5,5,5,5,5, -5,0x645,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0x785,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, -0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, -0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0,0x10,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, -0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, -0x11,0x11,0x11,0x11,0x11,0x11,0,0,0,0,0,0 -}; - -static const UTrie2 propsTrie={ - propsTrie_index, - propsTrie_index+4468, - NULL, - 4468, - 16984, - 0xa40, - 0x11f4, - 0x0, - 0x0, - 0x110000, - 0x53c8, - NULL, 0, FALSE, FALSE, 0, NULL -}; - -static const uint16_t propsVectorsTrie_index[30012]={ -0x4d5,0x4dd,0x4e5,0x4ed,0x505,0x50d,0x515,0x51d,0x525,0x52d,0x535,0x53d,0x545,0x54d,0x555,0x55d, -0x564,0x56c,0x574,0x57c,0x57f,0x587,0x58f,0x597,0x59f,0x5a7,0x5af,0x5b7,0x5bf,0x5c7,0x5cf,0x5d7, -0x5df,0x5e7,0x5ee,0x5f6,0x5fe,0x606,0x60e,0x616,0x61e,0x626,0x62b,0x633,0x63a,0x642,0x64a,0x652, -0x65a,0x662,0x66a,0x672,0x679,0x681,0x689,0x691,0x699,0x6a1,0x6a9,0x6b1,0x6b9,0x6c1,0x6c9,0x6d1, -0x19a7,0xd68,0xe47,0x6d9,0x4f5,0xeae,0xeb6,0x1b46,0x1276,0x128e,0x127e,0x1286,0x7ae,0x7b4,0x7bc,0x7c4, -0x7cc,0x7d2,0x7da,0x7e2,0x7ea,0x7f0,0x7f8,0x800,0x808,0x80e,0x816,0x81e,0x826,0x82e,0x836,0x83d, -0x845,0x84b,0x853,0x85b,0x863,0x869,0x871,0x879,0x881,0x1296,0x889,0x891,0x899,0x8a0,0x8a8,0x8b0, -0x8b8,0x8bc,0x8c4,0x8cb,0x8d3,0x8db,0x8e3,0x8eb,0x15a6,0x15ae,0x8f3,0x8fb,0x903,0x90b,0x913,0x91a, -0x160c,0x15fc,0x1604,0x18e2,0x18ea,0x12a6,0x922,0x129e,0x14f0,0x14f0,0x14f2,0x12ba,0x12bb,0x12ae,0x12b0,0x12b2, 
-0x1614,0x1616,0x92a,0x1616,0x932,0x937,0x93f,0x161b,0x945,0x1616,0x94b,0x953,0xc3d,0x1623,0x1623,0x95b, -0x1633,0x1634,0x1634,0x1634,0x1634,0x1634,0x1634,0x1634,0x1634,0x1634,0x1634,0x1634,0x1634,0x1634,0x1634,0x1634, -0x1634,0x1634,0x1634,0x162b,0x963,0x163c,0x163c,0x96b,0xb52,0xb5a,0xb62,0xb6a,0x164c,0x1644,0x973,0x97b, -0x983,0x1656,0x165e,0x98b,0x1654,0x993,0x19af,0xd70,0xb72,0xb7a,0xb82,0xb87,0x1850,0xc70,0xc77,0x17b8, -0xc0d,0x19b7,0xd78,0xd80,0xd88,0xd90,0xf66,0xf66,0x18a8,0x18ad,0xcab,0xcb3,0x191e,0x1926,0x1a68,0xe4f, -0x192e,0xcfc,0xd04,0x1936,0x112a,0x11ca,0xf46,0xd98,0x17d8,0x17c0,0x17d0,0x17c8,0x1868,0x1860,0x1828,0xc1d, -0x12c3,0x12c3,0x12c3,0x12c3,0x12c6,0x12c3,0x12c3,0x12ce,0x99b,0x12d6,0x99f,0x9a7,0x12d6,0x9af,0x9b7,0x9bf, -0x12e6,0x12de,0x12ee,0x9c7,0x9cf,0x12f6,0x9d7,0x9df,0x12fe,0x1306,0x130e,0x1316,0x9e7,0x131e,0x1325,0x132d, -0x1335,0x133d,0x1345,0x134d,0x1355,0x135c,0x1364,0x136c,0x1374,0x137c,0x137f,0x1381,0x1666,0x174b,0x1751,0x1898, -0x1389,0x9ef,0x9f7,0x14a3,0x14a8,0x14ab,0x14b3,0x1391,0x14bb,0x14bb,0x13a1,0x1399,0x13a9,0x13b1,0x13b9,0x13c1, -0x13c9,0x13d1,0x13d9,0x13e1,0x1759,0x17b0,0x18f2,0x1a48,0x13f1,0x13f8,0x1400,0x1408,0x13e9,0x1410,0x1761,0x1768, -0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x1770,0x1773,0x1770,0x1770,0x177b,0x1782,0x1784,0x178b, -0x1793,0x1797,0x1797,0x179a,0x1797,0x1797,0x17a0,0x1797,0x17e0,0x18a0,0x18fa,0xb8f,0xb95,0x1b7a,0xb9b,0xba0, -0x1840,0xc4d,0xc51,0x18b5,0x1830,0x1830,0x1830,0xc25,0x1838,0xc45,0x1880,0xc9b,0xc2d,0xc35,0xc35,0x193e, -0x1870,0x1902,0xc87,0xc8b,0x9ff,0x1676,0x1676,0xa07,0x167e,0x167e,0x167e,0x167e,0x167e,0x167e,0xa0f,0x6dd, -0x14d8,0x14fa,0xa17,0x1502,0xa1f,0x150a,0x1512,0x151a,0xa27,0xa2c,0x1522,0x1529,0xa31,0xa39,0x1890,0xc15, -0xa41,0x1580,0x1587,0x1531,0x158f,0x1596,0x1539,0xa49,0x1552,0x1552,0x1554,0x1541,0x1549,0x1549,0x154a,0x159e, -0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, 
-0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x122b,0x17e8,0x17e8, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, 
-0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, 
-0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, 
-0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c, -0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x155c,0x1563,0x199f,0x1233, -0x168e,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694, -0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694,0x1694, -0x1694,0x1694,0x1694,0x1694,0xa51,0x169c,0xa59,0x19bf,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a, -0x1946,0xd0c,0x195a,0x1952,0x195c,0x19c7,0x19c7,0xda0,0x1848,0x18bd,0x1912,0x1916,0x190a,0xcbb,0xcc2,0xcc5, -0x1878,0xc93,0x18c5,0xccd,0x1964,0x1967,0xd14,0x19cf,0x1977,0x196f,0xd1c,0xda8,0x19d7,0x19db,0xdb0,0x100d, -0x197f,0xd24,0xd2c,0x19e3,0x19f3,0x19eb,0xdb8,0xf09,0xe57,0xe5f,0x1bc1,0xfc5,0x1c66,0x1c66,0x19fb,0xdc0, -0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef, -0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1, -0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3, -0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee, -0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0, -0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2, -0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4, -0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef, -0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1, 
-0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3, -0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee, -0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0, -0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2, -0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4, -0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef, -0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1, -0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3, -0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee, -0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0, -0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2, -0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4, -0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0x15f4,0x15ee,0x15ef,0x15f0,0x15f1,0x15f2,0x15f3,0xa61,0xdc8,0xdcb, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6, -0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6,0x15c6, 
-0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3, -0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3, -0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3, -0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3, -0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3, -0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3, -0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3, -0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3, -0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3, -0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3, -0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3, -0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3, -0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x14c3,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b,0x156b, -0x1570,0x1578,0x17a8,0x123b,0x1888,0x1888,0x123f,0x1246,0xa69,0xa71,0xa79,0x1430,0x1437,0x143f,0xa81,0x1447, -0x1478,0x1478,0x1420,0x1428,0x144f,0x146f,0x1470,0x1480,0x1457,0x1418,0xa89,0x145f,0xa91,0x1467,0xa99,0xa9d, -0xca3,0x1488,0xaa5,0xaad,0x1490,0x1496,0x149b,0xab5,0xac5,0x14e0,0x14e8,0x14cb,0x14d0,0xacd,0xad5,0xabd, -0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6, 
-0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15b6,0x15be,0x15be,0x15be,0x15be, -0x13d4,0x13d4,0x1414,0x1454,0x1494,0x14d4,0x1514,0x1554,0x1590,0x15d0,0x15fc,0x163c,0x167c,0x16bc,0x16fc,0x173c, -0x177c,0x17b8,0x17f8,0x1838,0x1878,0x18ac,0x18e8,0x1928,0x1968,0x19a8,0x19e4,0x1a24,0x1a64,0x1aa4,0x1ae4,0x1b24, -0xa80,0xac0,0xb00,0xb40,0xb80,0xa40,0xbc0,0xa40,0xea8,0xa40,0xa40,0xa40,0xa40,0xc00,0x12d3,0x12d3, -0xee8,0xf28,0xa40,0xa40,0xa40,0xa40,0xc40,0xc60,0xa40,0xa40,0xca0,0xce0,0xd20,0xd60,0xe68,0xdd8, -0x1213,0x1213,0x1213,0x1213,0x1213,0x1213,0x1213,0x1213,0x1213,0x1213,0x1213,0x1213,0x1213,0x1213,0x1213,0x1213, -0x1213,0x1213,0x1213,0x1213,0xf68,0x1253,0x1088,0x10c8,0x1293,0x10d3,0x1313,0x1313,0x1313,0xfa8,0xfc8,0x1008, -0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8, -0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0xfc8,0x1048, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd98, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd98, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd98, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd98, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd98, 
-0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd98, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd98, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd98, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd98, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd98, -0xe18,0xe28,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, -0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd98, -0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193, -0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1193,0x1113, -0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3, -0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x1153, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, 
-0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0xba8,0xbaf,0xbb7,0xbbf,0x17f0,0x17f0,0x17f0,0xbc7,0xbcf,0xbd2,0x1820,0x1818,0xc05,0xd34,0xd38,0xd3c, -0x4f5,0x4f5,0x4f5,0x4f5,0xd44,0x1987,0xd4c,0xf5e,0x16a4,0xadd,0xae3,0x101d,0xbda,0x1858,0xc7f,0x4f5, -0x16b9,0x16ac,0x16b1,0x17f8,0xbe2,0xbea,0x1168,0x116e,0x1ba9,0xf7b,0x1b99,0x6e5,0x4f5,0x4f5,0x4f5,0x4f5, -0x1bc9,0x1bc9,0x1bc9,0x1bc9,0x1bc9,0x1bc9,0x1bc9,0x1bc9,0x1bc9,0xfcd,0xfd5,0xfdd,0x4f5,0x4f5,0x4f5,0x4f5, -0xbf2,0xbf5,0xdd3,0x1c11,0x1015,0x6ed,0x4f5,0x10ae,0xcd5,0xd54,0x4f5,0x4f5,0x1b56,0xf11,0xf19,0x1c51, -0xc59,0xc60,0xc68,0x1a03,0x1bf1,0x4f5,0x1bd1,0xfed,0x1a0b,0xddb,0xde3,0xdeb,0x103d,0x6f5,0x4f5,0x4f5, -0x1a13,0x1a13,0x6fd,0x4f5,0x1c7e,0x10c6,0x1c76,0x10ce,0x1d26,0x11e0,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0xdf3,0x4f5,0x4f5,0x4f5,0x4f5,0x1d46,0x1208,0x120f,0x705,0x4f5,0x4f5,0x4f5,0x4f5, -0x1a70,0x1a72,0xe67,0xe6e,0x1a1b,0x1a23,0xdfb,0xf3e,0x1b4e,0xef9,0xf01,0xfe5,0x1b66,0x1b6a,0xf36,0x105d, -0xfb0,0xfb5,0x70d,0x4f5,0x10b6,0x10be,0x1bb9,0xfbd,0xf92,0xf98,0xfa0,0xfa8,0x4f5,0x4f5,0x4f5,0x4f5, -0x1cc6,0x1cbe,0x1158,0x1160,0x1c39,0x1c31,0x1084,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x1c21,0x1045,0x104d,0x1055, -0x1be9,0x1be1,0xffd,0x1150,0x1b72,0xf4e,0x715,0x4f5,0x1094,0x109c,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x1d1e,0x11c2,0x71d,0x4f5,0x4f5,0x1c49,0x1c41,0x108c,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x1cf6,0x1cee,0x11b2,0x1ce6,0x11aa,0x725,0x1c19,0x1035,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x110e,0x1113,0x111b,0x1122,0x1142,0x1148,0x4f5,0x4f5,0x118e,0x1192,0x119a,0x11d2,0x11d8,0x72d,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x11f0,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd, -0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18cd,0x18d2,0xcdd,0xce4,0xce4,0xce4, 
-0x18da,0x18da,0x18da,0xcec,0x1c6e,0x1c6e,0x1c6e,0x1c6e,0x1c6e,0x1c6e,0x735,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b, -0x1a2b,0x1a2b,0x1a2d,0x1a2b,0x1a35,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a2b,0x1a38,0x1a2b,0x1a2b,0x1a2b,0x1a2b, -0x1a2b,0x73d,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x1a7a,0x1a7a,0x1a7a,0x1a7a,0x1a7a,0x1a7a,0x1a7a,0x1a7a,0x1a7a,0x1a7a,0x1a7a,0x1a7a,0x1a7a,0x1a7a,0x1a7a,0x1a7a, -0x1a7a,0xe76,0x1005,0x745,0x4f5,0x4f5,0x749,0xf56,0x1c09,0x1c01,0x1025,0x102d,0x751,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x1d3e,0x1d36,0x1200,0x4f5,0x4f5,0x4f5,0x1b5e,0x1b5e,0xf21,0xf26,0xf2e,0x4f5,0x4f5,0x113a, -0x1a8a,0x1cfe,0x1cfe,0x1cfe,0x1cfe,0x1cfe,0x1cfe,0x1cfe,0x1186,0x759,0x4f5,0x75d,0x1d16,0x1d16,0x1d16,0x1d16, -0x1d16,0x1d16,0x1d16,0x1d16,0x1d16,0x1d16,0x1d16,0x11a2,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x1ba1,0x1ba1,0x1ba1,0xf6e,0xf73,0x765,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x16c1,0x16c1,0x16c1,0x16c1,0x16c1,0x16c1,0x16c1,0xaeb,0x16d1,0xaf3,0x16d2,0x16c9,0x16da,0x16e0,0x16e8,0xafb, -0x1810,0x1810,0x76d,0x4f5,0x4f5,0x4f5,0x4f5,0x11f8,0x1800,0x1800,0xbfd,0xcf4,0x4f5,0x4f5,0x4f5,0x4f5, -0x1719,0x1720,0xb03,0x1723,0xb0b,0xb13,0xb1b,0x171d,0xb23,0xb2b,0xb33,0x1722,0x172a,0x1719,0x1720,0x171c, 
-0x1723,0x172b,0x171a,0x1721,0x171d,0xb3a,0x16f0,0x16f8,0x16ff,0x1706,0x16f3,0x16fb,0x1702,0x1709,0xb42,0x1711, -0x1c96,0x1c96,0x1c96,0x1c96,0x1c96,0x1c96,0x1c96,0x1c96,0x1c96,0x1c96,0x1c96,0x1c96,0x1c96,0x1c96,0x1c96,0x1c96, -0x1c86,0x1c89,0x1c86,0x1c90,0x10fe,0x775,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x1132,0x77d,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x1bd9,0x1bd9,0x1bd9,0x1bd9,0x1bd9,0x1bd9,0xff5,0x4f5,0x1cb6,0x1cae,0x1106,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x781,0x1d2e,0x11e8,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0xebe,0xec6,0xece,0xed6,0xede,0xee6,0xeed,0xef1,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x789,0x1065,0x1c29,0x106b,0x1c29,0x1073,0x1078,0x107c,0x107c, -0x10d6,0x10de,0x1cce,0x10e6,0x1ca6,0x10ee,0x10f6,0x1d06,0x11ba,0x11ba,0x11ba,0x791,0x795,0x795,0x795,0x795, -0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795, 
-0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x795, -0x795,0x795,0x795,0x795,0x795,0x795,0x795,0x796,0xb4a,0x1733,0x1733,0x1733,0x79e,0x79e,0x79e,0x79e, -0x1808,0x1808,0x1808,0x1808,0x1808,0x1808,0x1808,0x7a6,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e, -0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e, -0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e, -0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e, -0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x79e,0x198f,0xd5c,0x1997,0x1997,0xd60,0xe7e,0xe86,0xe8e, -0xe03,0x1a40,0x1a58,0xe0b,0x1a50,0xe13,0xe17,0xe1e,0xe26,0xe2d,0xe35,0xe3d,0xe3f,0xe3f,0xe3f,0xe3f, -0x1ab1,0x1ab9,0x1ab1,0x1abf,0x1ac7,0x1a92,0x1acf,0x1ad7,0x1ab1,0x1adf,0x1ae7,0x1aee,0x1af6,0x1a9a,0x1ab1,0x1af8, -0x1aa2,0x1aa9,0x1b00,0x1b06,0x1b8a,0x1b91,0x1b82,0x1b0e,0x1b16,0x1b1e,0x1b26,0x1bf9,0x1b2e,0x1b36,0xe96,0xe9e, -0x1a82,0x1a82,0x1a82,0xea6,0x1bb1,0x1bb1,0xf83,0xf8a,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59, -0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c59,0x1c5e,0x1c59,0x1c59,0x1c59,0x10a4,0x10a6,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6, -0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6, -0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6, -0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6, 
-0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1176,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde, -0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x1cde,0x117e, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5, -0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x4f5,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b, -0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b, -0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b, -0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x124e,0x1217, -0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e, -0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e, -0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x121f,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217, -0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217, -0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217, -0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217, -0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743, -0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1743,0x1256,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217, -0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217, 
-0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217, -0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1223,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217, -0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217, -0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217, -0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217, -0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1217,0x1223,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60, -0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60, -0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60, -0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60, -0x1a60,0x125e,0x1b3e,0x1b3e,0x1b3e,0x1b3e,0x1b3e,0x1b3e,0x1266,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e, -0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e, -0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e, -0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e, -0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x126e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e, -0x1d0e,0x1d0e,0x1d0e,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de, -0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de, -0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de, 
-0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de, -0x15de,0x15de,0x15ce,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6, -0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6, -0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6, -0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6, -0x15e6,0x15e6,0x15d6,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de, -0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de, -0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de, -0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de, -0x15de,0x15de,0x15de,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6, -0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6, -0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6, -0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6,0x15e6, -0x15e6,0x15e6,0x15e6,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b, -0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b, -0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b, -0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b,0x173b, 
-0x173b,0x173b,0x173b,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60, -0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60, -0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60, -0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60,0x1a60, -0x1a60,0x1a60,0x1a60,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e, -0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e, -0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e, -0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e,0x1c9e, -0x1c9e,0x1c9e,0x1c9e,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6, -0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6, -0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6, -0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6,0x1cd6, -0x1cd6,0x1cd6,0x1cd6,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e, -0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e, -0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e, -0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e,0x1d0e, -0x1d0e,0x1d0e,0x1d0e,0x4d4,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a3,0x2ac,0x2a6, 
-0x2a6,0x2a9,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0, -0x2a0,0x2a0,0x2a0,0x2a0,0x7d4,0x7ce,0x7b3,0x7aa,0x7a1,0x79e,0x795,0x7b0,0x79b,0x7a7,0x7aa,0x7c5, -0x7bc,0x7ad,0x7d1,0x7a4,0x792,0x792,0x792,0x792,0x792,0x792,0x792,0x792,0x792,0x792,0x7b9,0x7b6, -0x7bf,0x7bf,0x7bf,0x7ce,0x795,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7e0,0x7da,0x7da,0x7da,0x7da,0x7da, -0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x7da,0x79b, -0x7a1,0x7a7,0x7cb,0x78f,0x7c8,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7dd,0x7d7,0x7d7,0x7d7,0x7d7,0x7d7, -0x7d7,0x7d7,0x7d7,0x7d7,0x7d7,0x7d7,0x7d7,0x7d7,0x7d7,0x7d7,0x7d7,0x7d7,0x7d7,0x7d7,0x7d7,0x79b, -0x7c2,0x798,0x7bf,0x2a0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x2af,0x2af,0x2af,0x2af,0x2af,0x2be,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af, -0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af, -0x2af,0x2af,0x2af,0x2af,0x2b2,0x62a,0x7e9,0x7ec,0x630,0x7ec,0x7e6,0x627,0x61e,0x2b8,0x63c,0x2bb, -0x7ef,0x615,0x633,0x7e3,0x62d,0x639,0x61b,0x61b,0x621,0x2b5,0x627,0x624,0x61e,0x61b,0x63c,0x2bb, -0x618,0x618,0x618,0x62a,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x645,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4, -0x2c4,0x2c4,0x2c4,0x2c4,0x645,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x2c4,0x636,0x645,0x2c4,0x2c4,0x2c4, -0x2c4,0x2c4,0x645,0x63f,0x642,0x642,0x2c1,0x2c1,0x2c1,0x2c1,0x63f,0x2c1,0x642,0x642,0x642,0x2c1, -0x642,0x642,0x2c1,0x2c1,0x63f,0x2c1,0x642,0x642,0x2c1,0x2c1,0x2c1,0x636,0x63f,0x642,0x642,0x2c1, -0x642,0x2c1,0x63f,0x2c1,0x2d0,0x64b,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7, -0x2d0,0x2c7,0x2d0,0x2c7,0x2cd,0x648,0x2d0,0x64b,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x64b, -0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x651,0x648,0x2d0,0x2c7,0x2d0,0x64b, -0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x648,0x654,0x64e,0x2d0,0x2c7,0x2d0,0x2c7,0x648,0x2d0,0x2c7,0x2d0, 
-0x2c7,0x2d0,0x2c7,0x654,0x64e,0x651,0x648,0x2d0,0x64b,0x2d0,0x2c7,0x2d0,0x64b,0x657,0x651,0x648, -0x2d0,0x64b,0x2d0,0x2c7,0x2d0,0x2c7,0x651,0x648,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7, -0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x651,0x648,0x2d0,0x2c7,0x2d0,0x64b, -0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2c7,0x2d0,0x2d0,0x2c7,0x2d0, -0x2c7,0x2d0,0x2c7,0x2ca,0x2d3,0x2df,0x2df,0x2d3,0x2df,0x2d3,0x2df,0x2df,0x2d3,0x2df,0x2df,0x2df, -0x2d3,0x2d3,0x2df,0x2df,0x2df,0x2df,0x2d3,0x2df,0x2df,0x2d3,0x2df,0x2df,0x2df,0x2d3,0x2d3,0x2d3, -0x2df,0x2df,0x2d3,0x2df,0x2e2,0x2d6,0x2df,0x2d3,0x2df,0x2d3,0x2df,0x2df,0x2d3,0x2df,0x2d3,0x2d3, -0x2df,0x2d3,0x2df,0x2e2,0x2d6,0x2df,0x2df,0x2df,0x2d3,0x2df,0x2d3,0x2df,0x2df,0x2d3,0x2d3,0x2dc, -0x2df,0x2d3,0x2d3,0x2d3,0x2dc,0x2dc,0x2dc,0x2dc,0x2e5,0x2e5,0x2d9,0x2e5,0x2e5,0x2d9,0x2e5,0x2e5, -0x2d9,0x2e2,0x65a,0x2e2,0x65a,0x2e2,0x65a,0x2e2,0x65a,0x2e2,0x65a,0x2e2,0x65a,0x2e2,0x65a,0x2e2, -0x65a,0x2d3,0x2e2,0x2d6,0x2e2,0x2d6,0x2e2,0x2d6,0x2df,0x2d3,0x2e2,0x2d6,0x2e2,0x2d6,0x2e2,0x2d6, -0x2e2,0x2d6,0x2e2,0x2d6,0x2d6,0x2e5,0x2e5,0x2d9,0x2e2,0x2d6,0x9c3,0x9c3,0x9c6,0x9c0,0x2e2,0x2d6, -0x2e2,0x2d6,0x2e2,0x2d6,0x2e2,0x2d6,0x2e2,0x2d6,0x2e2,0x2d6,0x2e2,0x2d6,0x2e2,0x2d6,0x2e2,0x2d6, -0x2e2,0x2d6,0x2e2,0x2d6,0x2e2,0x2d6,0x2e2,0x2d6,0x9c6,0x9c0,0x9c6,0x9c0,0x9c3,0x9bd,0x9c6,0x9c0, -0xb88,0xc90,0x9c3,0x9bd,0x9c3,0x9bd,0x9c6,0x9c0,0x9c6,0x9c0,0x9c6,0x9c0,0x9c6,0x9c0,0x9c6,0x9c0, -0x9c6,0x9c0,0x9c6,0x9c0,0xc90,0xc90,0xc90,0xd8f,0xd8f,0xd8f,0xd92,0xd92,0xd8f,0xd92,0xd92,0xd8f, -0xd8f,0xd92,0xed3,0xed6,0xed6,0xed6,0xed6,0xed3,0xed6,0xed3,0xed6,0xed3,0xed6,0xed3,0xed6,0xed3, -0x2e8,0x65d,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8, -0x2e8,0x65d,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8, -0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8, 
-0x2eb,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x2e8, -0x2e8,0x2e8,0x2e8,0x2e8,0x2e8,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0xc93,0xc93,0x303,0x303,0x303,0x303, -0x303,0x303,0x303,0x303,0x303,0x2fa,0x2fa,0x2fa,0x2fa,0x2fa,0x2fa,0x2fa,0x2f7,0x2f7,0x2f4,0x2f4, -0x663,0x2f4,0x2fa,0x666,0x2fd,0x666,0x666,0x666,0x2fd,0x666,0x2fa,0x2fa,0x669,0x300,0x2f4,0x2f4, -0x2f4,0x2f4,0x2f4,0x2f4,0x660,0x660,0x660,0x660,0x2f1,0x660,0x2f4,0xb01,0x303,0x303,0x303,0x303, -0x303,0x2ee,0x2ee,0x2ee,0x2ee,0x2ee,0x9d2,0x9d2,0x9cf,0x9cc,0x9cf,0xc96,0xc96,0xc96,0xc96,0xc96, -0xc96,0xc96,0xc96,0xc96,0xc96,0xc96,0xc96,0xc96,0xc96,0xc96,0xc96,0xc96,0x66c,0x66c,0x66c,0x66c, -0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c, -0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c, -0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c, -0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66c,0x66f,0x66f,0x927,0x66f, -0x66f,0x92a,0xb04,0xb04,0xb04,0xb04,0xb04,0xb04,0xb04,0xb04,0xb04,0xc45,0xd5c,0xd5c,0xd5c,0xd5c, -0xd5c,0xd5c,0xd5c,0xd5c,0xe9a,0xe9a,0xe9a,0xe9a,0xe9d,0xd5f,0xd5f,0xd5f,0x672,0x672,0xb07,0xc8d, -0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xc8d,0xf81,0xf7e,0xf81,0xf7e, -0x30f,0x318,0xf81,0xf7e,9,9,0x31e,0xed9,0xed9,0xed9,0x306,0x14d6,9,9,9,9, -0x31b,0x309,0x32d,0x30c,0x32d,0x32d,0x32d,9,0x32d,9,0x32d,0x32d,0x324,0x678,0x678,0x678, -0x678,0x678,0x678,0x678,0x678,0x678,0x678,0x678,0x678,0x678,0x678,0x678,0x678,0x678,9,0x678, -0x678,0x678,0x678,0x678,0x678,0x678,0x32d,0x32d,0x324,0x324,0x324,0x324,0x324,0x675,0x675,0x675, -0x675,0x675,0x675,0x675,0x675,0x675,0x675,0x675,0x675,0x675,0x675,0x675,0x675,0x675,0x321,0x675, -0x675,0x675,0x675,0x675,0x675,0x675,0x324,0x324,0x324,0x324,0x324,0xf81,0x330,0x330,0x333,0x32d, 
-0x32d,0x330,0x327,0x9d5,0xb91,0xb8e,0x32a,0x9d5,0x32a,0x9d5,0x32a,0x9d5,0x32a,0x9d5,0x315,0x312, -0x315,0x312,0x315,0x312,0x315,0x312,0x315,0x312,0x315,0x312,0x315,0x312,0x330,0x330,0x327,0x321, -0xb40,0xb3d,0xb8b,0xc9c,0xc99,0xc9f,0xc9c,0xc99,0xd95,0xd98,0xd98,0xd98,0x9e4,0x684,0x33f,0x342, -0x33f,0x33f,0x33f,0x342,0x33f,0x33f,0x33f,0x33f,0x342,0x9e4,0x342,0x33f,0x681,0x681,0x681,0x681, -0x681,0x681,0x681,0x681,0x681,0x684,0x681,0x681,0x681,0x681,0x681,0x681,0x681,0x681,0x681,0x681, -0x681,0x681,0x681,0x681,0x681,0x681,0x681,0x681,0x681,0x681,0x681,0x681,0x67b,0x67b,0x67b,0x67b, -0x67b,0x67b,0x67b,0x67b,0x67b,0x67e,0x67b,0x67b,0x67b,0x67b,0x67b,0x67b,0x67b,0x67b,0x67b,0x67b, -0x67b,0x67b,0x67b,0x67b,0x67b,0x67b,0x67b,0x67b,0x9de,0x67e,0x339,0x33c,0x339,0x339,0x339,0x33c, -0x339,0x339,0x339,0x339,0x33c,0x9de,0x33c,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339, -0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x342,0x33c, -0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x336,0x933,0x936,0x918,0x918,0x112b, -0x9d8,0x9d8,0xb97,0xb94,0x9e1,0x9db,0x9e1,0x9db,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339, -0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339, -0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339, -0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x339,0x33f,0x342,0x33c,0x33f,0x339,0xb97,0xb94,0x33f, -0x339,0xb97,0xb94,0x33f,0x339,0xb97,0xb94,0xedc,0x342,0x33c,0x342,0x33c,0x33f,0x339,0x342,0x33c, -0x33f,0x339,0x342,0x33c,0x342,0x33c,0x342,0x33c,0x33f,0x339,0x342,0x33c,0x342,0x33c,0x342,0x33c, -0x33f,0x339,0x342,0x33c,0x9e4,0x9de,0x342,0x33c,0x342,0x33c,0x342,0x33c,0x342,0x33c,0xd9e,0xd9b, -0x342,0x33c,0xedf,0xedc,0xedf,0xedc,0xedf,0xedc,0xc06,0xc03,0xc06,0xc03,0xc06,0xc03,0xc06,0xc03, -0xc06,0xc03,0xc06,0xc03,0xc06,0xc03,0xc06,0xc03,0xf0c,0xf09,0xf0c,0xf09,0xfff,0xffc,0xfff,0xffc, 
-0xfff,0xffc,0xfff,0xffc,0xfff,0xffc,0xfff,0xffc,0xfff,0xffc,0xfff,0xffc,0x1164,0x1161,0x134a,0x1347, -0x150f,0x150c,0x150f,0x150c,0x150f,0x150c,0x150f,0x150c,0xc,0x354,0x354,0x354,0x354,0x354,0x354,0x354, -0x354,0x354,0x354,0x354,0x354,0x354,0x354,0x354,0x354,0x354,0x354,0x354,0x354,0x354,0x354,0x354, -0x354,0x354,0x354,0xc,0xc,0x357,0x345,0x348,0x348,0x34b,0x348,0x345,0x1917,0x34e,0x34e,0x34e, -0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e, -0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x34e,0x351, -0x1917,0x89d,0x9e7,0xc,0xc,0x14d9,0x14d9,0x13f5,0xf,0x957,0x957,0x957,0x957,0x957,0x957,0x957, -0x957,0x957,0x957,0x957,0x957,0x957,0x957,0x957,0x957,0x957,0xda1,0x957,0x957,0x957,0x957,0x957, -0x957,0x957,0x957,0x957,0x957,0x957,0x957,0x957,0x35a,0x35a,0x35a,0x35a,0x35a,0x35a,0x35a,0x35a, -0x35a,0x35a,0xee2,0x35a,0x35a,0x35a,0x366,0x35a,0x35d,0x35a,0x35a,0x369,0x95a,0xda4,0xda7,0xda4, -0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c, -0x36c,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c,0x36c, -0x36c,0x36c,0x36c,0xf,0xf,0xf,0xf,0x191a,0x36c,0x36c,0x36c,0x363,0x360,0xf,0xf,0xf, -0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xcb4,0xcb4,0xcb4,0xcb4,0x13f8,0x14dc,0xf8a,0xf8a, -0xf8a,0xf87,0xf87,0xdad,0x8a3,0xcae,0xcab,0xcab,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xf84,0xf84, -0xf84,0xf84,0xf84,0x8a0,0x14d3,0x12,0xdb0,0x8a6,0x1311,0x387,0x38a,0x38a,0x38a,0x38a,0x38a,0x387, -0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387, -0x387,0x387,0x387,0xf8d,0xf8d,0xf8d,0xf8d,0xf8d,0x8a9,0x387,0x387,0x387,0x387,0x387,0x387,0x387, -0x387,0x387,0x387,0x91e,0x91e,0x91e,0x91e,0x91e,0x91e,0x91e,0x91e,0xb37,0xb37,0xb37,0xca2,0xca8, -0xca5,0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0x130e,0x939,0x939,0x939,0x939,0x939,0x939,0x939,0x939, 
-0x939,0x939,0x381,0x37e,0x37b,0x378,0xb9a,0xb9a,0x91b,0x387,0x387,0x393,0x387,0x38d,0x38d,0x38d, -0x38d,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387, -0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387, -0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387, -0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x9ed,0x9ed,0x387,0x387, -0x387,0x387,0x387,0x9ed,0x38a,0x387,0x38a,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387,0x387, -0x387,0x387,0x387,0x9ed,0x387,0x387,0x387,0x38a,0x93c,0x387,0x372,0x372,0x372,0x372,0x372,0x372, -0x372,0x36f,0x378,0x375,0x375,0x372,0x372,0x372,0x372,0x390,0x390,0x372,0x372,0x378,0x375,0x375, -0x375,0x372,0xcb1,0xcb1,0x384,0x384,0x384,0x384,0x384,0x384,0x384,0x384,0x384,0x384,0x9ed,0x9ed, -0x9ed,0x9ea,0x9ea,0xcb1,0xa02,0xa02,0xa02,0x9fc,0x9fc,0x9fc,0x9fc,0x9fc,0x9fc,0x9fc,0x9fc,0x9f9, -0x9fc,0x9f9,0x15,0xa05,0x9ff,0x9f0,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff, -0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff,0x9ff, -0x9ff,0xcb7,0xcb7,0xcb7,0x9f6,0x9f6,0x9f6,0x9f6,0x9f6,0x9f6,0x9f6,0x9f6,0x9f6,0x9f6,0x9f6,0x9f6, -0x9f6,0x9f6,0x9f6,0x9f6,0x9f3,0x9f3,0x9f3,0x9f3,0x9f3,0x9f3,0x9f3,0x9f3,0x9f3,0x9f3,0x9f3,0x15, -0x15,0xcb7,0xcb7,0xcb7,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10, -0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10,0xe10, -0xe10,0xe10,0x1011,0x1011,0x1011,0x1011,0x1011,0x1011,0x1011,0x1011,0x1011,0x1011,0x1011,0x1011,0x1011,0x1011, -0x1011,0x1011,0x1011,0x1011,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b, -0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b, -0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa0b,0xa08,0xa08,0xa08,0xa08,0xa08,0xa08, 
-0xa08,0xa08,0xa08,0xa08,0xa08,0xb9d,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -0x18,0x18,0x18,0x18,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf24,0xf27,0xf27, -0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27, -0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf27,0xf1b, -0xf1b,0xf1b,0xf1b,0xf1b,0xf1b,0xf1b,0xf1b,0xf1b,0xf2a,0xf2a,0xf1e,0xf1e,0xf21,0xf30,0xf2d,0x10e, -0x10e,0x193e,0x1941,0x1941,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x25b, -0x25b,0x25b,0x25b,0x25b,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xb13,0xb13,0xb16,0xb16,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13,0xb13, -0x72,0x72,0x72,0x72,0x1593,0x1593,0x1593,0x1593,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2, -0x1c2,0x1c2,0x1c2,0x1590,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x164d,0x164d,0x164d,0x164d,0x164d, -0x164d,0x164d,0x164d,0x164d,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x1686,0x1686,0x1686, -0x1686,0x1686,0x1686,0x1686,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x177,0x177,0x177, -0x177,0x177,0x177,0x177,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f, -0x27f,0x27f,0x27f,0x27f,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7,0x1d7, -0x1d7,0x1d7,0x1d7,0x1d7,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x14af,0x14af,0x14af,0x14af,0x14af,0x14af,0x14af,0x14af,0x14af,0x14af,0x1bc,0x1bc, -0x1bc,0x1bc,0x1bc,0x1bc,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264, -0x264,0x264,0x264,0x264,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x18c0,0x18c3,0x18c3,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258, -0x258,0x258,0x258,0x258,0,0,0,0,0,0,0,0,0,0,0,0, 
-0,0,0,0,0x198c,0x198c,0x198c,0x198c,0x198c,0x198c,0x198c,0x198c,0x198c,0x198c,0x26a,0x26a, -0x26a,0x26a,0x26a,0x26a,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1782,0x1782,0x1782,0x1782,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c, -0x21c,0x21c,0x21c,0x21c,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b, -0x127b,0x127b,0x127b,0x180,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1638,0x1ef,0x1ef, -0x1ef,0x1ef,0x163e,0x163e,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a, -0x158a,0x158a,0x158a,0x158a,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674, -0x1674,0x1674,0x1674,0x1674,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f, -0x24f,0x24f,0x24f,0x24f,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911, -0x1911,0x1911,0x1911,0x1911,0x16ec,0x16ec,0x16ec,0x16ec,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204, -0x204,0x204,0x204,0x204,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0xe07,0xe07,0xe04,0xe04,0xe04,0xe07,0xdb,0xdb,0xdb,0xdb,0xdb,0xdb, -0xdb,0xdb,0xdb,0xdb,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x22b,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a, -0x179a,0x179a,0x179a,0x179a,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x181b,0x181b,0x237,0x181b,0x181b,0x237,0x181b,0x181b,0x181b,0x181b,0x181b,0x237, -0x237,0x237,0x237,0x237,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x270,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1, -0x19a1,0x19a1,0x19a1,0x19a1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0x954,0x954,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e,0x196e, -0x196e,0x196e,0x261,0x261,3,3,3,3,3,3,3,3,3,3,3,3, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 
-3,3,3,3,3,3,0x954,0x954,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65, -0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,6,6,6,6,6,6,6,6, -6,6,6,6,6,6,6,6,0x14e2,0x3ab,0x3ba,0x3ba,0x1b,0x3c0,0x3c0,0x3c0, -0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x1b,0x1b,0x3c0,0x3c0,0x1b,0x1b,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0, -0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x1b,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0, -0x3c0,0x1b,0x3c0,0x1b,0x1b,0x1b,0x3c0,0x3c0,0x3c0,0x3c0,0x1b,0x1b,0x3ae,0xcbd,0x3ab,0x3ba, -0x3ba,0x3ab,0x3ab,0x3ab,0x3ab,0x1b,0x1b,0x3ba,0x3ba,0x1b,0x1b,0x3bd,0x3bd,0x3b1,0xdb6,0x1b, -0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x3ab,0x1b,0x1b,0x1b,0x1b,0x3c3,0x3c3,0x1b,0x3c3, -0x3c0,0x3c0,0x3ab,0x3ab,0x1b,0x1b,0x942,0x942,0x942,0x942,0x942,0x942,0x942,0x942,0x942,0x942, -0x3c0,0x3c0,0x3b7,0x3b7,0x3b4,0x3b4,0x3b4,0x3b4,0x3b4,0x3b7,0x3b4,0x113a,0x187b,0x1878,0x191d,0x1b, -0x1e,0xcc0,0x3c6,0xcc3,0x1e,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x1e,0x1e,0x1e,0x1e,0x3d2, -0x3d2,0x1e,0x1e,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2, -0x3d2,0x1e,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x1e,0x3d2,0x3d5,0x1e,0x3d2,0x3d5,0x1e, -0x3d2,0x3d2,0x1e,0x1e,0x3c9,0x1e,0x3cf,0x3cf,0x3cf,0x3c6,0x3c6,0x1e,0x1e,0x1e,0x1e,0x3c6, -0x3c6,0x1e,0x1e,0x3c6,0x3c6,0x3cc,0x1e,0x1e,0x1e,0xf96,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e, -0x1e,0x3d5,0x3d5,0x3d5,0x3d2,0x1e,0x3d5,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x945,0x945, -0x945,0x945,0x945,0x945,0x945,0x945,0x945,0x945,0x3c6,0x3c6,0x3d2,0x3d2,0x3d2,0xf96,0x1920,0x1e, -0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x21,0x3d8,0x3d8,0x3e1,0x21,0x3e4,0x3e4,0x3e4, -0x3e4,0x3e4,0x3e4,0x3e4,0xccc,0x3e4,0x21,0x3e4,0x3e4,0x3e4,0x21,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4, -0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x21,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4, -0x3e4,0x21,0x3e4,0x3e4,0x21,0x3e4,0x3e4,0x3e4,0x3e4,0x3e4,0x21,0x21,0x3db,0x3e4,0x3e1,0x3e1, 
-0x3e1,0x3d8,0x3d8,0x3d8,0x3d8,0x3d8,0x21,0x3d8,0x3d8,0x3e1,0x21,0x3e1,0x3e1,0x3de,0x21,0x21, -0x3e4,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21, -0x3e4,0xccc,0xcc6,0xcc6,0x21,0x21,0x948,0x948,0x948,0x948,0x948,0x948,0x948,0x948,0x948,0x948, -0x13fb,0xcc9,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x16fe,0x187e,0x187e,0x187e,0x1881,0x1881,0x1881, -0x24,0x3e7,0x3f6,0x3f6,0x24,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x24,0x24,0x3fc, -0x3fc,0x24,0x24,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc, -0x3fc,0x24,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x24,0x3fc,0x3fc,0x24,0xccf,0x3fc,0x3fc, -0x3fc,0x3fc,0x24,0x24,0x3ea,0x3fc,0x3e7,0x3e7,0x3f6,0x3e7,0x3e7,0x3e7,0xf99,0x24,0x24,0x3f6, -0x3f9,0x24,0x24,0x3f9,0x3f9,0x3ed,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x3e7,0x3e7, -0x24,0x24,0x24,0x24,0x3ff,0x3ff,0x24,0x3fc,0x3fc,0x3fc,0xf99,0xf99,0x24,0x24,0x3f3,0x3f3, -0x3f3,0x3f3,0x3f3,0x3f3,0x3f3,0x3f3,0x3f3,0x3f3,0x3f0,0xccf,0x131d,0x131d,0x131d,0x131d,0x131d,0x131d, -0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x27,0x27,0x402,0x40e,0x27,0x40e,0x40e,0x40e, -0x40e,0x40e,0x40e,0x27,0x27,0x27,0x40e,0x40e,0x40e,0x27,0x40e,0x40e,0x411,0x40e,0x27,0x27, -0x27,0x40e,0x40e,0x27,0x40e,0x27,0x40e,0x40e,0x27,0x27,0x27,0x40e,0x40e,0x27,0x27,0x27, -0x40e,0x40e,0x40e,0x27,0x27,0x27,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0xdb9,0x40e, -0x40e,0x40e,0x27,0x27,0x27,0x27,0x402,0x408,0x402,0x408,0x408,0x27,0x27,0x27,0x408,0x408, -0x408,0x27,0x40b,0x40b,0x40b,0x405,0x27,0x27,0xf9c,0x27,0x27,0x27,0x27,0x27,0x27,0x402, -0x27,0x27,0x27,0x27,0x27,0x27,0x27,0x27,0x27,0x27,0xed0,0x94e,0x94e,0x94e,0x94e,0x94e, -0x94e,0x94e,0x94e,0x94e,0x94b,0x94b,0x94b,0xd89,0xcd2,0xcd2,0xcd2,0xcd2,0xcd2,0xcd5,0xcd2,0x27, -0x27,0x27,0x27,0x27,0x14e5,0x420,0x420,0x420,0x1923,0x423,0x423,0x423,0x423,0x423,0x423,0x423, -0x423,0x2a,0x423,0x423,0x423,0x2a,0x423,0x423,0x423,0x423,0x423,0x423,0x423,0x423,0x423,0x423, 
-0x423,0x423,0x423,0x423,0x423,0x2a,0x423,0x423,0x423,0x423,0x423,0x423,0x423,0x423,0x423,0x423, -0x14e8,0x423,0x423,0x423,0x423,0x423,0x2a,0x2a,0x2a,0xfa5,0x414,0x414,0x414,0x420,0x420,0x420, -0x420,0x2a,0x414,0x414,0x417,0x2a,0x414,0x414,0x414,0x41a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a, -0x2a,0x414,0x414,0x2a,0xfa5,0xfa5,0x1701,0x2a,0x2a,0x2a,0x2a,0x2a,0x423,0x423,0xf9f,0xf9f, -0x2a,0x2a,0x41d,0x41d,0x41d,0x41d,0x41d,0x41d,0x41d,0x41d,0x41d,0x41d,0x2a,0x2a,0x2a,0x2a, -0x2a,0x2a,0x2a,0x2a,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0x17be,0x14eb,0x42f,0x42f, -0x1926,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x2d,0x435,0x435,0x435,0x2d,0x435,0x435, -0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x2d,0x435,0x435, -0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x435,0x2d,0x435,0x435,0x435,0x435,0x435,0x2d,0x2d, -0xcd8,0xcdb,0x42f,0x426,0x432,0x42f,0x426,0x42f,0x42f,0x2d,0x426,0x432,0x432,0x2d,0x432,0x432, -0x426,0x429,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x426,0x426,0x2d,0x2d,0x2d,0x2d,0x2d, -0x2d,0x2d,0x435,0x2d,0x435,0x435,0xee8,0xee8,0x2d,0x2d,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c, -0x42c,0x42c,0x42c,0x42c,0x2d,0xeeb,0xeeb,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d, -0x2d,0x2d,0x2d,0x2d,0x1884,0x14ee,0x441,0x441,0x30,0x447,0x447,0x447,0x447,0x447,0x447,0x447, -0x447,0x30,0x447,0x447,0x447,0x30,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447, -0x447,0x447,0x447,0x447,0x441,0x438,0x438,0x438,0xfa8,0x30,0x441,0x441,0x441,0x30,0x444,0x444, -0x444,0x43b,0x1323,0x17c1,0x30,0x30,0x30,0x30,0x17c4,0x17c4,0x17c4,0x438,0x17c1,0x17c1,0x17c1,0x17c1, -0x17c1,0x17c1,0x17c1,0x1704,0x447,0x447,0xfa8,0xfa8,0x30,0x30,0x43e,0x43e,0x43e,0x43e,0x43e,0x43e, -0x43e,0x43e,0x43e,0x43e,0xfab,0xfab,0xfab,0xfab,0xfab,0xfab,0x17c1,0x17c1,0x17c1,0xfae,0xfb1,0xfb1, -0xfb1,0xfb1,0xfb1,0xfb1,0x33,0x33,0xa17,0xa17,0x33,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d, -0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0x33,0x33,0x33,0xa1d,0xa1d, 
-0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d, -0xa1d,0xa1d,0x33,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0x33,0xa1d,0x33,0x33, -0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0x33,0x33,0x33,0xa11,0x33,0x33,0x33,0x33,0xa0e, -0xa17,0xa17,0xa0e,0xa0e,0xa0e,0x33,0xa0e,0x33,0xa17,0xa17,0xa1a,0xa17,0xa1a,0xa1a,0xa1a,0xa0e, -0x33,0x33,0x33,0x33,0x33,0x33,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1,0x14f1, -0x33,0x33,0xa17,0xa17,0xa14,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33, -0x36,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462, -0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462,0x462, -0x462,0x44d,0x462,0x45f,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x453,0x36,0x36,0x36,0x36,0x44a, -0x468,0x468,0x468,0x468,0x468,0x462,0x465,0x450,0x450,0x450,0x450,0x450,0x450,0x44d,0x450,0x456, -0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x459,0x459,0x36,0x36,0x36,0x36, -0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36, -0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x39,0x477,0x477,0x39, -0x477,0x39,0x39,0x477,0x477,0x39,0x477,0x39,0x39,0x477,0x39,0x39,0x39,0x39,0x39,0x39, -0x477,0x477,0x477,0x477,0x39,0x477,0x477,0x477,0x477,0x477,0x477,0x477,0x39,0x477,0x477,0x477, -0x39,0x477,0x39,0x477,0x39,0x39,0x477,0x477,0x39,0x477,0x477,0x477,0x477,0x46b,0x477,0x474, -0x46b,0x46b,0x46b,0x46b,0x46b,0x46b,0x39,0x46b,0x46b,0x477,0x39,0x39,0x480,0x480,0x480,0x480, -0x480,0x39,0x47d,0x39,0x46e,0x46e,0x46e,0x46e,0x46e,0x46b,0x39,0x39,0x471,0x471,0x471,0x471, -0x471,0x471,0x471,0x471,0x471,0x471,0x39,0x39,0x47a,0x47a,0x13fe,0x13fe,0x39,0x39,0x39,0x39, -0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39, -0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x990,0x990,0x990,0x993, 
-0x990,0x990,0x990,0x990,0x3c,0x990,0x990,0x990,0x990,0x993,0x990,0x990,0x990,0x990,0x993,0x990, -0x990,0x990,0x990,0x993,0x990,0x990,0x990,0x990,0x993,0x990,0x990,0x990,0x990,0x990,0x990,0x990, -0x990,0x990,0x990,0x990,0x990,0x993,0xa2c,0xfbd,0xfbd,0x3c,0x3c,0x3c,0x3c,0x95d,0x95d,0x960, -0x95d,0x960,0x960,0x969,0x960,0x969,0x95d,0x95d,0x95d,0x95d,0x95d,0x98a,0x95d,0x960,0x963,0x963, -0x966,0x96f,0x963,0x963,0x990,0x990,0x990,0x990,0x132c,0x1326,0x1326,0x1326,0x95d,0x95d,0x95d,0x960, -0x95d,0x95d,0xa20,0x95d,0x3c,0x95d,0x95d,0x95d,0x95d,0x960,0x95d,0x95d,0x95d,0x95d,0x960,0x95d, -0x95d,0x95d,0x95d,0x960,0x95d,0x95d,0x95d,0x95d,0x960,0x95d,0xa20,0xa20,0xa20,0x95d,0x95d,0x95d, -0x95d,0x95d,0x95d,0x95d,0xa20,0x960,0xa20,0xa20,0xa20,0x3c,0xa29,0xa29,0xa26,0xa26,0xa26,0xa26, -0xa26,0xa26,0xa23,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0x3c,0xfb4,0xa26,0xdbc,0xdbc,0xfb7,0xfba, -0xfb4,0x113d,0x113d,0x113d,0x113d,0x1329,0x1329,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c, -0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c, -0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x486,0x486,0x486,0x486,0x486,0x486,0x3f,0x1404, -0x3f,0x3f,0x3f,0x3f,0x3f,0x1404,0x3f,0x3f,0x483,0x483,0x483,0x483,0x483,0x483,0x483,0x483, -0x483,0x483,0x483,0x483,0x483,0x483,0x483,0x483,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xdcb, -0xa56,0x42,0xa56,0xa56,0xa56,0xa56,0x42,0x42,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0x42, -0xa56,0x42,0xa56,0xa56,0xa56,0xa56,0x42,0x42,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xdcb, -0xa56,0x42,0xa56,0xa56,0xa56,0xa56,0x42,0x42,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56, -0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xdcb,0xa56,0x42,0xa56,0xa56, -0xa56,0xa56,0x42,0x42,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0x42,0xa56,0x42,0xa56,0xa56, -0xa56,0xa56,0x42,0x42,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xdcb,0xa56,0xa56,0xa56,0xa56, 
-0xa56,0xa56,0xa56,0x42,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56, -0xa56,0xa56,0xa56,0xdcb,0xa56,0x42,0xa56,0xa56,0xa56,0xa56,0x42,0x42,0xa56,0xa56,0xa56,0xa56, -0xa56,0xa56,0xa56,0xdcb,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56, -0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0x42,0x42,0x132f,0x132f,0xdc5,0xdc8,0xa50,0xa59,0xa4d, -0xa4d,0xa4d,0xa4d,0xa59,0xa59,0xa53,0xa53,0xa53,0xa53,0xa53,0xa53,0xa53,0xa53,0xa53,0xa4a,0xa4a, -0xa4a,0xa4a,0xa4a,0xa4a,0xa4a,0xa4a,0xa4a,0xa4a,0xa4a,0x42,0x42,0x42,0xa5c,0xa5c,0xa5c,0xa5c, -0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c, -0xa5c,0x170a,0x45,0x45,0x1707,0x1707,0x1707,0x1707,0x1707,0x1707,0x45,0x45,0xa6e,0xa71,0xa71,0xa71, -0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71, -0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71,0xa6b,0xa68,0x48,0x48,0x48,0xa77,0xa77,0xa77,0xa77, -0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa74,0xa74,0xa74,0xa77,0xa77,0xa77,0x14f4,0x14f4,0x14f4, -0x14f4,0x14f4,0x14f4,0x14f4,0x14f4,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0xa98,0xa98,0xa98,0xa98, -0xa98,0xa98,0xa7a,0xa98,0xa98,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa80,0xa7d, -0xa8f,0xa8f,0xa92,0xa9b,0xa89,0xa86,0xa8f,0xa8c,0xa9b,0xcde,0x4e,0x4e,0xa95,0xa95,0xa95,0xa95, -0xa95,0xa95,0xa95,0xa95,0xa95,0xa95,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xce1,0xce1,0xce1,0xce1, -0xce1,0xce1,0xce1,0xce1,0xce1,0xce1,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xaaa,0xaaa,0xb2e,0xb31, -0xab0,0xb2b,0xaad,0xaaa,0xab3,0xac2,0xab6,0xac5,0xac5,0xac5,0xaa1,0x51,0xab9,0xab9,0xab9,0xab9, -0xab9,0xab9,0xab9,0xab9,0xab9,0xab9,0x51,0x51,0x51,0x51,0x51,0x51,0xabc,0xabc,0xabc,0xabc, -0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc, -0xabc,0xabc,0xabc,0xabc,0x1929,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0xabc,0xabc,0xabc,0xabc, 
-0xabc,0xabc,0xabc,0xabc,0xabc,0xaa4,0xfde,0x51,0x51,0x51,0x51,0x51,0x1194,0x1194,0x1194,0x1194, -0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x4a4,0x4a4,0x4a4,0x4a4, -0x4a4,0x4a4,0x4a4,0x4a4,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a4,0x4a4,0x4a4,0x4a4, -0x4a4,0x4a4,0x54,0x54,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x54,0x54,0x4a4,0x4a4,0x4a4,0x4a4, -0x4a4,0x4a4,0x4a4,0x4a4,0x54,0x4a7,0x54,0x4a7,0x54,0x4a7,0x54,0x4a7,0x4a4,0x4a4,0x4a4,0x4a4, -0x4a4,0x4a4,0x4a4,0x4a4,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a4,0x4a4,0x4a4,0x4a4, -0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x54,0x54,0x4a4,0x4a4,0x4a4,0x4a4, -0x4a4,0x4a4,0x4a4,0x4a4,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a4,0x4a4,0x4a4,0x4a4, -0x4a4,0x54,0x4a4,0x4a4,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x49e,0x4a4,0x49e,0x49e,0x49b,0x4a4,0x4a4, -0x4a4,0x54,0x4a4,0x4a4,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x49b,0x49b,0x49b,0x4a4,0x4a4,0x4a4,0x4a4, -0x54,0x54,0x4a4,0x4a4,0x4a7,0x4a7,0x4a7,0x4a7,0x54,0x49b,0x49b,0x49b,0x4a4,0x4a4,0x4a4,0x4a4, -0x4a4,0x4a4,0x4a4,0x4a4,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x49b,0x49b,0x49b,0x54,0x54,0x4a4,0x4a4, -0x4a4,0x54,0x4a4,0x4a4,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a1,0x49e,0x54,0xba3,0xba6,0xba6,0xba6, -0xfe7,0x57,0x14d0,0x14d0,0x14d0,0x14d0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4fb,0xbb8,0x5a,0x5a, -0x6ba,0x4fb,0x4fb,0x4fb,0x4fb,0x4fb,0x501,0x513,0x501,0x50d,0x507,0x6bd,0x4f8,0x6b7,0x6b7,0x6b7, -0x6b7,0x4f8,0x4f8,0x4f8,0x4f8,0x4f8,0x4fe,0x510,0x4fe,0x50a,0x504,0x5a,0xdd4,0xdd4,0xdd4,0xdd4, -0xdd4,0x1332,0x1332,0x1332,0x1332,0x1332,0x1332,0x1332,0x1332,0x5a,0x5a,0x5a,0x5d,0x5d,0x5d,0x5d, -0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x522,0x522,0x522,0x522, -0x522,0x522,0x522,0x522,0x522,0x522,0x522,0x522,0x522,0x51f,0x51f,0x51f,0x51f,0x522,0xad7,0xada, -0xbbe,0xbc4,0xbc4,0xbc1,0xbc1,0xbc1,0xbc1,0xdda,0xeee,0xeee,0xeee,0xeee,0x1128,0x60,0x60,0x60, 
-0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x552,0x552,0x552,0xae3, -0xef7,0xfed,0xfed,0xfed,0xfed,0x1287,0x1710,0x1710,0x63,0x63,0x63,0x63,0x6e4,0x6e4,0x6e4,0x6e4, -0x6e7,0x6e7,0x6e7,0x6e7,0x6e7,0x6e7,0x55e,0x55e,0x55b,0x55b,0x55b,0x55b,0x585,0x585,0x585,0x585, -0x585,0xaef,0xaef,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66, -0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x588,0x588,0x588,0x588, -0x588,0x588,0x588,0x588,0x588,0x588,0x588,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69, -0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0xb0a,0xb0a,0xb0a,0xb0a, -0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a, -0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0x6c,0xb0a,0xb0a,0xb0a,0xb0a,0xb0d,0xb0a,0xb0a,0xb0a,0xb0a, -0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0d, -0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0xb10,0xb10,0xb10,0xb10, -0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10, -0xb10,0xb10,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x75,0x81c,0x816,0x81c, -0x816,0x81c,0x816,0x81c,0x816,0x81c,0x816,0x816,0x819,0x816,0x819,0x816,0x819,0x816,0x819,0x816, -0x819,0x816,0x819,0x816,0x819,0x816,0x819,0x816,0x819,0x816,0x819,0x816,0x816,0x816,0x816,0x81c, -0x816,0x81c,0x816,0x81c,0x816,0x816,0x816,0x816,0x816,0x816,0x81c,0x816,0x816,0x816,0x816,0x816, -0x819,0xc6c,0xc6c,0x75,0x75,0x930,0x930,0x8fa,0x8fa,0x81f,0x822,0xc69,0x78,0x78,0x78,0x78, -0x78,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834, -0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x1116,0x18f3,0x19d7, -0x7b,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837, -0x837,0x837,0x837,0x7b,0x903,0x903,0x906,0x906,0x906,0x906,0x906,0x906,0x906,0x906,0x906,0x906, 
-0x906,0x906,0x906,0x906,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c, -0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0x13c5,0x13c5,0x13c5,0x7e, -0x7e,0x7e,0x7e,0x7e,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840, -0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840,0x840, -0x840,0xd71,0xd71,0x81,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846, -0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846, -0x846,0x846,0x846,0x81,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22, -0xb22,0x84,0x84,0x84,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb28,0xb28,0xb28,0xc75,0xb28,0xb28,0xb28,0xc75,0xb28,0x87,0x87,0x87,0x87,0x87, -0x87,0x87,0x87,0x87,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb, -0x11bb,0x11bb,0x11bb,0x11bb,0x9b4,0x9b4,0x9b4,0x9b4,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a, -0x8a,0x8a,0x8a,0x8a,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230, -0x1230,0x1230,0x1230,0x1230,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x5e5,0x8d,0x8d,0x8d,0x8d,0x8d, -0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x5d3,0x5d3,0x5d3,0x5d3,0x5d3,0x8d,0x8d,0x8d,0x8d, -0x8d,0xafb,0x5d6,0x5dc,0x5e2,0x5e2,0x5e2,0x5e2,0x5e2,0x5e2,0x5e2,0x5e2,0x5e2,0x5d9,0x5dc,0x5dc, -0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x8d,0x5dc,0x5dc,0x5dc,0x5dc, -0x5dc,0x8d,0x5dc,0x8d,0x5dc,0x5dc,0x8d,0x5dc,0x5dc,0x8d,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc, -0x5dc,0x5dc,0x5dc,0x5df,0x5f7,0x5f1,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1, -0x5f4,0x5fa,0x5f7,0x5f1,0x1344,0x1344,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90, -0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1,0x5f7,0x5f1,0x5f7, 
-0x5f1,0x5f7,0x5f7,0x5f1,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90, -0x90,0x90,0x90,0x90,0x5f4,0x5f1,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f1,0x5f4,0x5f1,0x5f1, -0x5f4,0x5f4,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f4,0x5f1,0x5f1,0x5f4,0x5f1,0x5f4,0x5f4,0x5f4,0x5f1, -0x5f4,0x5f4,0x5f4,0x5f4,0x90,0x90,0x5f4,0x5f4,0x5f4,0x5f4,0x5f1,0x5f1,0x5f4,0x5f1,0x5f1,0x5f1, -0x5f1,0x5f4,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f4,0x5f4,0x5f4,0x5f1,0x5f1,0x90,0x90,0x90,0x90, -0x90,0x90,0x90,0x90,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43, -0xb43,0xb43,0xb43,0xb43,0x5f7,0x5f7,0x951,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5ee,0x5ee, -0xbfd,0xd8c,0x90,0x90,0x858,0x86a,0x867,0x86a,0x867,0xc8a,0xc8a,0xd7d,0xd7a,0x85b,0x85b,0x85b, -0x85b,0x86d,0x86d,0x86d,0x885,0x888,0x897,0x93,0x88b,0x88e,0x89a,0x89a,0x882,0x879,0x873,0x879, -0x873,0x879,0x873,0x876,0x876,0x891,0x891,0x894,0x891,0x891,0x891,0x93,0x891,0x87f,0x87c,0x876, -0x93,0x93,0x93,0x93,0x603,0x60f,0x603,0xc00,0x603,0x96,0x603,0x60f,0x603,0x60f,0x603,0x60f, -0x603,0x60f,0x603,0x60f,0x60f,0x60c,0x606,0x609,0x60f,0x60c,0x606,0x609,0x60f,0x60c,0x606,0x609, -0x60f,0x60c,0x606,0x60c,0x606,0x60c,0x606,0x609,0x60f,0x60c,0x606,0x60c,0x606,0x60c,0x606,0x60c, -0x606,0x96,0x96,0x600,0x756,0x759,0x76e,0x771,0x750,0x759,0x759,0x9c,0x738,0x73b,0x73b,0x73b, -0x73b,0x738,0x738,0x9c,0x99,0x99,0x99,0x99,0x99,0x99,0x99,0x99,0x99,0xafe,0xafe,0xafe, -0x9b7,0x732,0x612,0x612,0x9c,0x780,0x75f,0x750,0x759,0x756,0x750,0x762,0x753,0x74d,0x750,0x76e, -0x765,0x75c,0x77d,0x750,0x77a,0x77a,0x77a,0x77a,0x77a,0x77a,0x77a,0x77a,0x77a,0x77a,0x76b,0x768, -0x76e,0x76e,0x76e,0x780,0x741,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e, -0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e, -0x73e,0x73e,0x73e,0x9c,0x9c,0x9c,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x9c,0x9c,0x73e,0x73e, -0x73e,0x73e,0x73e,0x73e,0x9c,0x9c,0x73e,0x73e,0x73e,0x73e,0x73e,0x73e,0x9c,0x9c,0x73e,0x73e, 
-0x73e,0x9c,0x9c,0x9c,0xb46,0xb46,0xb46,0xb46,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f,0x9f, -0x9f,0x1890,0x1890,0x1890,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c, -0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xa2,0xa2,0xa2,0xa2,0xa2,0x165c,0x165c,0x165c,0x165c, -0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0xb55,0xb55,0xb55,0xb55, -0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55, -0xb55,0xb55,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xb61,0xb61,0xb61,0xb61, -0xb61,0xb61,0xb61,0xa8,0xa8,0xff9,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61, -0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0x1716,0x1716,0x1716,0x1716, -0x1716,0x1716,0x1716,0x1716,0x1716,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8, -0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xa8,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76, -0xb76,0xab,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb79,0xb79,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb79,0xab,0xb79,0xb79,0xab,0xab,0xb79,0xab, -0xab,0xb79,0xb79,0xab,0xab,0xb79,0xb79,0xb79,0xb79,0xab,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb76,0xb76,0xb76,0xb76,0xab,0xb76,0xab,0xb76,0xb76,0xb76,0xb76,0xd02,0xb76,0xb76, -0xab,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb76,0xb76,0xb76,0xb76, -0xb79,0xb79,0xab,0xb79,0xb79,0xb79,0xb79,0xab,0xab,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xab,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xab,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76, 
-0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb79,0xb79,0xab,0xb79,0xb79,0xb79,0xb79,0xab,0xb79,0xb79,0xb79,0xb79, -0xb79,0xab,0xb79,0xab,0xab,0xab,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xab,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xdef,0xdef,0xab,0xab, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb76,0xb76,0xb76,0xb70,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb76,0xf06,0xf03,0xab,0xab,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73, -0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xae,0xb7f,0xae,0xae,0xae,0xae,0xae,0xae, -0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae, -0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xae,0xc0f,0xc0f,0xc0f,0xc0f,0xc0f,0xc0f,0xc0f,0xc0f, -0xc0f,0xc0f,0xc0f,0xc0f,0xc0f,0xb1,0xc0f,0xc0f,0xc0f,0xc0f,0xc09,0xc09,0xc0c,0xb1,0xb1,0xb1, -0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18, -0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc12,0xc12,0xc15,0xc7e,0xc7e,0xb4, -0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e, -0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xc1e,0xc1b,0xc1b,0xb7,0xb7,0xb7,0xb7, -0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24,0xc24, -0xc24,0xc24,0xc24,0xc24,0xc24,0xba,0xc24,0xc24,0xc24,0xba,0xc21,0xc21,0xba,0xba,0xba,0xba, -0xba,0xba,0xba,0xba,0xba,0xba,0xba,0xba,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14, -0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14,0xd14, -0xd14,0xd14,0xd14,0xd14,0xd14,0x1512,0x1512,0xbd,0xd05,0xd05,0xd05,0xd11,0xd11,0xd11,0xd11,0xd05, -0xd05,0xd11,0xd11,0xd11,0xbd,0xbd,0xbd,0xbd,0xd11,0xd11,0xd05,0xd11,0xd11,0xd11,0xd11,0xd11, 
-0xd11,0xd08,0xd08,0xd08,0xbd,0xbd,0xbd,0xbd,0xd0b,0xbd,0xbd,0xbd,0xd17,0xd17,0xd0e,0xd0e, -0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd0e,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a, -0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xc0,0xc0,0xd1a,0xd1a,0xd1a,0xd1a, -0xd1a,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0x1515,0x1515,0x1515,0x1515, -0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515, -0xc3,0xc3,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515, -0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0xc3,0xc3,0x1515,0x1515,0x1515,0x1515, -0x1515,0x1515,0x1515,0x1515,0x1515,0xc3,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1893,0x192c, -0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x1719,0x1719,0x1719,0x1719, -0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0xc3, -0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xc6,0xd41,0xd41,0xd41, -0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41, -0xd41,0xd41,0xd41,0xc6,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41, -0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xc6,0xd41,0xd41,0xc6,0xd41,0xd41,0xd41,0xd41,0xd41, -0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xc6,0xc6,0xd41,0xd41,0xd41,0xd41, -0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6, -0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6, -0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xc6,0xd44,0xd44,0xd44,0xd44, -0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44, -0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xc9,0xc9,0xc9,0xc9,0xc9,0xd86,0xd86,0xd86,0xcc, 
-0xcc,0xcc,0xcc,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80, -0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xcc,0xcc,0xcc,0xd83, -0xd83,0xd83,0xd83,0xd83,0xd83,0xd83,0xd83,0xd83,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a, -0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a, -0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xcf,0xd47,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53, -0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53, -0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd2,0xd2,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50,0xd50, -0xd50,0xd50,0xd2,0xd2,0xd2,0xd2,0xd2,0xd2,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851, -0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd5,0xd5, -0xd56,0xd5,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56, -0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd56,0xd5,0xd56,0xd56,0xd5,0xd5,0xd5, -0xd56,0xd5,0xd5,0xd56,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59, -0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd8,0xd8,0xd8,0xd8,0xd8, -0xd8,0xd8,0xd8,0xd8,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0x1518, -0x1518,0x17ca,0x17ca,0xde,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5,0x10f5, -0x135,0x135,0x135,0x135,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c, -0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe1c,0xe13,0xe13,0xe19,0xe19,0xe13, -0xe1,0xe1,0xe16,0xe16,0x1125,0x1125,0x1125,0x1125,0xe4,0xe4,0xe4,0xe4,0xe4,0xe4,0xe4,0xe4, -0xe4,0xe4,0xe4,0xe4,0xc7b,0xc7b,0xc7b,0xc7b,0xc7b,0xc7b,0xc7b,0xc7b,0xc7b,0xc7b,0xc7b,0xc7b, -0xc7b,0xc7b,0xc7b,0xc7b,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x151b,0x151b,0x151b,0x151b,0x151b, 
-0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151e,0x1896,0x1896,0x1896,0x1896,0xe7,0x17cd, -0x1350,0x1167,0xf15,0xf15,0xe2e,0xe2b,0xe2e,0xe2b,0xe2b,0xe22,0xe22,0xe22,0xe22,0xe22,0xe22,0x1170, -0x116d,0x1170,0x116d,0x116a,0x116a,0x116a,0x140d,0x140a,0xea,0xea,0xea,0xea,0xea,0xe28,0xe25,0xe25, -0xe25,0xe22,0xe28,0xe25,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31, -0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xed,0xed,0xed,0xed,0xed, -0xed,0xed,0xed,0xed,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xed,0xe31,0xe31,0xe31,0xe31, -0xe31,0xe31,0xe31,0xed,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xed,0xe31,0xe31,0xe31,0xe31, -0xe31,0xe31,0xe31,0xed,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37, -0xe37,0xe37,0xe37,0xe37,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xf0,0xf0, -0xf0,0xf0,0xf0,0xf0,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xf3,0x1410,0xf3,0xf3,0xf3,0xf3, -0xf3,0x1410,0xf3,0xf3,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94, -0xe94,0xe94,0xe94,0xe94,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40, -0xe40,0xe40,0xe40,0xf6,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d, -0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d,0xe3d, -0xe3d,0xe3d,0xe3d,0xf6,0xe52,0xe46,0xe46,0xe46,0xf9,0xe46,0xe46,0xf9,0xf9,0xf9,0xf9,0xf9, -0xe46,0xe46,0xe46,0xe46,0xe52,0xe52,0xe52,0xe52,0xf9,0xe52,0xe52,0xe52,0xf9,0xe52,0xe52,0xe52, -0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52,0xe52, -0xe52,0xe52,0xe52,0xe52,0x1932,0x1932,0xf9,0xf9,0xe43,0xe43,0xe43,0xf9,0xf9,0xf9,0xf9,0xe49, -0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0xe4c,0x192f,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9, -0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe4f,0xe55,0xe55,0xe4c,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9, 
-0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0x1176,0x1176,0xfc,0xfc,0xfc,0xfc, -0xe61,0xe61,0xe61,0xe61,0xe61,0xe64,0xe64,0xe64,0xe61,0xe61,0xe64,0xe61,0xe61,0xe61,0xe61,0xe61, -0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xe5e,0xe5e,0xe5e,0xe5e, -0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0x1173,0xfc,0xfc,0xfc,0xe5b,0xe5b,0xe6a,0xe6a,0xe6a,0xe6a, -0xff,0xff,0xff,0xff,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe67,0xe6a,0xe6a,0xe6a, -0xe6a,0xe6a,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x1527,0x152d,0x152a,0x1875, -0x17d0,0x1899,0x1899,0x1899,0x1899,0x1899,0x1938,0x1935,0x193b,0x1935,0x193b,0x102,0x102,0x102,0x102,0x102, -0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102, -0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0x102,0xe91,0xe91,0xe91,0xe8e, -0xe8e,0xe85,0xe85,0xe8e,0xe8b,0xe8b,0xe8b,0xe8b,0x105,0x105,0x105,0x105,0x12ed,0x12ed,0x12ed,0x12f0, -0x12f0,0x12f0,0x12e7,0x12e7,0x12ea,0x12e7,0x159,0x159,0x159,0x159,0x159,0x159,0xe94,0xe94,0xe94,0xe94, -0xe94,0xe94,0x141c,0x141c,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0xe97,0x1356,0x108,0x108,0x108, -0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x108,0x1353,0xc4b,0xc4b,0xc4b,0xc4b, -0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4e,0xec4,0xeb5,0xeaf,0xec1, -0xebe,0xeb8,0xeb8,0xec7,0xeb2,0xebb,0x10b,0x10b,0x10b,0x10b,0x10b,0x10b,0xf48,0xf48,0xf33,0xf48, -0xf4b,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0x111,0x111,0x111,0x111,0xf42,0xf42,0xf42,0xf42, -0xf42,0xf42,0xf42,0xf42,0xf42,0xf42,0xf54,0xf54,0xf39,0xf3f,0xf54,0xf54,0xf3c,0xf39,0xf39,0xf39, -0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf36,0xf36,0xf36,0xf36,0xf36,0xf36,0xf36,0xf36,0xf36, -0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0x111,0x111,0x111,0x135c,0x1359,0x135c,0x1359, -0x135c,0x1359,0x135c,0x1359,0x135c,0x1359,0x1422,0x1539,0x1539,0x1539,0x17d3,0x1944,0x1539,0x1539,0x1722,0x1722, 
-0x1722,0x171c,0x1722,0x171c,0x1947,0x1944,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114, -0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114, -0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x1536,0x1425,0x1425,0x1359,0x1050, -0x1050,0x1050,0x1050,0x1050,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63, -0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf60,0xf60,0xf66,0xf66,0x117,0x117,0x117,0x117, -0x117,0x117,0x117,0x117,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f, -0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf6f,0xf69,0xf69,0xf69,0xf69,0x117f,0x117f, -0x11a,0x11a,0x11a,0xf6c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, -0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x1725,0x11d,0x11d, -0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d, -0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d, -0xf78,0xf78,0xf78,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x120, -0xf75,0xf75,0xf75,0xf75,0x153f,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x120, -0xf7b,0xf7b,0xf7b,0xf7b,0xf7b,0xf7b,0xf7b,0xf7b,0xf7b,0xf7b,0xf7b,0xf7b,0xf7b,0xf7b,0xf7b,0xf7b, -0xf7b,0xf7b,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x194a,0x123,0x123,0x123,0x123,0x123,0x123,0x123, -0x1077,0x1077,0x1077,0x1077,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1074,0x1065,0x1065,0x1065,0x1065, -0x1065,0x1065,0x1065,0x1065,0x1074,0x1074,0x106b,0x1068,0x126,0x126,0x126,0x107a,0x107a,0x106e,0x106e,0x106e, -0x1071,0x1071,0x1071,0x1071,0x1071,0x1071,0x1071,0x1071,0x1071,0x1071,0x126,0x126,0x126,0x1077,0x1077,0x1077, -0x107d,0x107d,0x107d,0x107d,0x107d,0x107d,0x107d,0x107d,0x107d,0x107d,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080, 
-0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1092,0x1095,0x1095,0x129,0x129,0x129,0x129, -0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129,0x129, -0x10bc,0x10bc,0x10bc,0x10bc,0x10b6,0x17d6,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x10c2,0x10c2, -0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c, -0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10d4,0x10d4,0x10d4,0x10d4,0x10d4,0x10d4,0x10d4,0x10d4,0x10d4, -0x10d4,0x10d4,0x10da,0x10dd,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x10d7, -0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10ec, -0x10ec,0x10e3,0x10e3,0x10ec,0x10ec,0x10e3,0x10e3,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132, -0x10ef,0x10ef,0x10ef,0x10e3,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10e3,0x10ec,0x132,0x132, -0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x132,0x132,0x10e6,0x10f2,0x10f2,0x10f2, -0x154e,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135, -0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135,0x135, -0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8, -0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10fb,0x138,0x138, -0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe, -0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x10fe,0x13b,0x13b,0x13b, -0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101, -0x1101,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e,0x13e, 
-0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107, -0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x141,0x141,0x141,0x141,0x141,0x1104, -0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x144,0x144,0x144,0x144, -0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d, -0x110d,0x110d,0x110d,0x110d,0x147,0x147,0x147,0x147,0x147,0x147,0x147,0x147,0x147,0x147,0x147,0x147, -0x1185,0x1185,0x1185,0x1185,0x118e,0x1185,0x1185,0x1185,0x118e,0x1185,0x1185,0x1185,0x1185,0x1182,0x14a,0x14a, -0x118b,0x118b,0x118b,0x118b,0x118b,0x118b,0x118b,0x1191,0x118b,0x1191,0x118b,0x118b,0x118b,0x1191,0x1191,0x14a, -0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194, -0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x14d,0x14d,0x14d,0x14d,0x14d,0x14d,0x14d,0x14d,0x14d,0x14d, -0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af, -0x11af,0x11af,0x11af,0x11af,0x11af,0x11ac,0x1197,0x11ac,0x1197,0x1197,0x1197,0x1197,0x1197,0x1197,0x1197,0x150, -0x11a0,0x11a9,0x1197,0x11a9,0x11a9,0x1197,0x1197,0x1197,0x1197,0x1197,0x1197,0x1197,0x1197,0x11ac,0x11ac,0x11ac, -0x11ac,0x11ac,0x11ac,0x1197,0x1197,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x150,0x150,0x119a, -0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x150,0x150,0x150,0x150,0x150,0x150, -0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x11a6,0x150,0x150,0x150,0x150,0x150,0x150, -0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11a3,0x11b2,0x11b5,0x11b5,0x11b5,0x11b5,0x11a3,0x11a3,0x150,0x150, -0x1599,0x1599,0x1599,0x1599,0x1599,0x1599,0x1599,0x1599,0x1599,0x1599,0x1599,0x1599,0x1599,0x1599,0x1596,0x1c8, -0x1302,0x12e1,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12d8,0x12e4,0x12d8,0x12d8,0x12ff,0x12d8,0x12d8, 
-0x12d8,0x12d8,0x12de,0x14c4,0x14ca,0x14c7,0x14c7,0x1914,0x16ef,0x16ef,0x153,0x153,0x153,0x153,0x153,0x153, -0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca, -0x11c1,0x11c1,0x11c4,0x11cd,0x11c7,0x11c7,0x11c7,0x11cd,0x156,0x156,0x156,0x156,0x156,0x156,0x156,0x156, -0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6, -0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x15c,0x15c,0x15c, -0x11eb,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11e2,0x11f1,0x11f1,0x11df,0x11df,0x11df,0x11df,0x15f,0x12f3, -0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x11e5,0x15f,0x15f,0x15f,0x15f,0x11df,0x11df, -0x120f,0x1203,0x120f,0x162,0x162,0x162,0x162,0x162,0x162,0x162,0x162,0x162,0x162,0x162,0x162,0x162, -0x162,0x162,0x162,0x162,0x162,0x162,0x162,0x162,0x162,0x162,0x162,0x120c,0x120c,0x1212,0x1206,0x1209, -0x1227,0x1227,0x1227,0x1221,0x1221,0x1218,0x1221,0x1221,0x1218,0x1221,0x1221,0x122a,0x1224,0x121b,0x165,0x165, -0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x165,0x165,0x165,0x165,0x165,0x165, -0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x168,0x168,0x168,0x168,0x122d,0x122d,0x122d,0x122d,0x122d, -0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x122d, -0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x122d,0x168,0x168,0x168,0x168,0x1239,0x1239,0x1239,0x1239, -0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239,0x1239, -0x1239,0x1239,0x16b,0x1236,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1233,0x1248,0x1248,0x1248,0x1248, -0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248, -0x1248,0x1248,0x16e,0x16e,0x16e,0x1242,0x1245,0x1245,0x1245,0x1245,0x1245,0x1245,0x124e,0x124e,0x124e,0x124e, 
-0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e,0x124e, -0x124e,0x124e,0x171,0x171,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x1254,0x1254,0x1254,0x1254, -0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x174, -0x174,0x174,0x174,0x174,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x125a,0x125a,0x125a,0x125a, -0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a, -0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x125a,0x17a,0x1275,0x1275,0x17d,0x17d, -0x17d,0x17d,0x17d,0x17d,0x17d,0x17d,0x17d,0x17d,0x17d,0x1953,0x17d,0x17d,0x14a3,0x14a3,0x14a3,0x14a3, -0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x12a2,0x12a2,0x12a2,0x12a2, -0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x155a,0x155a,0x183,0x183,0x183,0x12a2,0x12a2,0x12a2,0x12a2, -0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x13ad,0x13ad,0x13ad,0x13ad, -0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x1434,0x1434,0x183,0x183,0x183,0x183,0x13b3,0x13b3,0x13ad,0x13ad, -0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x12ab,0x13ad,0x12ab,0x12ab,0x13ad,0x13b3,0x12b1,0x1854,0x1854,0x1854,0x1854, -0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x183,0x183,0x183,0x183,0x183,0x183,0x183, -0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183, -0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x1365,0x1365, -0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365, -0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x1365,0x12d2,0x13ce,0x13cb,0x186,0x186,0x186,0x186,0x186, -0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc, 
-0x12cc,0x12cc,0x12cf,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc,0x12cc, -0x12cc,0x12cc,0x12cc,0x12cf,0x12cc,0x12cc,0x13ce,0x13ce,0x13ce,0x13ce,0x13ce,0x13cb,0x13ce,0x13ce,0x13ce,0x1857, -0x186,0x186,0x186,0x186,0x12c9,0x12c9,0x12c9,0x12c9,0x12c9,0x12c9,0x12c9,0x12c9,0x12c9,0x186,0x186,0x186, -0x186,0x186,0x186,0x186,0x13f2,0x13f2,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186, -0x186,0x186,0x186,0x186,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x186,0x186,0x186,0x186,0x186,0x186, -0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186, -0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x186,0x136e,0x136e,0x136e,0x136e, -0x136e,0x136e,0x136e,0x136e,0x136e,0x136e,0x136e,0x136e,0x136e,0x136e,0x136e,0x136e,0x136e,0x136e,0x136e,0x136e, -0x136e,0x136e,0x136e,0x136e,0x136e,0x1368,0x1368,0x1368,0x189,0x189,0x136b,0x189,0x1380,0x1380,0x1380,0x1380, -0x1380,0x1380,0x1371,0x137a,0x1374,0x1374,0x137a,0x137a,0x137a,0x1374,0x137a,0x1374,0x1374,0x1374,0x137d,0x137d, -0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x18c,0x1377,0x1377,0x1377,0x1377,0x18f,0x1383,0x1383,0x1383, -0x1383,0x1383,0x1383,0x18f,0x18f,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x18f,0x18f,0x1383,0x1383,0x1383, -0x1383,0x1383,0x1383,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x1383,0x1383,0x1383,0x1383, -0x1383,0x1383,0x1383,0x18f,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x1383,0x18f,0x15f6,0x15f6,0x15f6,0x15f6, -0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x1386,0x1386,0x1386,0x1386, -0x1386,0x1386,0x1389,0x139b,0x139b,0x138f,0x138f,0x138f,0x138f,0x138f,0x192,0x192,0x192,0x192,0x138c,0x138c, -0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x1392,0x1392, -0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x1392,0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x192, 
-0x192,0x192,0x192,0x192,0x192,0x192,0x192,0x155d,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e, -0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e, -0x139e,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1, -0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x198,0x198,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1, -0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x1560,0x198,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1, -0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13d7,0x198,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1, -0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x13a1,0x1560,0x1560,0x1560,0x1560,0x1560,0x1560,0x1560,0x1560, -0x1560,0x1560,0x1560,0x1560,0x1560,0x1560,0x1560,0x1560,0x1560,0x1560,0x1560,0x1560,0x1560,0x1560,0x198,0x198, -0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x13ec,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x1575,0x1575, -0x1575,0x1575,0x1575,0x1578,0x16e6,0x1578,0x1578,0x1578,0x17b2,0x1860,0x1860,0x189c,0x189c,0x19b,0x19b,0x19b, -0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x1578,0x1578,0x1578,0x1578,0x1578,0x1578,0x1575,0x1575, -0x1575,0x1578,0x1575,0x16e3,0x16e3,0x19b,0x19b,0x19b,0x1578,0x1575,0x1575,0x1578,0x1860,0x1860,0x1860,0x18ff, -0x18ff,0x19dd,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4, -0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x19e,0x19e,0x19e,0x19e, -0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x1440,0x157e,0x1440,0x1440,0x1440,0x1440,0x1440,0x1440, -0x1440,0x1440,0x1440,0x1440,0x1440,0x157e,0x157e,0x157e,0x157e,0x157e,0x157e,0x1737,0x1737,0x1a1,0x17e2,0x17e2, -0x17e2,0x17e2,0x17e2,0x17e2,0x17e2,0x17e2,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1, -0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1959,0x17df,0x17df,0x17df,0x17df, 
-0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,0x1446,0x1446,0x1446,0x1446,0x1a4,0x1446,0x1446,0x1446, -0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446, -0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1a4,0x1446,0x1446,0x1a4,0x1446,0x1a4,0x1a4,0x1446, -0x1a4,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1a4,0x1446,0x1446,0x1446,0x1446, -0x1a4,0x1446,0x1a4,0x1446,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1446,0x1a4,0x1a4,0x1a4,0x1a4,0x1446, -0x1a4,0x1446,0x1a4,0x1446,0x1a4,0x1446,0x1446,0x1446,0x1a4,0x1446,0x1446,0x1a4,0x1446,0x1a4,0x1a4,0x1446, -0x1a4,0x1446,0x1a4,0x1446,0x1a4,0x1446,0x1a4,0x1446,0x1a4,0x1446,0x1446,0x1a4,0x1446,0x1a4,0x1a4,0x1446, -0x1446,0x1446,0x1446,0x1a4,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1a4,0x1446,0x1446,0x1446,0x1446, -0x1a4,0x1446,0x1446,0x1446,0x1446,0x1a4,0x1446,0x1a4,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446, -0x1446,0x1446,0x1a4,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446, -0x1446,0x1446,0x1446,0x1446,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1446,0x1446,0x1446,0x1a4,0x1446,0x1446,0x1446, -0x1446,0x1446,0x1a4,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446,0x1446, -0x1446,0x1446,0x1446,0x1446,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4, -0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4, -0x1a4,0x1a4,0x1a4,0x1a4,0x1443,0x1443,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4, -0x1a4,0x1a4,0x1a4,0x1a4,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x1449,0x1449,0x1449,0x1449,0x1449, -0x1458,0x1449,0x144c,0x144c,0x1449,0x1449,0x1449,0x144f,0x144f,0x1a7,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455, -0x1455,0x1455,0x1455,0x1455,0x1452,0x145e,0x145e,0x145e,0x195f,0x195c,0x195c,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7, 
-0x1a7,0x1a7,0x1a7,0x1a7,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608, -0x1608,0x1608,0x1608,0x1608,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x146a,0x1467, -0x1461,0x1461,0x1467,0x1467,0x1470,0x1470,0x146a,0x146d,0x146d,0x1467,0x1464,0x1aa,0x1aa,0x1aa,0x1aa,0x1aa, -0x1aa,0x1aa,0x1aa,0x1aa,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473, -0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1473,0x1ad,0x1ad,0x1ad,0x1ad, -0x173a,0x173a,0x1473,0x1473,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a, -0x173a,0x173a,0x173a,0x173a,0x1ad,0x1ad,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a, -0x173a,0x173a,0x173a,0x173a,0x147f,0x147f,0x147f,0x147f,0x147f,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0, -0x1b0,0x1b0,0x1b0,0x1b0,0x147f,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c, -0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c, -0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0, -0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1479,0x1479,0x1479,0x1479,0x1482,0x1482,0x1482,0x1482,0x1482, -0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1494,0x1497,0x149a,0x149a,0x1497,0x149d,0x149d,0x1488, -0x148b,0x173d,0x1740,0x1740,0x1740,0x1584,0x1b3,0x1b3,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e,0x148e, -0x148e,0x148e,0x1581,0x1746,0x1749,0x1743,0x174c,0x174c,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3, -0x14a3,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x14a0,0x14a0,0x14a0,0x14a0,0x14a0,0x14a0,0x14a0,0x14a0, -0x14a0,0x14a0,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6, -0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x12fc,0x12f9,0x12fc,0x12db,0x12f9,0x12ff,0x12ff,0x1302, 
-0x12ff,0x1302,0x1305,0x12f9,0x1302,0x1302,0x12f9,0x12f9,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8, -0x14b8,0x14b8,0x14b8,0x14a9,0x14b2,0x14a9,0x14b2,0x14b2,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14b5,0x14ac, -0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x158a, -0x158a,0x158a,0x158a,0x158a,0x158a,0x158a,0x1bf,0x1bf,0x1587,0x1587,0x1587,0x1587,0x1587,0x158d,0x1bf,0x1bf, -0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x1bf,0x16f2,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9, -0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9,0x16e9, -0x16e9,0x16e9,0x16e9,0x16e9,0x1c5,0x1c5,0x1c5,0x1c5,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8, -0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8, -0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x1c8,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5, -0x15a5,0x15a5,0x15a5,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5, -0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x15a5,0x15a5,0x15a5,0x15a5, -0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x1cb,0x1cb,0x15a2,0x159c,0x159f,0x15a8,0x15ab,0x15ab,0x15ab,0x15ab, -0x15ab,0x15ab,0x15ab,0x15ab,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1593,0x1593,0x1593,0x1593, -0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x15ae,0x15ae,0x15ae,0x15ae, -0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae, -0x15ae,0x1962,0x1962,0x1962,0x1962,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1, -0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1, -0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x174f,0x16f5,0x15b7,0x16fb,0x1d4,0x15c0,0x15c0,0x15c0, 
-0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x1d4,0x1d4,0x15c0,0x15c0,0x1d4,0x1d4,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0, -0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x1d4,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0, -0x15c0,0x1d4,0x15c0,0x15c0,0x1d4,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x1d4,0x19ec,0x16f8,0x15c0,0x15b1,0x15b7, -0x15b1,0x15b7,0x15b7,0x15b7,0x15b7,0x1d4,0x1d4,0x15b7,0x15b7,0x1d4,0x1d4,0x15ba,0x15ba,0x15bd,0x1d4,0x1d4, -0x1752,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x15b1,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x15c3,0x15c0,0x15c0, -0x15c0,0x15c0,0x15b7,0x15b7,0x1d4,0x1d4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x1d4,0x1d4,0x1d4, -0x15b4,0x15b4,0x15b4,0x15b4,0x15b4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4, -0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8, -0x15d8,0x15d8,0x1d7,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8,0x15d8, -0x15d2,0x15d2,0x15d2,0x15c6,0x15c6,0x15c6,0x15d2,0x15d2,0x15c6,0x15d5,0x15c9,0x15c6,0x15db,0x15db,0x15cf,0x15db, -0x15db,0x15cc,0x17e5,0x1d7,0x15ea,0x15ea,0x15ea,0x15de,0x15de,0x15de,0x15de,0x15de,0x15de,0x15e1,0x15e4,0x1da, -0x1da,0x1da,0x1da,0x1da,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x1da,0x1da, -0x1da,0x1da,0x1da,0x1da,0x1755,0x1755,0x1755,0x1755,0x15f6,0x15f3,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd, -0x1dd,0x1dd,0x1dd,0x1dd,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f, -0x177f,0x177f,0x177f,0x177f,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc, -0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0, -0x1e0,0x1e0,0x1e0,0x1e0,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc, -0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0, 
-0x1e0,0x1e0,0x1e0,0x1e0,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x1e0,0x1e0,0x1e0,0x1e0, -0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0, -0x1e0,0x1e0,0x1e0,0x1e0,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608, -0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x1608,0x15ff,0x1602,0x1605,0x1608,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3, -0x1e3,0x1e3,0x1e3,0x1e3,0x1617,0x1617,0x1617,0x1617,0x1617,0x160b,0x160b,0x1e6,0x1e6,0x1e6,0x1e6,0x160e, -0x160e,0x160e,0x160e,0x160e,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1611,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6, -0x1e6,0x1e6,0x1e6,0x1e6,0x1620,0x1620,0x1620,0x1620,0x1620,0x1e9,0x1e9,0x161d,0x161d,0x161d,0x161d,0x161d, -0x161d,0x161d,0x161d,0x161d,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x161a,0x1e9,0x1e9,0x1e9,0x1e9,0x1e9, -0x1e9,0x1e9,0x1e9,0x1e9,0x1623,0x1635,0x1635,0x1629,0x1632,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec,0x1ec, -0x1ec,0x1ec,0x1ec,0x1ec,0x162c,0x162c,0x162c,0x162c,0x162c,0x162c,0x162c,0x162c,0x162c,0x162c,0x1ec,0x1ec, -0x1ec,0x1ec,0x1ec,0x1ec,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b, -0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x163b, -0x163b,0x163b,0x163b,0x1ef,0x1647,0x1647,0x1647,0x1647,0x1647,0x1641,0x164a,0x1647,0x1647,0x1647,0x1647,0x1647, -0x1647,0x1647,0x1647,0x1647,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1647,0x1647, -0x1647,0x1647,0x1647,0x1f2,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650, -0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650,0x1650, -0x1650,0x1650,0x1650,0x1f5,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c, -0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x165c,0x1659,0x1659,0x1659,0x1659,0x1659,0x1f8, 
-0x1f8,0x1f8,0x1f8,0x1f8,0x1674,0x1674,0x1677,0x1677,0x167a,0x166b,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb, -0x1fb,0x1fb,0x1fb,0x1fb,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1671,0x1fb,0x166b, -0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x1fb,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674, -0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1fb,0x1fb,0x1fb,0x1fb, -0x1fb,0x1674,0x1674,0x1674,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683, -0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1fe,0x1fe,0x1fe, -0x1fe,0x1fe,0x1fe,0x1fe,0x168c,0x168c,0x168c,0x168c,0x168c,0x168c,0x168c,0x168c,0x168c,0x168c,0x168c,0x168c, -0x168c,0x168c,0x168c,0x168c,0x168c,0x168c,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x1689,0x1689,0x1689, -0x1689,0x201,0x201,0x201,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7, -0x16a7,0x16a7,0x16a7,0x168f,0x16a1,0x16a1,0x168f,0x168f,0x168f,0x168f,0x207,0x207,0x16a1,0x16a1,0x16a4,0x16a4, -0x168f,0x168f,0x16a1,0x1695,0x1692,0x1698,0x16aa,0x16aa,0x169b,0x169b,0x169e,0x169e,0x169e,0x16aa,0x175e,0x175e, -0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175e,0x175b,0x175b,0x175b,0x175b, -0x1758,0x1758,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207, -0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207, -0x207,0x207,0x207,0x207,0x20a,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad, -0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a, -0x20a,0x20a,0x20a,0x20a,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0, -0x20d,0x20d,0x20d,0x20d,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0, 
-0x16b0,0x16b0,0x16b0,0x16b0,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x16b0,0x16b0,0x16b0,0x16b0, -0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x16b0,0x16b0,0x16b0,0x16b0, -0x16b0,0x16b0,0x16b0,0x16b0,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x16b0,0x16b0,0x16b0,0x16b0, -0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x20d,0x20d, -0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d, -0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d, -0x16b3,0x16c2,0x16b9,0x16b6,0x16c8,0x16c8,0x16bc,0x16c8,0x210,0x210,0x210,0x210,0x210,0x210,0x210,0x210, -0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x16bf,0x210,0x210,0x210,0x210,0x210,0x210, -0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16cb,0x16cb,0x16cb,0x16cb,0x16cb,0x16cb, -0x16cb,0x16cb,0x16cb,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x16d4, -0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770, -0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1770,0x1965,0x216,0x216,0x1761,0x1761,0x1761, -0x176d,0x176d,0x1761,0x1761,0x1761,0x1761,0x176d,0x1761,0x1761,0x1761,0x1761,0x1764,0x216,0x216,0x216,0x216, -0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x176a,0x1767,0x1767,0x1773,0x1773,0x1773,0x1767, -0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219, -0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219, -0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x219,0x1788,0x1788,0x1788,0x1788,0x1788,0x1788,0x1788,0x1788, -0x1788,0x1788,0x1788,0x1788,0x1788,0x1788,0x1788,0x1788,0x1788,0x1788,0x1788,0x21f,0x1788,0x1788,0x21f,0x21f, 
-0x21f,0x21f,0x21f,0x1785,0x1785,0x1785,0x1785,0x1785,0x178b,0x178b,0x178b,0x178b,0x178b,0x178b,0x178b,0x222, -0x178b,0x222,0x178b,0x178b,0x178b,0x178b,0x222,0x178b,0x178b,0x178b,0x178b,0x178b,0x178b,0x178b,0x178b,0x178b, -0x178b,0x178b,0x178b,0x178b,0x178b,0x178b,0x222,0x178b,0x178b,0x178b,0x178b,0x178b,0x178b,0x178b,0x178b,0x178b, -0x178b,0x178e,0x222,0x222,0x222,0x222,0x222,0x222,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed, -0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797, -0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x225,0x225,0x225,0x225,0x225, -0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794, -0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x225,0x225,0x225,0x225,0x225, -0x225,0x225,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x189f,0x189f,0x189f,0x189f,0x189f,0x189f,0x189f,0x189f, -0x189f,0x189f,0x189f,0x189f,0x228,0x228,0x228,0x228,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8, -0x17bb,0x1869,0x1869,0x1869,0x1869,0x1866,0x1869,0x1905,0x1866,0x1866,0x1866,0x1866,0x1866,0x1866,0x1869,0x1866, -0x1902,0x1902,0x1902,0x1902,0x1902,0x1902,0x1902,0x1902,0x1869,0x1905,0x1905,0x1869,0x1869,0x1869,0x1869,0x1869, -0x1869,0x1869,0x1866,0x1863,0x1866,0x1869,0x1869,0x228,0x1902,0x1902,0x1902,0x1902,0x1902,0x1902,0x1902,0x1902, -0x1902,0x1902,0x1902,0x1902,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x228,0x228,0x19e0,0x19e0,0x19e0,0x19e0,0x228, -0x228,0x228,0x19e0,0x228,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x228,0x228,0x228,0x228,0x228, -0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x19e3,0x19e3,0x19e3,0x19e3,0x19e0,0x19e6,0x19e6,0x19e0, -0x19e6,0x19e6,0x228,0x228,0x228,0x228,0x228,0x228,0x17b8,0x19e0,0x19e0,0x228,0x228,0x228,0x228,0x228, -0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x228,0x1902,0x1905,0x1905,0x1905,0x1905,0x1905,0x1905,0x1905, 
-0x1905,0x1905,0x1905,0x1905,0x1905,0x1905,0x1902,0x1902,0x179d,0x179d,0x179d,0x179d,0x179a,0x179d,0x179d,0x17a0, -0x17a3,0x17a0,0x17a0,0x179d,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b, -0x22b,0x22b,0x22b,0x179a,0x179a,0x179a,0x179a,0x179a,0x17f7,0x17f7,0x17f7,0x17f7,0x17ee,0x17ee,0x17ee,0x17e8, -0x17eb,0x17eb,0x17eb,0x22e,0x22e,0x22e,0x22e,0x22e,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4,0x17f4, -0x17f4,0x17f4,0x22e,0x22e,0x22e,0x22e,0x17f1,0x17f1,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812, -0x1812,0x231,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812, -0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x180f,0x17fd,0x17fd,0x17fd,0x17fd, -0x17fd,0x17fd,0x17fd,0x231,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x17fd,0x180f,0x1800,0x1812,0x1815,0x1815,0x1809, -0x1806,0x1806,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x231,0x180c,0x180c,0x180c,0x180c, -0x180c,0x180c,0x180c,0x180c,0x180c,0x180c,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803,0x1803, -0x1803,0x1803,0x1803,0x1803,0x1803,0x231,0x231,0x231,0x1821,0x1824,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a, -0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x1818,0x1818,0x1818,0x1818,0x1818,0x1818,0x1818,0x1818, -0x1818,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983, -0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x237, -0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b,0x181b, -0x181b,0x237,0x237,0x181b,0x181b,0x181b,0x181b,0x181b,0x186c,0x1908,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a, -0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a, -0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a, 
-0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x182a,0x23d,0x23d,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e, -0x181e,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e,0x23d,0x1827,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e, -0x181e,0x1827,0x181e,0x181e,0x1827,0x181e,0x181e,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d, -0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x240,0x240,0x240, -0x240,0x240,0x240,0x240,0x240,0x240,0x240,0x240,0x240,0x240,0x240,0x240,0x240,0x240,0x240,0x240, -0x1845,0x1845,0x1836,0x1830,0x1830,0x1845,0x1833,0x1848,0x1848,0x1848,0x1848,0x184b,0x184b,0x183f,0x183c,0x1839, -0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x243,0x183f,0x243,0x1839,0x1968,0x243, -0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243, -0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243, -0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851,0x1851, -0x1851,0x1851,0x1851,0x1851,0x246,0x246,0x246,0x246,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e, -0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e,0x184e, -0x184e,0x184e,0x184e,0x184e,0x246,0x246,0x246,0x246,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f, -0x186f,0x186f,0x186f,0x186f,0x186f,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x249,0x249,0x249,0x249,0x249,0x249, -0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872, -0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x24c,0x24c,0x24c,0x24c,0x24c, -0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x24c,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e, -0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e, 
-0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x190e,0x24f,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x252, -0x18ae,0x18ae,0x252,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae, -0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18ae,0x18a2,0x18a2,0x18a2,0x18a2,0x18a2,0x18a2,0x252, -0x252,0x252,0x18a2,0x252,0x18a2,0x18a2,0x252,0x18a2,0x18a2,0x18a2,0x18a5,0x18a2,0x18a8,0x18a8,0x18b1,0x18a2, -0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x252,0x18ab,0x18ab,0x18ab,0x18ab,0x18ab,0x18ab,0x18ab,0x18ab, -0x18ab,0x18ab,0x252,0x252,0x252,0x252,0x252,0x252,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911, -0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911, -0x1911,0x1911,0x1911,0x1911,0x255,0x255,0x255,0x255,0x18c9,0x18c9,0x18c9,0x18c9,0x258,0x258,0x18cc,0x18cc, -0x18cc,0x18cc,0x18b4,0x18b4,0x18b4,0x18b4,0x18b4,0x18b4,0x18b4,0x18b4,0x18b4,0x18b4,0x18b4,0x18b4,0x18b4,0x18c6, -0x18b7,0x18ba,0x18bd,0x18cf,0x18cf,0x196b,0x18c0,0x18c0,0x18de,0x18e1,0x18f0,0x18f0,0x18e1,0x18e4,0x18de,0x18db, -0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x25e,0x18c9,0x18b4,0x18b4,0x18b4,0x18b4,0x18b4,0x18b4,0x18c6, -0x18c6,0x18b4,0x18b4,0x18b4,0x18c9,0x18c9,0x18c9,0x18c9,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261, -0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261, -0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x261,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980, -0x1980,0x1980,0x1980,0x1980,0x197d,0x197d,0x197d,0x1971,0x1971,0x1971,0x1971,0x1971,0x1971,0x1971,0x1971,0x1971, -0x197d,0x1977,0x1974,0x197a,0x264,0x264,0x264,0x264,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983, -0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983,0x1983, -0x1983,0x1983,0x1983,0x267,0x267,0x1983,0x1983,0x1983,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x26a,0x1992, 
-0x1992,0x26a,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992, -0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x1992,0x198f,0x198f,0x198f,0x198f,0x198f,0x26a, -0x1986,0x1986,0x26a,0x198f,0x198f,0x1986,0x198f,0x1989,0x1992,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a, -0x199b,0x199b,0x199e,0x199e,0x1995,0x1995,0x1995,0x1995,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d, -0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d, -0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a4,0x19a1,0x19a1,0x19a1, -0x19a4,0x19a1,0x19a1,0x19a1,0x19a1,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270,0x270, -0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad, -0x19ad,0x19ad,0x19ad,0x19a7,0x19a7,0x19aa,0x19aa,0x19b0,0x19b0,0x273,0x273,0x273,0x273,0x273,0x273,0x273, -0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3,0x19b3, -0x19b3,0x19b3,0x19b3,0x19b3,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276, -0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6, -0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b9,0x19c2,0x19b6,0x19b6,0x279,0x279,0x279,0x279,0x279, -0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c8,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c, -0x19d1,0x19d1,0x19d1,0x19d1,0x19d1,0x19d1,0x19d1,0x19d1,0x19d1,0x19d1,0x19d1,0x19d1,0x19d1,0x19d1,0x19d1,0x19d1, -0x19d1,0x19d1,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19ce,0x19ce,0x19ce, -0x19ce,0x19d4,0x19d4,0x19d4,0x19d4,0x19d4,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x282,0x282,0x282,0x282, -0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282, 
-0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x190b,0x29d,0x29d,0x29d, -0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x282,0x282,0x282,0x282, -0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282, -0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x282,0x954,0x954,0xb1f,0xb1f,0xb1f,0xb1f, -0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f, -0xb1f,0xb1f,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x18f6,0x18f6,0x18f6,0x18f6, -0x18f6,0x18f6,0x18f6,0x18f6,0x18f6,0x18f6,0x18f6,0x19da,0x19da,0x19da,0x19da,0x19da,0x288,0x288,0x288,0x288, -0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0xc78,0xc78,0xc78,0xc78, -0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0x12c0,0x12c0,0x12c0,0x28b,0x28b,0xeac,0xeac,0xeac,0xeac, -0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac, -0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b, -0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b, -0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82, -0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0x28e, -0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85, -0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85, -0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0x291,0x291,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5, -0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x294,0x294,0x294, -0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef, 
-0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef, -0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x297,0x297,0x17b5,0x17b5,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a, -0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b, -0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x1131,0x396,0x396,0x3a2,0xcba,0x3a5,0x3a5,0x3a5, -0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5, -0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a2,0x396,0x396,0x396,0x396,0x396,0x396,0x396, -0x396,0x3a2,0x3a2,0x3a2,0x3a2,0x39c,0x1134,0x1317,0x3a5,0x921,0x924,0x399,0x399,0x1131,0x1314,0x1314, -0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a8,0x3a5,0x3a5,0x396,0x396,0x8ac,0x8af,0x93f,0x93f, -0x93f,0x93f,0x93f,0x93f,0x93f,0x93f,0x93f,0x93f,0x39f,0xf93,0xf90,0x131a,0x131a,0x131a,0x131a,0x131a, -0x14df,0x1137,0x1137,0xee5,0xee5,0xdb3,0xee5,0xee5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5, -0x3a5,0x3a8,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a5,0x3a8,0x3a5,0x3a5,0x3a8,0x3a5,0x3a5,0x3a5, -0x3a5,0x3a5,0x1314,0x1317,0x399,0x3a5,0x3a2,0x3a2,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447, -0x447,0x1320,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447, -0x447,0x447,0x1320,0x1887,0x1887,0xfb1,0x438,0x441,0x483,0x483,0x483,0x483,0x483,0x483,0x483,0x483, -0x483,0x483,0x483,0x483,0x483,0x483,0x483,0x483,0x483,0x483,0x483,0x483,0x483,0x483,0x483,0xba0, -0xba0,0xdbf,0xdbf,0x8b2,0xdc2,0x1401,0x1401,0x1401,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486, -0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486, -0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x486,0x48c,0x48c,0x48c,0x114c,0x114c,0x114c,0x114c,0x114c, -0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489, 
-0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489, -0x489,0x489,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x48f,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c, -0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c, -0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x48c,0x498,0x492,0x498,0x492, -0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492, -0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492,0x492,0x492, -0x492,0x492,0x495,0x996,0xfe1,0xfe1,0xfe4,0xfe1,0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492, -0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492,0x498,0x492, -0x498,0x492,0xfe4,0xfe1,0xfe4,0xfe1,0xfe4,0xfe1,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4, -0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4,0x4a4, -0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x687,0x687,0x68a,0x4c2,0x696,0x693,0x693,0x690, -0x4ec,0x4ec,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0xac8,0x699,0x4ce,0x6b1,0x6b4,0x4e3,0x699,0x4d1,0x4d1, -0x4c2,0x4dd,0x4dd,0x687,0x4e9,0x4e6,0x68d,0x4bc,0x4b3,0x4b3,0x4b6,0x4b6,0x4b6,0x4b6,0x4b6,0x4b9, -0x4b6,0x4b6,0x4b6,0x4ad,0x4f5,0x4f2,0x4ef,0x4ef,0x6a5,0x4d7,0x4d4,0x6a2,0x69f,0x69c,0x6ae,0x4c5, -0x6ab,0x6ab,0x4da,0x4dd,0x6a8,0x6a8,0x4da,0x4dd,0x4bf,0x4c2,0x4c2,0x4c2,0x4e0,0x4cb,0x4c8,0xbb5, -0xace,0xad1,0xacb,0xacb,0xacb,0xacb,0xbac,0xbac,0xbac,0xbac,0xbb2,0xce7,0xce4,0xdce,0xdd1,0xbaf, -0xdd1,0xdd1,0xdd1,0xdd1,0xdce,0xdd1,0xdd1,0xba9,0x519,0x519,0x519,0x519,0x519,0x519,0x519,0x516, -0x51c,0x735,0x519,0x999,0x9ba,0xad4,0xad4,0xad4,0xbbb,0xbbb,0xdd7,0xdd7,0xdd7,0xdd7,0x1155,0x1158, -0x1158,0x1335,0x14cd,0x14f7,0x14fa,0x14fa,0x170d,0x188a,0x528,0x528,0x540,0x6c3,0x525,0x6c0,0x528,0x53d, -0x525,0x6c3,0x537,0x540,0x540,0x540,0x537,0x537,0x540,0x540,0x540,0x6cc,0x525,0x540,0x6c6,0x525, 
-0x534,0x540,0x540,0x540,0x540,0x540,0x525,0x525,0x52b,0x6c0,0x6c9,0x525,0x540,0x525,0x6cf,0x525, -0x540,0x52e,0x546,0x6d2,0x540,0x540,0x531,0x537,0x540,0x540,0x543,0x540,0x537,0x53a,0x53a,0x53a, -0x53a,0xae0,0xadd,0xcea,0xde0,0xbd0,0xbd3,0xbd3,0xbcd,0xbca,0xbca,0xbca,0xbca,0xbd3,0xbd0,0xbd0, -0xbd0,0xbd0,0xbc7,0xbca,0xddd,0xef1,0xef4,0xfea,0x115b,0x115b,0x115b,0x6d8,0x6d5,0x549,0x54c,0x54c, -0x54c,0x54c,0x54c,0x6d5,0x6d8,0x6d8,0x6d5,0x54c,0x6de,0x6de,0x6de,0x6de,0x6de,0x6de,0x6de,0x6de, -0x6de,0x6de,0x6de,0x6de,0x555,0x555,0x555,0x555,0x6db,0x6db,0x6db,0x6db,0x6db,0x6db,0x6db,0x6db, -0x6db,0x6db,0x54f,0x54f,0x54f,0x54f,0x54f,0x54f,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b, -0x558,0x561,0x561,0x55b,0x55b,0x55b,0x55e,0x558,0x55b,0x55b,0x558,0x558,0x558,0x558,0x55b,0x55b, -0x6e1,0x6e1,0x558,0x558,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b, -0x55b,0x55e,0x55e,0x55e,0x55b,0x55b,0x6e4,0x55b,0x6e4,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b,0x55b, -0x558,0x55b,0x558,0x558,0x558,0x558,0x558,0x558,0x55b,0x55b,0x558,0x6e1,0x558,0x558,0x558,0xae6, -0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xae6,0xbd6,0xbd6,0xbd6,0xbd6,0xbd6,0xbd6,0xbd6,0xbd6, -0xbd6,0xbd6,0xbd6,0xbd6,0x6ea,0x564,0x6ea,0x6ea,0x567,0x564,0x564,0x6ea,0x6ea,0x567,0x564,0x6ea, -0x567,0x564,0x564,0x6ea,0x564,0x6ea,0x573,0x570,0x564,0x6ea,0x564,0x564,0x564,0x564,0x6ea,0x564, -0x564,0x6ea,0x6ea,0x6ea,0x6ea,0x564,0x564,0x6ea,0x567,0x6ea,0x567,0x6ea,0x6ea,0x6ea,0x6ea,0x6ea, -0x6f0,0x56a,0x6ea,0x56a,0x56a,0x564,0x564,0x564,0x6ea,0x6ea,0x6ea,0x6ea,0x564,0x564,0x564,0x564, -0x6ea,0x6ea,0x564,0x564,0x564,0x567,0x564,0x564,0x567,0x564,0x564,0x567,0x6ea,0x567,0x564,0x564, -0x6ea,0x564,0x564,0x564,0x564,0x564,0x6ea,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564, -0x564,0x564,0x564,0x564,0x6ed,0x6ea,0x567,0x564,0x6ea,0x6ea,0x6ea,0x6ea,0x564,0x564,0x6ea,0x6ea, -0x564,0x567,0x6ed,0x6ed,0x567,0x567,0x564,0x564,0x567,0x567,0x564,0x564,0x567,0x567,0x564,0x564, 
-0x564,0x564,0x564,0x564,0x567,0x567,0x6ea,0x6ea,0x567,0x567,0x6ea,0x6ea,0x567,0x567,0x564,0x564, -0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x6ea,0x564,0x564,0x564,0x6ea,0x564,0x564, -0x564,0x564,0x564,0x564,0x564,0x6ea,0x564,0x564,0x564,0x564,0x564,0x564,0x567,0x567,0x567,0x567, -0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x6ea, -0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564, -0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564,0x564, -0x567,0x567,0x567,0x567,0x564,0x564,0x564,0x564,0x564,0x564,0x567,0x567,0x567,0x567,0x564,0x56d, -0x564,0x564,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9,0xbd9, -0x576,0xae9,0x576,0x576,0x576,0x576,0x576,0x576,0x582,0x57f,0x582,0x57f,0x576,0x576,0x576,0x576, -0x576,0x576,0x6f3,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x7f8,0x7f8,0x576,0x576,0x576,0x576, -0x57c,0x57c,0x576,0x576,0x576,0x576,0x576,0x576,0x579,0x7fe,0x7fb,0x576,0x576,0x576,0x576,0x576, -0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576, -0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0x576,0xae9, -0xbdf,0xae9,0xae9,0xae9,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585, -0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585,0x585, -0x585,0x585,0x585,0x585,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x58b,0xc48, -0xc48,0xc48,0xc48,0xc48,0xc48,0xc48,0xc48,0xc48,0xc48,0xc48,0xc48,0xc48,0xc48,0xc48,0xc48,0xc48, -0xc48,0xc48,0xc48,0xd62,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705, -0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x58e,0x591,0x591,0x591,0x591,0x591,0x591,0x591, -0x591,0x591,0x591,0x591,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705, 
-0x591,0x591,0x591,0x591,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705, -0x705,0x705,0x705,0x705,0x708,0x708,0x708,0x708,0x708,0x708,0x708,0x708,0x708,0x708,0x708,0x708, -0x708,0x708,0x708,0x708,0x594,0x594,0x708,0x708,0x708,0x708,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2,0xbe2, -0xbe2,0xbe2,0xbe2,0xbe2,0x70e,0x70e,0x597,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x70b,0x59a,0x59a, -0x597,0x597,0x59d,0x59d,0x59d,0x59d,0x70e,0x70e,0x59d,0x59d,0x711,0x70e,0x597,0x597,0x597,0x597, -0x70e,0x70e,0x59d,0x59d,0x711,0x70e,0x597,0x597,0x597,0x597,0x70e,0x70e,0x70b,0x597,0x59d,0x70e, -0x597,0x597,0x70b,0x70e,0x70e,0x70e,0x59d,0x59d,0x597,0x597,0x597,0x597,0x597,0x597,0x597,0x597, -0x597,0x597,0x597,0x597,0x597,0x597,0x70e,0x70b,0x70e,0x70b,0x597,0x59d,0x59d,0x59d,0x59d,0x59d, -0x59d,0x597,0x597,0x70b,0xaf2,0xaf2,0xaf2,0xaf2,0xaf2,0xaf2,0xaf2,0xaf2,0xbe5,0xbe5,0xbe5,0xbe8, -0xbe8,0xc63,0xc63,0xbe5,0x5ac,0x5ac,0x5ac,0x5ac,0x5a9,0x723,0x720,0x5a3,0x5a3,0x714,0x5a3,0x5a3, -0x5a3,0x5a3,0x71a,0x714,0x5a3,0x5a9,0x5a3,0x5a0,0xd6b,0xd6b,0xbee,0xbee,0xdec,0xaf5,0x5a6,0x5a6, -0x717,0x5af,0x717,0x5a6,0x5a9,0x5a3,0x5a9,0x5a9,0x5a3,0x5a3,0x5a9,0x5a3,0x5a3,0x5a3,0x5a9,0x5a3, -0x5a3,0x5a3,0x5a9,0x5a9,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a9,0x5ac,0x5ac,0x5a6, -0x5a3,0x5a3,0x5a3,0x5a3,0x726,0x5a3,0x726,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x801,0x801,0x801,0x801, -0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3,0x5a3, -0x5a3,0x5a3,0x5a3,0x5a9,0x726,0x723,0x5b2,0x726,0x714,0x71a,0x5a9,0x714,0x71d,0x714,0x714,0x5a3, -0x714,0x723,0x5b2,0x723,0xaf5,0xaf5,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf4, -0xbf1,0xbf1,0xde9,0xea3,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5, -0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b5,0x5b8,0x13bf,0x13bf,0x13bf,0x5b8,0x5b8,0x5b8,0x5b8, -0x5b8,0x5b8,0x5b8,0x5b8,0x1503,0x5c4,0x5cd,0x5c4,0x5c4,0x13bf,0x5b8,0x5b8,0x5cd,0x5cd,0x13c2,0x13c2, 
-0x5d0,0x5d0,0x5c1,0x5c7,0x5c1,0x5c1,0x5c7,0x5b8,0x5c7,0x5b8,0x5c7,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8, -0x5b8,0x5c7,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x13bf,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8, -0x5b8,0x5b8,0x5b8,0x5c7,0x5c7,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x72c,0x5b8,0x5b8, -0x5b8,0x5b8,0x5b8,0x5b8,0x5c7,0x5b8,0x5b8,0x5c7,0x5b8,0x5b8,0x5b8,0x5b8,0x13bf,0x5b8,0x13bf,0x5b8, -0x5b8,0x5b8,0x5b8,0x13bf,0x13bf,0x13bf,0x5b8,0x12ba,0x5b8,0x5b8,0x5b8,0x5be,0x5be,0x5be,0x5be,0x1341, -0x1341,0x5b8,0x5bb,0x5ca,0x5cd,0x5c1,0x5c1,0x5c1,0xbfa,0xbf7,0xbfa,0xbf7,0xbfa,0xbf7,0xbfa,0xbf7, -0xbfa,0xbf7,0xbfa,0xbf7,0xbfa,0xbf7,0x729,0x729,0x729,0x729,0x729,0x729,0x729,0x729,0x729,0x729, -0x5b8,0x5c7,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8, -0x13bf,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x13bf, -0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f4,0x5f4,0x5f4, -0x5f4,0x5f4,0x5f4,0x5f4,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5f1,0x5f7,0x5e8,0x5eb, -0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7, -0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5ee,0x5ee, -0x5ee,0x5ee,0x5ee,0x5ee,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1, -0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1, -0x5f4,0x5fa,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1, -0x5f4,0x5fa,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1, -0x5f7,0x5f1,0x5f7,0x5f1,0x5f7,0x5f1,0x5f7,0x5f1,0x5f7,0x5f1,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1, -0x5f4,0x5fa,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1,0x5f7,0x5f1,0x5f4,0x5fa, -0x5f7,0x5f1,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1,0x5f4,0x5fa,0x5f7,0x5f1,0x5f7,0x5f1,0x1344,0x1344, 
-0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x5f7,0x5f1,0x5f7,0x5f1, -0x5f7,0x5f1,0x5f4,0x5fa,0x5f4,0x5fa,0x5f7,0x5f1,0x5f7,0x5f1,0x5f7,0x5f1,0x5f7,0x5f1,0x5f7,0x5f1, -0x5f7,0x5f1,0x5f7,0x5f1,0x5f4,0x5f7,0x5f1,0x5f4,0x5f7,0x5f1,0x5f4,0x5fa,0x5f1,0x5f1,0x5f1,0x5f1, -0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1, -0x5f1,0x5f1,0x5f1,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f7,0x5f7,0x5f7,0x5f7, -0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f1,0x5f1,0x5f1, -0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f4,0x5f4,0x5f1,0x5f4, -0x5f1,0x5f4,0x5f1,0x5f1,0x5f4,0x5f1,0x5f1,0x5f4,0x5f1,0x5f4,0x5f1,0x5f1,0x5f4,0x5f1,0x5f4,0x5f4, -0x5f1,0x5f1,0x5f1,0x5f4,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f4,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1, -0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1, -0x5f4,0x5f4,0x5f1,0x5f1,0x5f4,0x5f1,0x5f4,0x5f1,0x5f1,0x5f1,0x5f1,0x5f1,0x5f4,0x5f4,0x5f4,0x5f4, -0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4, -0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5f4,0x5fa, -0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7, -0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7, -0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa, -0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7,0x5f7, -0x5fd,0x5fd,0x5fd,0x5fd,0xff6,0xff6,0xff6,0x1506,0x1506,0x1506,0x1506,0x1506,0x1506,0x1506,0x1713,0x1713, -0x85e,0x864,0x864,0x870,0x870,0x861,0x858,0x861,0x858,0x861,0x858,0x861,0x858,0x861,0x858,0x861, -0x60c,0x60c,0x606,0x60c,0x606,0x60c,0x606,0x60c,0x606,0x60c,0x606,0x609,0x60f,0x60c,0x606,0x60c, 
-0x606,0x609,0x60f,0x60c,0x606,0x60c,0x606,0x609,0x60f,0x60c,0x606,0x609,0x60f,0x60c,0x606,0x609, -0x60f,0x60c,0x606,0x60c,0x606,0x60c,0x606,0x60c,0x606,0x60c,0x606,0x609,0x60f,0x60c,0x606,0x609, -0x60f,0x60c,0x606,0x609,0x60f,0x60c,0x606,0x609,0x60f,0x60c,0x606,0x609,0x60f,0x60c,0x606,0x609, -0x60f,0x60c,0x606,0x609,0x60f,0x60c,0x606,0x609,0x60f,0x60c,0x606,0x609,0x6f9,0x6f9,0x6f9,0x6f9, -0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9,0x6f9, -0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6, -0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6,0x6f6, -0x6f6,0x6f6,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x702,0x6ff, -0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6ff,0x6fc,0x6fc,0x6fc,0x6fc, -0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x6fc,0x705,0x705,0x705,0x705, -0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705, -0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x72f,0x72f,0x72f,0x72f, -0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f, -0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0xc51,0x8c4,0x8be,0x8bb, -0x8c1,0x8b8,0x744,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x747,0x8ca,0x744,0x744,0x744, -0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744, -0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x744,0x8c7,0x8c7, -0x74a,0x8d9,0x8dc,0x8e2,0x804,0x810,0x8f7,0x80d,0x8d0,0x8cd,0x8d0,0x8cd,0x8d6,0x8d3,0x8d6,0x8d3, -0x8d0,0x8cd,0x80a,0x8e2,0x8d0,0x8cd,0x8d0,0x8cd,0x8d0,0x8cd,0x8d0,0x8cd,0x8e5,0x8ee,0x8eb,0x8eb, -0x750,0x78c,0x78c,0x78c,0x78c,0x78c,0x78c,0x786,0x786,0x786,0x786,0x786,0x786,0x786,0x786,0x786, 
-0x786,0x786,0x786,0x786,0x786,0x786,0x786,0x786,0x786,0x786,0x786,0x753,0x76e,0x74d,0x774,0x777, -0x771,0x789,0x789,0x789,0x789,0x789,0x789,0x783,0x783,0x783,0x783,0x783,0x783,0x783,0x783,0x783, -0x783,0x783,0x783,0x783,0x783,0x783,0x783,0x783,0x783,0x783,0x783,0x753,0x76e,0x74d,0x76e,0xc54, -0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2, -0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2,0x7f2, -0x7f2,0x7f2,0x12b4,0x12b4,0x12b4,0x12b4,0x12b4,0x7f5,0x80a,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d,0x80d, -0x80d,0x80d,0x92d,0x92d,0x92d,0x92d,0x813,0x813,0x8e8,0x8f4,0x8f4,0x8f4,0x8f4,0x8f1,0x807,0x8df, -0xb19,0xb19,0xb19,0xc66,0xc84,0xc81,0xb34,0x8b5,0x819,0x816,0x819,0x81c,0x816,0x819,0x816,0x819, -0x816,0x819,0x816,0x816,0x816,0x816,0x816,0x816,0x819,0x819,0x816,0x819,0x819,0x816,0x819,0x819, -0x816,0x819,0x819,0x816,0x819,0x819,0x816,0x816,0xc87,0x82b,0x825,0x82b,0x825,0x82b,0x825,0x82b, -0x825,0x82b,0x825,0x825,0x828,0x825,0x828,0x825,0x828,0x825,0x828,0x825,0x828,0x825,0x828,0x825, -0x828,0x825,0x828,0x825,0x828,0x825,0x828,0x825,0x828,0x825,0x828,0x82b,0x825,0x828,0x825,0x828, -0x825,0x828,0x825,0x825,0x825,0x825,0x825,0x825,0x828,0x828,0x825,0x828,0x828,0x825,0x828,0x828, -0x825,0x828,0x828,0x825,0x828,0x828,0x825,0x825,0x825,0x825,0x825,0x82b,0x825,0x82b,0x825,0x82b, -0x825,0x825,0x825,0x825,0x825,0x825,0x82b,0x825,0x825,0x825,0x825,0x825,0x828,0x82b,0x82b,0x828, -0x828,0x828,0x828,0x8fd,0x900,0x82e,0x831,0xc6f,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837, -0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837, -0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x83a,0x837,0x837,0x837,0x837,0x837,0x837,0x837, -0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837,0x837, -0x837,0x837,0x837,0x837,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843, 
-0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843,0x843, -0xd74,0xd74,0xea6,0x83d,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909, -0xd6e,0xd6e,0xd6e,0xd6e,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846,0x846, -0x846,0x846,0x846,0x846,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912, -0x912,0x912,0x912,0x912,0x912,0x849,0x849,0x849,0x849,0x849,0x849,0xd77,0xd77,0xd77,0xd77,0x915, -0x915,0x915,0x915,0x915,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849, -0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849,0x849, -0x849,0x849,0x849,0x849,0x849,0x849,0xd77,0xd77,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c, -0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c, -0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912, -0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f, -0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f,0x84f, -0x84f,0x84f,0xea9,0xea9,0xea9,0xea9,0xea9,0xea9,0xea9,0xea9,0xea9,0xea9,0xea9,0xea9,0xea9,0xea9, -0xea9,0xea9,0xea9,0xea9,0xea9,0xea9,0xea9,0xea9,0x1119,0x1119,0x1119,0x1119,0x852,0x852,0x852,0x852, -0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852, -0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x855,0x855, -0x852,0x855,0x852,0x855,0x855,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x852,0x855, -0x852,0x855,0x852,0x855,0x855,0x852,0x852,0x855,0x855,0x855,0x852,0x852,0x852,0x852,0x14be,0x14be, -0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78, -0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909, 
-0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909,0x909, -0x12f6,0x12f6,0x12f6,0x12f6,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0xd6e,0xc72,0xc72,0xc72, -0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0x90c,0x90c,0x90c,0x90c, -0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c, -0x90c,0x90c,0x90c,0x90f,0x90c,0x90f,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c, -0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72, -0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912, -0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912,0x912, -0x912,0x912,0x912,0x912,0x912,0x912,0x912,0xd77,0x990,0x972,0x972,0x972,0x972,0x96c,0x972,0x972, -0x984,0x972,0x972,0x96f,0x97b,0x981,0x981,0x981,0x981,0x981,0x984,0x96c,0x978,0x96c,0x96c,0x96c, -0x963,0x963,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x987,0x987,0x987,0x987,0x987,0x987,0x987,0x987, -0x987,0x987,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96f,0x963,0x96c,0x963, -0x96c,0x963,0x97e,0x975,0x97e,0x975,0x98d,0x98d,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c, -0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c, -0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99c,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f, -0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f, -0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x99f,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2, -0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2, -0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9a2,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab, -0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab, 
-0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9a5,0x9a5,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae, -0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae, -0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9a8,0x9a8,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab, -0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab, -0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ab,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae, -0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae, -0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9b1,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4, -0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4, -0x9b4,0x9b4,0x9b4,0x9b4,0x9b1,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4, -0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4,0x9b4, -0xa41,0xa41,0xfdb,0xa41,0xa41,0xa41,0xa44,0xa41,0xfdb,0xa41,0xa41,0xfd2,0xa3b,0xa2f,0xa2f,0xa2f, -0xa2f,0xa3e,0xa2f,0xfc0,0xfc0,0xfc0,0xa2f,0xa32,0xa3b,0xa35,0xfc6,0xfd5,0xfd5,0xfc0,0xfc0,0xfdb, -0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xb3a,0xa47,0xa47,0xa38,0xa38,0xa38,0xa38, -0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa3e,0xa3e,0xa2f,0xa2f,0xfdb,0xfdb,0xfdb,0xfdb,0xfc0,0xfc0, -0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41, -0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41,0xa41, -0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xdcb,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56, -0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56, -0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xdcb,0xa56,0xa56,0xa56,0xa56, -0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa56,0xa5c,0xa5c,0xa5c,0xa5c, 
-0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c, -0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa5c,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa5f,0xa65,0xa62,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0x1152,0x1152,0x1152,0x1152,0x1152,0x1152,0x1152,0x1152,0x1152,0x114f,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, -0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62,0xa62, -0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77, -0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77,0xa77, -0xa9b,0xa9b,0xa9b,0xa9e,0xa9e,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b, -0xa9b,0xa9b,0xa9b,0xa9b,0xa83,0xa83,0xa98,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa98,0xa98, -0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b, -0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b,0xa9b, -0xabc,0xabc,0xabc,0xabc,0xabc,0xaa7,0xaa7,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc, -0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc, -0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabf,0xabc,0xabc,0xabc,0xabc, -0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc, -0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9, -0xaec,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9,0xae9, -0xae9,0xae9,0xae9,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8, -0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8, 
-0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xaf8,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a, -0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a, -0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10, -0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10, -0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb10,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f, -0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f, -0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb1f,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22, -0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb25,0xb22,0xb22, -0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22, -0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22,0xb22, -0xb28,0xb28,0xc75,0xc75,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28, -0xb28,0xb28,0xb28,0xb28,0xc75,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28, -0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49, -0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0xb49,0x1509, -0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xcff,0xcff,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f, -0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f,0xb4f, -0xb4f,0xb4f,0xcfc,0xcfc,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d, -0xd4d,0xd4d,0xd4d,0xd4d,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52, -0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52,0xb52, -0xb52,0xb52,0xb52,0xb52,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55, 
-0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55,0xb55, -0xb55,0xb55,0xb55,0xb55,0xb64,0xb64,0xb64,0xb64,0xb64,0xb5b,0xb67,0xb6d,0xb6d,0xb6d,0xb61,0xb61, -0xb61,0xb6a,0xb5e,0xb5e,0xb5e,0xb5e,0xb5e,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb58,0xb6d, -0xb6d,0xb6d,0xb6d,0xb6d,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61, -0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61, -0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb64,0xb64,0xb6d,0xb6d,0xb6d,0xb61,0xb61,0xb6d,0xb6d,0xb6d, -0xb6d,0xb6d,0xb6d,0xb6d,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61, -0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb6d,0xb6d,0xb6d,0xb6d,0xb61,0xb61, -0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb64,0xb64,0xb64,0xb64,0xb64, -0xb64,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61, -0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0x1716,0x1716, -0xb79,0xb70,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb70,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb76,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb70,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb70,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb70,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73, 
-0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73, -0xb73,0xb73,0xb73,0xb73,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, -0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76, -0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb79,0xb79,0xb79,0xb79,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, -0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb82,0xb82,0xb82,0xb82, -0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82, -0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb82,0xb85,0xb85,0xb85,0xb85, -0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85, -0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xb85,0xbdf,0xbdf,0xbdf,0xbdf, -0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf,0xbdf, -0xbdf,0xbdf,0xbdc,0xbdf,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc,0xbdc, -0xbdc,0xbdc,0xbdc,0xced,0xcf0,0xde3,0xde3,0xde3,0xde3,0xde3,0xde3,0xde3,0xde3,0xde3,0xde3,0xde3, -0xefd,0xefd,0xefd,0xefd,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbf1,0xbeb,0xbeb,0xbeb,0xbeb,0xcf3,0xcf3, -0xcf3,0xcf3,0xcf3,0xcf3,0xcf6,0xcf6,0xde9,0xea0,0xde9,0xde9,0xde9,0xde9,0xde6,0xde9,0xde6,0xde9, -0xde9,0xff0,0x128a,0x128a,0xdf2,0xdf2,0xdf2,0xdf2,0xdf2,0xdf8,0xdf5,0xf0f,0xf0f,0xf0f,0xf0f,0x1407, 
-0x1002,0x1407,0x134d,0x134d,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc27, -0xc27,0xc27,0xc27,0xc27,0xc27,0xc27,0xc5a,0xc57,0xc5a,0xc57,0xc5a,0xc57,0x1113,0x1110,0x1008,0x1005, -0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a,0xc2a, -0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d, -0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d, -0xc30,0xc30,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc33,0xc33,0xc33,0xc39, -0xc36,0xc60,0xc5d,0xc39,0xc36,0xc39,0xc36,0xc39,0xc36,0xc39,0xc36,0xc39,0xc36,0xc39,0xc36,0xc39, -0xc36,0xc39,0xc36,0xc39,0xc36,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33, -0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33, -0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc39,0xc36,0xc39,0xc36,0xc33,0xc33,0xc33,0xc33, -0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33, -0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc33,0xc39,0xc36,0xc33,0xc33,0xc3c,0xc3c,0xc3c,0xc3c, -0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc42,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c, -0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c, -0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc42,0xc42,0xc42,0xc3c, -0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c, -0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3c,0xc3f,0xc3c,0xc3c,0xc3c, -0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78, -0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78,0xc78, -0xcf9,0xd68,0xde6,0xde6,0xde6,0xde6,0xde6,0xde6,0xde6,0xde6,0xea0,0xea0,0xde6,0xde6,0xde6,0xde6, 
-0xde9,0xde9,0xf00,0xff0,0xff0,0xff0,0xff0,0xff0,0xff0,0xff0,0xff0,0xff0,0xff0,0x12b7,0x12b7,0x128d, -0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d, -0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d,0xd1d, -0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd23,0xd23,0xd23,0xd23,0xd23,0xd20,0xd35,0xd35,0xd35,0xd2f, -0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd2f,0xd35,0xd35,0xd35,0xd35, -0xd29,0xd29,0xd32,0xd32,0xd32,0xd32,0xd26,0xd26,0xd26,0xd26,0xd26,0xd2c,0xdfe,0xdfe,0xdfe,0xdfe, -0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfb,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe,0xdfe, -0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd2f,0xd35, -0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35,0xd29,0xd29,0xd29, -0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c, -0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c,0xd2c, -0xd38,0xd38,0xd38,0xd38,0xd38,0xd3b,0xd3b,0xd3b,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xe01,0xe01, -0xe01,0xe01,0xe01,0xe01,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0xf12,0x111c,0x111c,0x100b,0x100b,0x100b, -0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e, -0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e,0xd3e, -0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44, -0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44, -0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d, -0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d, -0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59, 
-0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59,0xd59, -0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65, -0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65, -0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07, -0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07,0xe07, -0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d, -0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a, -0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d, -0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d, -0xecd,0xecd,0xe1f,0xe1f,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0xf15,0x1017,0x1017,0x1017,0x1017,0x1017, -0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014, -0xe2e,0xe2b,0xe2e,0xe2b,0xe2e,0xe2b,0xe2e,0xe2b,0xe2e,0xe2b,0xe2e,0xe2b,0xe2e,0xe2b,0xe2e,0xe2b, -0xe2e,0xe2b,0xe2e,0xe2b,0xe2e,0xe2b,0xe2e,0xe2b,0xe2e,0xe2b,0xe2e,0xe2b,0xe2e,0xe2b,0xe2e,0xe2b, -0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a, -0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a,0xe3a, -0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40, -0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40,0xe40, -0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58, -0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xf18,0xf18,0xf18,0xf18,0x101a,0x101a,0x101a,0x101a,0x101a, -0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61, 
-0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61, -0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a, -0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a,0xe6a, -0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73, -0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe6d, -0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70, -0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xe73,0xe73,0xe73,0xe73,0xe73, -0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe7c,0xe79,0xe79, -0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe76,0xe7f,0x1026,0x1020,0x102f,0x101d,0xe7c,0xe7c,0x101d,0x101d, -0xe91,0xe91,0xe82,0xe91,0xe91,0xe91,0xe88,0xe91,0xe91,0xe91,0xe91,0xe82,0xe91,0xe91,0xe91,0xe91, -0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91, -0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94, -0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94,0xe94, -0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac, -0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac,0xeac, -0xeca,0xeca,0xeca,0xeca,0xeca,0xeca,0xeca,0xeca,0xeca,0xeca,0xeca,0xeca,0xeca,0xeca,0xeca,0xeca, -0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125, -0xefd,0xefd,0xefd,0xefa,0xefa,0xefa,0xefa,0xefa,0x115e,0x13b6,0x13b6,0x13b6,0x13b6,0x1338,0x1338,0x1338, -0x13b9,0x133b,0x133b,0x13b9,0x14fd,0x14fd,0x14fd,0x14fd,0x1500,0x1500,0x1500,0x17c7,0x17c7,0x17c7,0x17c7,0x188d, 
-0xf12,0xf12,0xf12,0xf12,0x100b,0x100b,0x100b,0x100b,0x100b,0x100b,0x100b,0x100b,0x100b,0x100b,0x100b,0x100b, -0x100e,0x100e,0x100e,0x100e,0x100e,0x100e,0x100e,0x100e,0x100e,0x100e,0x100e,0x100e,0x100e,0x100e,0x100e,0x100e, -0xf33,0xf33,0xf33,0xf33,0xf45,0xf4e,0xf51,0xf4e,0xf51,0xf4e,0xf51,0xf4e,0xf51,0xf4e,0xf51,0xf4e, -0xf4e,0xf4e,0xf51,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e, -0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf36,0xf45,0xf33,0xf33,0xf33,0xf33,0xf33,0xf48, -0xf33,0xf48,0xf45,0xf45,0xf5a,0xf57,0xf5a,0xf5a,0xf5a,0xf57,0xf57,0xf5a,0xf57,0xf5a,0xf57,0xf5a, -0xf57,0x1041,0x1041,0x1041,0x117c,0x1038,0x1041,0x1038,0xf57,0xf5a,0xf57,0xf57,0x1038,0x1038,0x1038,0x1038, -0x103b,0x103e,0x117c,0x117c,0xf5d,0xf5d,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a, -0x1053,0x104a,0x1053,0x104a,0x104a,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a, -0x1053,0x104a,0x1053,0x104a,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63, -0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63,0xf63, -0xf63,0xf63,0xf63,0xf63,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72, -0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72, -0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0xf72,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, -0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78, -0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78, -0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xf78,0xfc0,0xfdb,0xfd2,0xfcf,0xfcf,0xfdb,0xfdb,0xfd2, -0xfd2,0xfcf,0xfcf,0xfcf,0xfcf,0xfcf,0xfdb,0xfdb,0xfdb,0xfc0,0xfc0,0xfc0,0xfc0,0xfdb,0xfdb,0xfdb, -0xfdb,0xfdb,0xfdb,0xfdb,0xfdb,0xfdb,0xfdb,0xfdb,0xfdb,0xfdb,0xfc0,0xfd2,0xfd5,0xfc0,0xfc0,0xfd8, 
-0xfd8,0xfd8,0xfd8,0xfd8,0xfd8,0xfc3,0xfdb,0xfd8,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc,0xfcc, -0xfcc,0xfcc,0x1146,0x1146,0x1143,0x1140,0xfc9,0xfc9,0xff3,0xff3,0xff3,0xff3,0x12b7,0x12b7,0x128d,0x128d, -0x1293,0x128a,0x128a,0x128a,0x128a,0x128d,0x13bc,0x1293,0x128d,0x1293,0x128a,0x1293,0x12b7,0x128a,0x128a,0x128a, -0x128d,0x128d,0x128a,0x128a,0x128d,0x128a,0x128a,0x128d,0x100e,0x100e,0x100e,0x100e,0x100e,0x100b,0x100b,0x100e, -0x100e,0x100e,0x100e,0x100e,0x100e,0x1515,0x1515,0x1515,0x111c,0x100b,0x100b,0x100b,0x100b,0x12c3,0x129c,0x129c, -0x129c,0x129c,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x102c,0x102c,0x1029,0x1023,0x1029,0x1023,0x1029,0x1023, -0x1029,0x1023,0x1020,0x1020,0x1020,0x1020,0x1035,0x1032,0x1020,0x1179,0x1413,0x1416,0x1416,0x1413,0x1413,0x1413, -0x1413,0x1413,0x1419,0x1419,0x1530,0x1524,0x1524,0x1521,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a, -0x1047,0x1044,0x1044,0x1053,0x104a,0x135c,0x1359,0x171f,0x135c,0x1359,0x1422,0x141f,0x1533,0x1533,0x1539,0x1533, -0x1539,0x1533,0x1539,0x1533,0x1539,0x1533,0x1539,0x1533,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a, -0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a, -0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x104a,0x104d,0x104a,0x104a,0x104a,0x104a,0x104a,0x104a,0x104a, -0x104a,0x1053,0x104a,0x1053,0x104a,0x1053,0x1053,0x104a,0x1056,0x1056,0x105c,0x1062,0x1062,0x1062,0x1062,0x1062, -0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062, -0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x1062,0x105c,0x1056,0x1056,0x1056,0x1056,0x105c,0x105c, -0x1056,0x1056,0x105f,0x142b,0x1428,0x1428,0x1062,0x1062,0x1059,0x1059,0x1059,0x1059,0x1059,0x1059,0x1059,0x1059, -0x1059,0x1059,0x142e,0x142e,0x142e,0x142e,0x142e,0x142e,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077, 
-0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077, -0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1077,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080, -0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080,0x1080, -0x1083,0x1083,0x1083,0x1086,0x1083,0x1083,0x1089,0x1089,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c, -0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c, -0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x108c,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095, -0x1095,0x1095,0x1095,0x1095,0x1098,0x108f,0x109e,0x109b,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095, -0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095, -0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1362,0x135f,0x10b0,0x10aa,0x10b0,0x10aa,0x10b0,0x10aa, -0x10b0,0x10aa,0x10b0,0x10aa,0x10b0,0x10aa,0x10ad,0x112e,0x10a1,0x10a1,0x10a1,0x10a7,0x1431,0x1431,0x1431,0x1431, -0x1431,0x1431,0x1431,0x1431,0x10a4,0x10a4,0x10a7,0x10b3,0x10b0,0x10aa,0x10b0,0x10aa,0x10b0,0x10aa,0x10b0,0x10aa, -0x10b0,0x10aa,0x10b0,0x10aa,0x10b0,0x10aa,0x10b0,0x10aa,0x10b0,0x10aa,0x10b0,0x10aa,0x10b0,0x10aa,0x10b0,0x10aa, -0x10b0,0x10aa,0x10b0,0x10aa,0x10b0,0x10aa,0x10b0,0x10aa,0x1548,0x1545,0x1548,0x1545,0x154b,0x154b,0x1728,0x1431, -0x10bc,0x10bc,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf, -0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf,0x10bf, -0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10bc,0x10ce,0x10ce,0x10ce,0x10ce, -0x10ce,0x10ce,0x10c5,0x10c5,0x10c5,0x10c5,0x10c5,0x10c8,0x10c8,0x10c8,0x1122,0x10d1,0x10e0,0x10e0,0x10e0,0x10e0, 
-0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10cb,0x10cb,0x10cb,0x10cb, -0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10cb,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce, -0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ce,0x10ef,0x10ef,0x10ef,0x10ef, -0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef, -0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x1101,0x1101,0x1101,0x1101, -0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101, -0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x1101,0x110a,0x110a,0x110a,0x110a, -0x111f,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a, -0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110a,0x110d,0x110d,0x110d,0x110d, -0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d, -0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x1119,0x1119,0x1119,0x1119, -0x12bd,0x12bd,0x12bd,0x12bd,0x12bd,0x12bd,0x12bd,0x12bd,0x14bb,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6, -0x17a6,0x17a6,0x18f6,0x18f6,0x18f6,0x18f6,0x18f6,0x18f6,0x18f6,0x18f6,0x18f6,0x18f6,0x118e,0x118e,0x118e,0x118e, -0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e,0x118e, -0x118e,0x118e,0x1185,0x1185,0x1188,0x1188,0x118e,0x1185,0x1185,0x1185,0x1185,0x1185,0x1194,0x1194,0x1194,0x1194, -0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194, -0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x1194,0x11af,0x11af,0x11af,0x11af, 
-0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af, -0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11bb,0x11bb,0x11bb,0x11bb, -0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb, -0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11b8,0x11be,0x11ca,0x11ca,0x11ca,0x11ca, -0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca, -0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11d0,0x11d0,0x11d0,0x11d0, -0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x1308,0x11d6,0x130b, -0x11d6,0x11d6,0x11d6,0x11d6,0x11d3,0x11d3,0x11d3,0x11d6,0x172b,0x172e,0x1950,0x194d,0x11d9,0x11d9,0x11d9,0x11e8, -0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee, -0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11dc, -0x11e8,0x11e8,0x11d9,0x11d9,0x11d9,0x11d9,0x11e8,0x11e8,0x11d9,0x11e8,0x11e8,0x11e8,0x11fa,0x11fa,0x11fa,0x11fa, -0x11fa,0x11fa,0x11fa,0x11fa,0x11fa,0x11fa,0x11fa,0x11fa,0x11fa,0x11fa,0x11fa,0x11fa,0x11fd,0x11fa,0x11fa,0x11fa, -0x11fa,0x11fa,0x11fa,0x11f4,0x11f4,0x11f4,0x11fa,0x11f7,0x1551,0x1554,0x1557,0x1557,0x120c,0x120c,0x120c,0x120c, -0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x1200,0x120c,0x1200,0x1200, -0x1200,0x1215,0x1215,0x1200,0x1200,0x1215,0x120c,0x1215,0x1215,0x120c,0x1200,0x1203,0x120c,0x120c,0x120c,0x120c, -0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c, -0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x120c,0x1227,0x1227,0x1227,0x1227, 
-0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227, -0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x1227,0x123f,0x123f,0x123f,0x123f, -0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f, -0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123f,0x123c,0x123c,0x123c,0x1248,0x1248,0x1248,0x1248, -0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248, -0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1248,0x1257,0x1257,0x1257,0x1257, -0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257, -0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x1257,0x125d,0x125d,0x126c,0x126f, -0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f, -0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x1272,0x126f,0x1272,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f, -0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x126f,0x1272,0x126f,0x126f,0x126f,0x126f,0x126c,0x126c,0x126c,0x1260, -0x1260,0x1260,0x1260,0x126c,0x126c,0x1266,0x1263,0x1269,0x1269,0x1278,0x1275,0x1275,0x127b,0x127b,0x127b,0x127b, -0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b, -0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x1281,0x1281,0x1281,0x127e, -0x127e,0x127e,0x127b,0x127b,0x127b,0x127b,0x127e,0x127b,0x127b,0x127b,0x1281,0x127e,0x1281,0x127e,0x127b,0x127b, -0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b, -0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x1281,0x127e,0x127e,0x127b,0x127b,0x127b,0x127b, 
-0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a2,0x12a5,0x12a5,0x12a5,0x1284,0x1956, -0x13b0,0x12ae,0x13b0,0x13b0,0x13b0,0x13b0,0x13b0,0x13b0,0x13b0,0x13b0,0x13b0,0x13b0,0x13b0,0x12ae,0x13b0,0x12ae, -0x128d,0x128d,0x133e,0x128a,0x133e,0x133e,0x133e,0x133e,0x128a,0x1290,0x12b7,0x128a,0x128a,0x128a,0x128a,0x128a, -0x1290,0x1293,0x12b7,0x12b7,0x1293,0x12b7,0x128a,0x1293,0x1293,0x1296,0x12b7,0x128a,0x128a,0x12b7,0x128d,0x128d, -0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x129f,0x129f,0x129f,0x129f,0x13c8,0x13a7, -0x12a8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x1854,0x1854,0x1854,0x1854,0x1854, -0x13b0,0x13b0,0x12ae,0x13b0,0x13b0,0x13b0,0x12ae,0x13b0,0x13b0,0x13b0,0x12a8,0x12a8,0x12a8,0x12a8,0x12a8,0x13aa, -0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x12ab,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x12ab, -0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5, -0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5,0x12d5, -0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380, -0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380,0x1380, -0x1395,0x1386,0x1395,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398, -0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398,0x1398, -0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x1386,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e, -0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e, -0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x139e,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4, 
-0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4, -0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13a4,0x13d4,0x13d1,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc, -0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc, -0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13da,0x13da,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13da,0x13dd,0x13dd,0x13dd,0x13da,0x13dd,0x13da,0x13dd,0x13da,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13e0,0x13dd,0x13dd,0x13dd,0x13dd,0x13da,0x13dd,0x13da,0x13da,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13da,0x13da,0x13da,0x13da,0x13da,0x13da,0x13da,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13da, -0x13da,0x13da,0x13da,0x13da,0x13da,0x13da,0x13da,0x13da,0x13da,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13da,0x13da,0x13da,0x13da,0x13da,0x13da,0x13da,0x13da,0x13da,0x13da, -0x13da,0x13da,0x1563,0x1563,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x156f,0x1569,0x1569,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f, -0x156f,0x17a9,0x17a9,0x17a9,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x156f,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x156f,0x17a9,0x17a9,0x13dd,0x13dd,0x13dd,0x13dd, 
-0x13dd,0x13e0,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd, -0x1569,0x1569,0x156f,0x156f,0x1569,0x156f,0x156f,0x156f,0x1566,0x1566,0x156f,0x156f,0x13dd,0x13dd,0x13e0,0x13e0, -0x13e0,0x16da,0x13dd,0x13e0,0x13dd,0x13dd,0x13e0,0x1572,0x1572,0x156f,0x156f,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9, -0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x156f,0x13dd,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x1569,0x1569,0x156f, -0x16da,0x156f,0x1569,0x156f,0x17a9,0x17a9,0x17a9,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x13dd,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x156f,0x13dd,0x156f,0x13e0,0x13e0, -0x13dd,0x13dd,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13e0,0x13e0, -0x13e0,0x13e0,0x13dd,0x13dd,0x13dd,0x13dd,0x13e0,0x13dd,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0,0x13e0, -0x13e0,0x13dd,0x13dd,0x13dd,0x13e0,0x13dd,0x13dd,0x13dd,0x13dd,0x13e0,0x13e0,0x13e0,0x13dd,0x13e0,0x13e0,0x13e0, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x16da,0x13dd,0x13dd,0x13dd,0x13dd,0x156f,0x1569,0x17a9, -0x1437,0x1437,0x1437,0x1437,0x1563,0x1563,0x1566,0x1566,0x1566,0x156c,0x156f,0x17a9,0x17a9,0x17a9,0x17a9,0x1731, -0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd, -0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x156f,0x156f,0x1569,0x1569,0x156f,0x1572,0x1572,0x156f,0x156f, 
-0x156f,0x156f,0x185d,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x156f,0x1569,0x156f,0x1569,0x1569,0x1569,0x1569, -0x156f,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x156f,0x1569,0x1569,0x1569,0x156f,0x1566,0x1566,0x1566,0x1566, -0x1566,0x1566,0x156f,0x13dd,0x13dd,0x13dd,0x13dd,0x13dd,0x14c1,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3, -0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x14c1,0x13e3,0x13e3,0x13e3,0x14c1,0x13e3,0x14c1, -0x13e3,0x14c1,0x13e3,0x14c1,0x13e3,0x13e3,0x13e3,0x14c1,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x14c1,0x14c1, -0x13e3,0x13e3,0x13e3,0x13e3,0x14c1,0x13e3,0x14c1,0x14c1,0x13e3,0x13e3,0x13e3,0x13e3,0x14c1,0x13e3,0x13e3,0x13e3, -0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x13e3,0x16e0,0x16e0,0x17af,0x17af,0x13e6,0x13e6,0x13e6, -0x13e3,0x13e3,0x13e3,0x13e6,0x13e6,0x13e6,0x13e6,0x13e6,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f, -0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9, -0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9, -0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13ec,0x13e9,0x13e9,0x13e9,0x13e9, -0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13ec,0x13ec,0x13ec,0x13e9, -0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13e9,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef, -0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef, -0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x17dc,0x17dc,0x17d9,0x1734,0x143d,0x143d,0x143d,0x143d, -0x143d,0x143d,0x143a,0x143a,0x143a,0x143a,0x143a,0x143a,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d, -0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x143d,0x157b,0x1449,0x1449,0x1449,0x145b,0x145b,0x145b,0x145b,0x145b, 
-0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b, -0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x145b,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476, -0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476, -0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x1476,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f, -0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f, -0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x147f,0x1485,0x1485,0x1491,0x1497,0x1497,0x1497,0x1497,0x1497, -0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497, -0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1497,0x1491,0x1491,0x1491,0x1485,0x1485, -0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1485,0x1491,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8, -0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8, -0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x14b8,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515, -0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515,0x1515, -0x1515,0x1515,0x192c,0x192c,0x192c,0x1515,0x1515,0x1515,0x1569,0x1569,0x156f,0x156f,0x156f,0x1569,0x1569,0x1569, -0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x156f,0x156f,0x156f,0x1566,0x1566,0x1566,0x1566, -0x1566,0x1566,0x1566,0x1566,0x156f,0x156f,0x156f,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x156f, -0x1569,0x1569,0x156f,0x156f,0x156f,0x156f,0x1569,0x1569,0x1572,0x1569,0x1569,0x1569,0x1569,0x16dd,0x16dd,0x1569, -0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x185a,0x156f,0x1569,0x1569,0x156f,0x1569,0x1569,0x1569, 
-0x1569,0x1569,0x1569,0x1569,0x1569,0x156f,0x156f,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569, -0x156f,0x1569,0x1569,0x1569,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593, -0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593,0x1593, -0x1593,0x1593,0x1593,0x1593,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5, -0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5,0x15a5, -0x15a5,0x15a5,0x15a5,0x15a5,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab, -0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab,0x15ab, -0x15ab,0x15ab,0x15ab,0x15ab,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae, -0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae,0x15ae, -0x15ae,0x15ae,0x15ae,0x15ae,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed, -0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed,0x15ed, -0x15ed,0x15ed,0x15ed,0x15de,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6, -0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f6,0x15f0, -0x15f9,0x15f9,0x15f9,0x15f9,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc, -0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc,0x15fc, -0x15fc,0x15fc,0x15fc,0x15fc,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x160e,0x1617,0x1617,0x1617, -0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617,0x1617, 
-0x1617,0x1617,0x1617,0x1617,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620, -0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620,0x1620, -0x1620,0x1620,0x1620,0x1620,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632, -0x1632,0x1632,0x1632,0x1632,0x162f,0x162f,0x162f,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x162f, -0x162f,0x1623,0x162f,0x1626,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632, -0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632,0x1632, -0x1632,0x1632,0x1632,0x1632,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656, -0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656,0x1656, -0x1656,0x1653,0x1653,0x1653,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f, -0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x1665,0x1665,0x1665,0x1662,0x1662,0x1662, -0x165f,0x165f,0x165f,0x165f,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674, -0x1674,0x1674,0x1674,0x1674,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x1668,0x167a,0x167a,0x166e,0x166b,0x166b, -0x166b,0x166b,0x166b,0x166b,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674, -0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674, -0x1674,0x1674,0x1674,0x1674,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680, -0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x1680,0x167d,0x167d,0x167d,0x167d,0x167d, -0x167d,0x167d,0x167d,0x167d,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683, 
-0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683, -0x1683,0x1683,0x1683,0x1683,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7, -0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7, -0x16a7,0x16a7,0x16a7,0x16a7,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0, -0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0,0x16b0, -0x16b0,0x16b0,0x16b0,0x16b0,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8, -0x16c8,0x16c8,0x16c8,0x16c8,0x16b3,0x16c2,0x16c2,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16c2,0x16b3,0x16c5, -0x16c5,0x16b3,0x16c5,0x16b3,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8, -0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8,0x16c8, -0x16c8,0x16c8,0x16c8,0x16c8,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1, -0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1,0x16d1, -0x16d1,0x16d1,0x16d1,0x16d1,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7, -0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7, -0x16d7,0x16d7,0x16d7,0x16d7,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a, -0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a,0x173a, -0x173a,0x173a,0x173a,0x173a,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776, -0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776, 
-0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x177c,0x1779,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776, -0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x1776,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f, -0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f, -0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x177f,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782, -0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782, -0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1782,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794, -0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794, -0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1794,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797, -0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797, -0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a, -0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a, -0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179d,0x179d,0x179d,0x179d,0x179a, -0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179a,0x179d,0x179d,0x179d, -0x179d,0x179d,0x179d,0x179d,0x179d,0x179a,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d, -0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d, -0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5, -0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5, 
-0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b5,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x1866,0x1866,0x1866, -0x1866,0x1866,0x1866,0x1866,0x1866,0x1866,0x1866,0x1866,0x1866,0x1866,0x1902,0x1902,0x1902,0x1902,0x1902,0x1902, -0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x17fa,0x17fa,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7, -0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7, -0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17f7,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa, -0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa, -0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x17fa,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848, -0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1845,0x1845,0x1845, -0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1830,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848, -0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848, -0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1866,0x1866,0x1866,0x1866,0x1866,0x1866,0x1863,0x1866, -0x1866,0x1866,0x1866,0x1866,0x1902,0x19e0,0x19e0,0x19e0,0x1866,0x1866,0x1866,0x1866,0x1866,0x1866,0x1866,0x1866, -0x1866,0x1866,0x1866,0x1866,0x1866,0x1866,0x1866,0x1902,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f, -0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f, -0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x186f,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872, -0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872, -0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9, 
-0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9, -0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea, -0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18d5,0x18db,0x18d8,0x18d8,0x18d8, -0x18d8,0x18e7,0x18ed,0x18d8,0x18d8,0x18d8,0x18d8,0x18e4,0x18ea,0x18d8,0x18d8,0x18d8,0x18d8,0x18d8,0x18d8,0x18d8, -0x18d8,0x18d8,0x18d8,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea, -0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18ea,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc, -0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc, -0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x18fc,0x1902,0x1902,0x1902,0x1902,0x1902,0x1902,0x1902,0x19e0, -0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0, -0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b, -0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b, -0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x190b,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911, -0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911, -0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980, -0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980, -0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x1980,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b, -0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b, 
-0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1, -0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1, -0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19a1,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc, -0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc, -0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf, -0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf, -0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19bf,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8, -0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c8, -0x19c8,0x19c8,0x19c8,0x19c8,0x19c8,0x19c5,0x19c5,0x19c5,0,0,0,0 -}; - -static const UTrie2 propsVectorsTrie={ - propsVectorsTrie_index, - propsVectorsTrie_index+4948, - NULL, - 4948, - 25064, - 0xa40, - 0x13d4, - 0x0, - 0x0, - 0x110000, - 0x7538, - NULL, 0, FALSE, FALSE, 0, NULL -}; - -static const uint32_t propsVectors[6639]={ -0x67,0,0,0x67,0,0x4e00000,0x67,0x80000,0x20,0x867,0,0,0xa67,0,0,0xb67, -0,0,0xc67,0,0,0xd67,0,0,0xe67,0,0,0x1067,0,0,0x1167,0, -0,0x1267,0,0,0x1367,0,0,0x1467,0,0,0x1567,0,0,0x1667,0,0, -0x1767,0,0,0x1867,0,0,0x1967,0,0,0x1a67,0,0,0x1b67,0,0,0x1d67, -0,0,0x1f67,0,0,0x2067,0,0,0x2267,0,0,0x2367,0,0,0x2467,0, -0,0x2567,0,0,0x2767,0,0,0x2867,0x80000,0x20,0x2967,0,0,0x2a67,0,0x1600000, -0x2b67,0,0,0x2d67,0,0,0x3167,0x20000000,0,0x3267,0x20000000,0,0x3a67,0,0,0x3b67, -0,0,0x3c67,0,0,0x3e67,0,0,0x4067,0,0,0x4167,0,0,0x4367,0, -0,0x4467,0,0,0x4867,0,0,0x4967,0,0,0x4a67,0,0,0x5067,0,0, -0x5167,0,0,0x5467,0,0,0x5567,0,0,0x5667,0x80000,0x20,0x5767,0,0,0x5867, -0,0,0x5967,0,0,0x5b67,0,0,0x5c67,0,0,0x5d67,0,0,0x6067,0x80000, 
-0x20,0x6267,0,0,0x6367,0,0,0x6467,0,0,0x6567,0,0,0x6f67,0,0, -0x7067,0,0,0x7367,0x20000000,0,0x7567,0,0,0x7667,0,0,0x7767,0,0,0x7867, -0,0,0x7a67,0,0,0x7b67,0,0,0x7c67,0,0,0x7e67,0,0,0x7f67,0, -0,0x8167,0,0,0x8267,0,0,0x8367,0,0,0x8467,0,0,0x8567,0,0, -0x8667,0,0,0x8767,0,0,0x8867,0,0,0x8967,0,0,0x8b67,0,0,0x8c67, -0,0,0x8e67,0x20000000,0,0x8f67,0,0,0x9067,0,0,0x9167,0,0,0x9267,0, -0,0x9367,0,0,0x9567,0,0,0x9667,0,0,0x9767,0,0,0x9867,0,0, -0x9967,0,0,0x9a67,0,0,0x9c67,0,0,0x9f67,0,0,0xa167,0,0,0xa367, -0,0,0xa467,0,0,0xa567,0,0,0xa667,0,0,0xa767,0,0,0xa867,0, -0,0xa967,0,0,0xaa67,0,0x4e00000,0xab67,0,0x4e00000,0xac67,0,0,0xad67,0,0, -0xae67,0,0,0xaf67,0,0,0xb167,0,0,0xb267,0,0,0xb467,0,0,0xb567, -0,0,0xb767,0,0,0xb867,0,0,0xb967,0,0,0xba67,0,0,0xbc67,0, -0,0xbd67,0,0,0xbe67,0,0,0xbf67,0,0,0xc067,0,0,0xc167,0,0, -0xc267,0,0,0xc367,0,0x4e00000,0xc467,0,0x4e00000,0xc667,0,0,0xc767,0,0,0xc867, -0,0,0xc967,0,0,0xca67,0,0,0xcc67,0,0x4e00000,0xcf67,0,0x4e00000,0xd067,0, -0x4e00000,0xd267,0,0,0xd367,0,0,0xd467,0,0,0xd567,0,0,0xd667,0,0, -0xd867,0,0,0xd967,0,0,0xda67,0,0,0xdb67,0,0,0xdc67,0,0,0xdd67, -0,0,0xde67,0,0,0xdf67,0,0,0xe067,0,0,0xe167,0,0,0xe267,0, -0,0xe367,0,0x4e00000,0xe467,0,0,0xe567,0,0,0xe667,0,0,0xe767,0,0, -0xe867,0,0,0xe967,0,0,0xea67,0,0,0xeb67,0,0,0xec67,0,0,0xed67, -0,0,0xee67,0,0,0xef67,0,0,0xf167,0,0,0xf367,0,0,0xf567,0, -0,0xf667,0,0,0xf767,0,0,0xf867,0,0,0xf967,0,0,0xfa67,0,0x4e00000, -0xfb67,0,0,0xfc67,0,0,0xfd67,0,0,0xfe67,0,0,0x10167,0,0,0x10267, -0,0,0x10367,0,0,0x10467,0,0,0x10567,0,0x4e00000,0x10667,0,0,0x10767,0, -0,0x10867,0,0,0x10967,0,0,0x10a67,0,0,0x10b67,0,0,0x10c67,0,0, -0x10d67,0,0,0x10e67,0,0,0x10f67,0,0,0x11067,0,0,0x11167,0,0,0x11367, -0,0,0x11467,0,0,0x11567,0,0,0x11667,0,0,0x11767,0,0,0x11867,0, -0,0x11967,0,0x4e00000,0x11a67,0,0,0x11b67,0,0,0x11c67,0,0,0x11d67,0,0, -0x11e67,0,0,0x11f67,0,0,0x12067,0,0,0x12167,0,0,0x12267,0,0,0x12367, -0,0,0xa0067,0,0xe00000,0xa4667,0,0xe00000,0xa4767,0,0xe00000,0xa4f67,0,0xe00000,0xa5e67,0, 
-0xe00000,0xa5f67,0,0xe00000,0xac567,0,0xe00000,0xad167,0,0xe00000,0xb0067,0,0xe00000,0xb1267,0,0xe00000, -0x11000100,0,0x900020,0x11000100,0x40000001,0x440020,0x11000100,0x40000001,0x643020,0x11000100,0x40000001,0xa5a040,0x11000100,0x40000001,0x116a8a0,0x11000200, -0,0x900020,0x11000200,0x4000001,0xc4000b,0x11000200,0x7c00100,0x220402,0x11000200,0x24000000,0x14200000,0x11000200,0x24000008,0x1710000,0x11000200,0x40000001, -0x1d3b020,0x11000219,0x7c00100,0x220401,0x11000219,0x7c00100,0x250401,0x11000319,0x7c00100,0x220401,0x11000319,0x7c00100,0x220402,0x11000319,0x7c00100,0x250400, -0x11000319,0x7c00100,0x250401,0x11000419,0x7c00100,0x220400,0x11000419,0x7c00100,0x220401,0x11000419,0x7c00100,0x220402,0x11000419,0x7c00100,0x230400,0x11000419, -0x7c00100,0x250400,0x11000419,0x7c00100,0x250401,0x11000419,0x7c00100,0x250402,0x11000519,0x7c00100,0x220400,0x11000519,0x7c00100,0x230400,0x11000600,0x4000400, -0x200000,0x11000600,0x4000400,0x200002,0x11000600,0x4000400,0x200400,0x11000600,0x7c00500,0x220400,0x11000600,0x7c00500,0x230400,0x11000600,0x7c00500,0x530400, -0x11000600,0x7c00d00,0x230400,0x11000619,0x7c00500,0x22040f,0x11000800,0x4000010,0x1001401,0x11000800,0x4000400,0x200001,0x11000800,0x6800010,0x201001,0x11000800, -0x7c00500,0x230401,0x11000807,0x7c00100,0x220400,0x11000807,0x7c00100,0x250400,0x1100080e,0x4000400,0x200000,0x1100080e,0x4000400,0x200002,0x1100080e,0x7000500, -0x220402,0x1100080e,0x7c00100,0x220400,0x1100080e,0x7c00100,0x220401,0x1100080e,0x7c00100,0x220402,0x1100080e,0x7c00100,0x250400,0x1100080e,0x7c00100,0x250401, -0x1100080e,0x7c00120,0x220402,0x1100080e,0x7c00120,0x250402,0x11000908,0x4000000,0x200000,0x11000908,0x7c00100,0x220400,0x11000908,0x7c00100,0x220401,0x11000908, -0x7c00100,0x250400,0x11000908,0x7c00100,0x250401,0x11000a03,0x4000000,0x200000,0x11000a03,0x4000000,0x200400,0x11000a03,0x4000000,0x270000,0x11000a03,0x7c00100, 
-0x220400,0x11000a03,0x7c00100,0x220402,0x11000a03,0x7c00100,0x250400,0x11000a03,0x7c00500,0x230400,0x11000b13,0x2802500,0x962460,0x11000b13,0x4000000,0x200000, -0x11000b13,0x4000000,0x201000,0x11000b13,0x4000000,0x230400,0x11000b13,0x4000002,0x400000,0x11000b13,0x4000010,0x200000,0x11000b13,0x7c00100,0x2633800,0x11000c00, -0x80000000,0x218960,0x11000c02,0x2802100,0x962460,0x11000c02,0x2802400,0x962460,0x11000c02,0x4000000,0x200000,0x11000c02,0x4000000,0x1329400,0x11000c02,0x4000000, -0x1329800,0x11000c02,0x4000000,0x1500000,0x11000c02,0x6800000,0x1329800,0x11000c02,0x7c00100,0x230400,0x11000c02,0x7c00100,0x230401,0x11000c02,0x7c00100,0x230402, -0x11000c02,0x7c00500,0x230400,0x11000c02,0x7d00100,0x230400,0x11000f0a,0x2802100,0x962460,0x11000f0a,0x2802400,0x962460,0x11000f0a,0x2806400,0x962460,0x11000f0a, -0x4000000,0x200000,0x11000f0a,0x6800100,0x962540,0x11000f0a,0x7c00100,0x230400,0x11000f0a,0x7c00100,0x230401,0x11001004,0x2802100,0x962460,0x11001004,0x2802400, -0x962460,0x11001004,0x2806400,0x962460,0x11001004,0x4000000,0x200000,0x11001004,0x4000000,0x1500000,0x11001004,0x6800100,0x962540,0x11001004,0x6800100,0x962541, -0x11001004,0x7c00100,0x230400,0x11001004,0x7c00100,0x230401,0x11001110,0x2802100,0x962460,0x11001110,0x2802400,0x962460,0x11001110,0x2806400,0x962460,0x11001110, -0x6800100,0x962540,0x11001110,0x7c00100,0x230400,0x11001110,0x7c00100,0x230401,0x1100120f,0x2802100,0x962460,0x1100120f,0x2802400,0x962460,0x1100120f,0x2806400, -0x962460,0x1100120f,0x6800100,0x962540,0x1100120f,0x7c00100,0x230400,0x1100131f,0x2802100,0x962460,0x1100131f,0x2802400,0x962460,0x1100131f,0x2806400,0x962460, -0x1100131f,0x4000000,0x200000,0x1100131f,0x6800000,0x1329800,0x1100131f,0x6800100,0x962540,0x1100131f,0x6800100,0x962541,0x1100131f,0x7c00100,0x230400,0x1100131f, -0x7c00100,0x230401,0x11001423,0x2802100,0x962460,0x11001423,0x2806400,0x962460,0x11001423,0x6800100,0x962540,0x11001423,0x6800100,0x962541,0x11001423,0x7c00100, 
-0x230400,0x11001423,0x7c00100,0x230401,0x11001524,0x2802100,0x962460,0x11001524,0x2802100,0x962461,0x11001524,0x2806400,0x962460,0x11001524,0x6800000,0x1329800, -0x11001524,0x6800100,0x962540,0x11001524,0x7c00100,0x230400,0x11001615,0x2802100,0x962460,0x11001615,0x2806400,0x962460,0x11001615,0x6800000,0x1329800,0x11001615, -0x6800100,0x962540,0x11001615,0x6800100,0x962541,0x11001615,0x7c00100,0x230400,0x1100171a,0x2802100,0x962460,0x1100171a,0x2806400,0x962460,0x1100171a,0x6800000, -0x1329800,0x1100171a,0x6800100,0x962540,0x1100171a,0x6800100,0x962541,0x1100171a,0x7c00100,0x230400,0x11001900,0x4000000,0x1600000,0x11001926,0x2802100,0x1862460, -0x11001926,0x2802400,0x1862460,0x11001926,0x2806100,0x1862460,0x11001926,0x4000000,0x200000,0x11001926,0x4000010,0x400000,0x11001926,0x6800000,0x1329800,0x11001926, -0x7800100,0x1830142,0x11001926,0x7c00100,0x1830000,0x11001926,0x7c00900,0x1830000,0x11001926,0x7e00100,0x1830000,0x11001a18,0x2802100,0x1862460,0x11001a18,0x2802400, -0x1862460,0x11001a18,0x6800000,0x1329800,0x11001a18,0x7800100,0x1830142,0x11001a18,0x7c00100,0x1830000,0x11001a18,0x7c00100,0x1830002,0x11001a18,0x7c00900,0x1830000, -0x11001a18,0x7e00100,0x1830000,0x11001d0c,0x7c00100,0x220400,0x11001d0c,0x7c00100,0x250400,0x11001e12,0x7c00100,0x2230500,0x11001e12,0x7c00100,0x2330520,0x11001e12, -0x7c80100,0x2330520,0x11002619,0x7c00100,0x220401,0x11002619,0x7c00100,0x220402,0x11002619,0x7c00100,0x250401,0x1100270e,0x4000400,0x200001,0x1100270e,0x4000400, -0x200002,0x1100270e,0x4000400,0x500001,0x1100270e,0x7c00100,0x220401,0x1100270e,0x7c00100,0x250401,0x11002800,0x80000,0x918820,0x11002800,0x80000,0x1c18020, -0x11002800,0x180000,0x918820,0x11002800,0x4000001,0x445801,0x11002800,0x4000001,0x445802,0x11002800,0x4000001,0xc4000b,0x11002800,0x6800000,0x201c00,0x11002800, -0x6800020,0x201c00,0x11002800,0x24000000,0x200000,0x11002800,0x24000000,0x200002,0x11002800,0x24000000,0x810000,0x11002800,0x24000000,0x1410000,0x11002800,0x24000000, 
-0x1500000,0x11002800,0x24000000,0x1500002,0x11002800,0x24000002,0x400000,0x11002800,0x24000006,0xc0000b,0x11002800,0x24000008,0x1410000,0x11002800,0x24000008,0x1710000, -0x11002800,0x24000020,0x1001400,0x11002800,0x24000020,0x1500002,0x11002800,0x2c000010,0x1248000,0x11002800,0x2c000010,0x15248002,0x11002800,0x40000001,0x63b020,0x11002800, -0x40080000,0x918820,0x11002801,0x80000,0xaa65620,0x11002801,0x82000,0x962460,0x11002900,0x4000000,0x20000e,0x11002900,0x4000000,0x20000f,0x11002900,0x4000020, -0x20000e,0x11002900,0x4000020,0x20000f,0x11002900,0x4000020,0x81000e,0x11002900,0x4000020,0x81000f,0x11002900,0x4000020,0x141000e,0x11002900,0x4000020,0x141000f, -0x11002900,0x4000022,0x20000e,0x11002900,0x4000022,0x20000f,0x11002a00,0x4000000,0x1500000,0x11002a00,0x4000000,0x1600000,0x11002a00,0x4000000,0x1600002,0x11002b01, -0x2000,0x962460,0x11002b01,0x2802020,0x962460,0x11002c00,0x4000000,0x200000,0x11002c00,0x4000000,0x200002,0x11002c00,0x4000000,0x20000f,0x11002c00,0x4000020, -0x200000,0x11002c00,0x7c00000,0x200000,0x11002c00,0x7c00020,0x200000,0x11002c00,0x7c00120,0x220405,0x11002c00,0x7c00120,0x230402,0x11002c00,0x7c00120,0x250402, -0x11002c00,0x7c00120,0x250405,0x11002c19,0x7c00100,0x250400,0x11002c19,0x7c00100,0x250401,0x11002d00,0x4000000,0x100006,0x11002d00,0x4000000,0x200006,0x11002d19, -0x7c00100,0x220402,0x11002d19,0x7c00100,0x230400,0x11002d19,0x7c00100,0x250402,0x11002e00,0x24000000,0x200000,0x11002e00,0x24000020,0x200000,0x11002e00,0x24000020, -0x200001,0x11002e00,0x24000020,0x14200000,0x11002f00,0x24000020,0x200000,0x11002f00,0x24000020,0x200001,0x11002f00,0x24000020,0x200002,0x11002f00,0x24000020,0xf00000, -0x11002f00,0x24000020,0x1600000,0x11002f00,0x24000022,0x1600000,0x11003000,0x24000000,0x200000,0x11003000,0x24000000,0x14200000,0x11003000,0x24000020,0x200000,0x11003000, -0x24000020,0x810000,0x11003000,0x24000020,0x1410000,0x11003100,0x24000000,0x200000,0x11003200,0x24000000,0x200000,0x11003300,0x4000000,0x100003,0x11003400,0x24000000, 
-0x100000,0x11003400,0x24000000,0x200000,0x11003500,0x24000000,0x200000,0x11003600,0x24000000,0x200000,0x11003600,0x24000000,0x14200000,0x11003600,0x24000020,0x200000, -0x11003700,0x24000000,0x200000,0x11003700,0x24000000,0x4200000,0x11003700,0x24000000,0x4e00000,0x11003700,0x24000000,0x14200000,0x11003700,0x24000000,0x14e00000,0x11003700, -0x24000000,0x96800000,0x11003700,0x24000020,0x4200000,0x11003800,0x4000000,0x100000,0x11003800,0x24000000,0x200000,0x11003800,0x24000000,0xb00000,0x11003800,0x24000000, -0x1710000,0x11003800,0x24000000,0x4200000,0x11003800,0x24000000,0x4e00000,0x11003800,0x24000000,0x14200000,0x11003800,0x24000000,0x14b00000,0x11003800,0x24000000,0x14e00000, -0x11003800,0x24000000,0x96800000,0x11005003,0x7c00100,0x220402,0x11005013,0x2802500,0x962460,0x11005013,0x4000020,0x200005,0x11005013,0x7c00100,0x2633801,0x11005013, -0x7c00100,0x2633802,0x11005013,0x7c00100,0x2633805,0x11005019,0x7c00100,0x220402,0x11005100,0x24000000,0x810000,0x11005100,0x24000000,0x1410000,0x11005102,0x7000100, -0x230408,0x11005102,0x7c00100,0x230404,0x11005102,0x7c00100,0x230407,0x11005102,0x7c00100,0x230408,0x11005102,0x7c00100,0x230409,0x11005201,0x2802400,0x962460, -0x11005500,0x80000,0x1e18820,0x11005502,0x7000100,0x230408,0x11005502,0x7c00100,0x230404,0x11005502,0x7c00100,0x230407,0x11005502,0x7c00100,0x230408,0x11005502, -0x7c00100,0x230409,0x11005667,0x1000,0,0x11020200,0x80004,0x418820,0x11020200,0x4000000,0x100006,0x11020200,0x4000000,0x10000f,0x11020200,0x4000400, -0x100002,0x11020200,0x4000400,0x500002,0x11020200,0x6800c00,0x101000,0x11020200,0x24000000,0x100000,0x11020200,0x24000000,0x1400000,0x11020200,0x24000000,0x1500000, -0x11020200,0x24000000,0x1600000,0x11020200,0x24000000,0x14200000,0x11020200,0x24000020,0x100000,0x11020200,0x24000020,0x1600000,0x11020219,0x7c00100,0x12040f,0x11020219, -0x7c00100,0x220400,0x11020219,0x7c00100,0x220401,0x11020219,0x7c00100,0x250400,0x11020319,0x7c00100,0x220400,0x11020319,0x7c00100,0x220401,0x11020319,0x7c00100, 
-0x220402,0x11020319,0x7c00100,0x250400,0x11020319,0x7c00100,0x250402,0x11020319,0x7d00100,0x220402,0x11020419,0x7c00100,0x220401,0x11020519,0x7c00100,0x220400, -0x11020600,0x4000400,0x100002,0x11020600,0x4000400,0x200400,0x11020600,0x7c00500,0x130400,0x11020600,0x7c00d00,0x130400,0x11020701,0x2802400,0x962460,0x11020701, -0x2802400,0x962461,0x11020701,0x2802400,0xc62460,0x1102080e,0x7c00100,0x220400,0x1102080e,0x7c00100,0x250400,0x11020908,0x7c00100,0x220400,0x11020908,0x7c00100, -0x220401,0x11020908,0x7c00100,0x250400,0x11020908,0x7c00100,0x250401,0x11022800,0x24000000,0x100000,0x11022800,0x24000000,0x200000,0x11022800,0x24000000,0x200002, -0x11022800,0x24000000,0x401000,0x11022800,0x24000000,0xf00002,0x11022800,0x24000000,0xf0ac02,0x11022800,0x24000000,0x1500000,0x11022800,0x24000002,0x100000,0x11022800, -0x24000002,0x370000,0x11022800,0x24000002,0x470000,0x11022800,0x24000006,0x400000,0x11022800,0x24000008,0x1710000,0x11022800,0x24000008,0x1712c00,0x11022800,0x24000020, -0x100000,0x11022800,0x24000020,0x1500000,0x11022800,0x24000020,0x1500002,0x11022900,0x4000000,0x10000e,0x11022900,0x4000000,0x10000f,0x11022919,0x7c00100,0x12040f, -0x11022c00,0x4000000,0x100002,0x11022c00,0x4000000,0x1500002,0x11022c00,0x4000000,0x1600002,0x11022c00,0x4000000,0x1410000f,0x11022c00,0x7c00120,0x120405,0x11022c0e, -0x7c00100,0x250401,0x11022c19,0x7c00100,0x150401,0x11022d00,0x4000000,0x100006,0x11022d00,0x4000000,0x200006,0x11022d19,0x7c00100,0x120402,0x11022d19,0x7c00100, -0x150402,0x11022e00,0x24000000,0x200000,0x11022e00,0x24000020,0x100000,0x11022e00,0x24000020,0x14100000,0x11022f00,0x24000020,0x100000,0x11022f00,0x24000020,0x100001, -0x11022f00,0x24000020,0x100002,0x11023000,0x24000000,0x100000,0x11023300,0x4000000,0x100002,0x11023300,0x4000000,0x100003,0x11023300,0x4000100,0x120403,0x11023300, -0x4000100,0x150403,0x11023300,0x4000100,0x14150403,0x11023400,0x24000000,0x100000,0x11023500,0x24000000,0x100000,0x11023600,0x24000000,0x100000,0x11023600,0x24000020, 
-0x100000,0x11023600,0x24000020,0x14100000,0x11023700,0x24000000,0x4100000,0x11023700,0x24000000,0x4e00000,0x11023700,0x24000000,0x14100000,0x11023700,0x24000000,0x14e00000, -0x11023700,0x24000020,0x100000,0x11023700,0x24000020,0x4100000,0x11023700,0x24000020,0x14100000,0x11023800,0x4000000,0x100000,0x11023800,0x24000000,0x200000,0x11024e67, -0,0,0x11025600,0x4000000,0x100000,0x11042a00,0x4000000,0x1600000,0x11045700,0x4000000,0x20000a,0x11045700,0x4000020,0x20000a,0x11045712,0x7c00100, -0xe3040a,0x11045712,0x7c80100,0xe3040a,0x11045716,0x7c00100,0xe30c0a,0x11045716,0x7c00100,0x2530c0a,0x11063d00,0x4000001,0x445811,0x11065700,0x4000000,0x810011, -0x11065700,0x4000000,0xe00011,0x11065700,0x4000000,0x1410011,0x11065700,0x4000000,0x1500011,0x11065700,0x4000000,0x1600011,0x11065700,0x4000006,0xe70011,0x11065700, -0x4000008,0xe00011,0x11065700,0x4000008,0xe02c11,0x11065700,0x4000010,0x871411,0x11065700,0x4000010,0x1201411,0x11065700,0x4000010,0x1271011,0x11065700,0x4000020, -0xe00011,0x11065700,0x4000400,0xe00011,0x11065700,0x4000420,0xe00011,0x11065700,0x6800000,0xe01c11,0x11065700,0x6800040,0xe00011,0x11065700,0xc000010,0x80ac11, -0x11065700,0xc000010,0xb48011,0x11065719,0x7c00100,0xe20411,0x11065719,0x7c00100,0xe50411,0x11065719,0x7c00140,0xe20411,0x11065719,0x7c00140,0xe50411,0x11080100, -0x6800000,0x201c00,0x11080100,0x68000c0,0x19329800,0x11080100,0x24000000,0x200000,0x11080100,0x24000000,0x810000,0x11080100,0x24000000,0x1410000,0x11080100,0x24000000, -0x1500000,0x11080100,0x24000000,0x1600000,0x11080100,0x24000000,0x1b00000,0x11080100,0x24000000,0x2410000,0x11080100,0x24000000,0x18200000,0x11080100,0x24000006,0xd70000, -0x11080100,0x24000008,0x1713c00,0x11080100,0x24000008,0x1714000,0x11080100,0x24000010,0x1001400,0x11080100,0x24000010,0x1071000,0x11080100,0x24000010,0x1071400,0x11080100, -0x24000020,0x200000,0x11080100,0x24000020,0x400000,0x11080100,0x24000020,0x1600000,0x11080100,0x24000400,0x200000,0x11080100,0x24000420,0x200000,0x11080100,0x2c000010, 
-0xb48000,0x11080100,0x2c000010,0x100ac00,0x11080100,0x44000001,0x1a45800,0x11080119,0x7c00100,0x220400,0x11080119,0x7c00100,0x250400,0x11080119,0x7c001c0,0x220400, -0x11080119,0x7c001c0,0x250400,0x11080200,0x4000400,0x200002,0x11080200,0x24000000,0x200000,0x11080200,0x24000000,0x1500000,0x11080200,0x24000000,0x1600000,0x11080200, -0x24000020,0x200000,0x110a1e12,0x7c00100,0x2130480,0x110a1e12,0x7c80100,0x2130480,0x110a3000,0x24000000,0x34e00000,0x110a3000,0x24100000,0x810001,0x110a3000,0x24100000, -0x1410001,0x110a3700,0x24000000,0x34200000,0x110a3d00,0x4000000,0xe00000,0x110a3d00,0x4000000,0xe00002,0x110a3d00,0x24000000,0xe00000,0x110a3d11,0x7c00300,0xe30000, -0x110a3d11,0x7c00900,0x1230400,0x110a3d12,0x2802400,0x962460,0x110a3e14,0x7c00100,0xe30000,0x110a3e14,0x7c00100,0xe30001,0x110a3e14,0x7c00100,0x2530000,0x110a3e14, -0x7c00900,0x1230000,0x110a3e14,0x7c00900,0x1230001,0x110a3f16,0x7c00100,0xe30c00,0x110a3f16,0x7c00100,0xe30c01,0x110a3f16,0x7c00100,0x2530c00,0x110a3f16,0x7c00900, -0x1230c00,0x110a3f16,0x7c00900,0x1230c01,0x110a4005,0x7c00100,0xe30400,0x110a4112,0x7c00100,0xe30402,0x110a4112,0x7c80100,0xe30402,0x110a4400,0x4000000,0xe00000, -0x110a4412,0x4000000,0xe00002,0x110a4412,0x4000000,0xe00003,0x110a4416,0x4000000,0xe00c03,0x110a4500,0x4000000,0xe0000d,0x110a4516,0x4000000,0xe00c0d,0x110a4711, -0x7c40300,0xe30000,0x110a4f11,0x7c00300,0xe30001,0x110a4f11,0x7c40300,0xe30000,0x110a5300,0x4000000,0x810010,0x110a5300,0x4000000,0xe00002,0x110a5300,0x4000000, -0xe00010,0x110a5300,0x4000000,0x1410010,0x110a5300,0x4000002,0xe70010,0x110a5300,0x4000008,0x810010,0x110a5300,0x4000008,0x1410010,0x110a5300,0x6800000,0xe01c02, -0x110a5300,0x6800000,0xe01c10,0x110a5400,0x4000000,0x81000c,0x110a5400,0x4000000,0xe0000c,0x110a5400,0x4000000,0x141000c,0x110a5400,0x4000000,0x150000c,0x110a5400, -0x4000000,0x160000c,0x110a5400,0x4000002,0xe7000c,0x110a5400,0x4000010,0x87140c,0x110a5400,0x4000010,0xe7000c,0x110a5400,0x4000010,0x120140c,0x110a5400,0x4000010, 
-0x127100c,0x110a5400,0x4000020,0xe0000c,0x110a5400,0x4000026,0xe7000c,0x110a5400,0xc000010,0x80ac0c,0x110a5400,0xc000010,0xb4800c,0x11400a0c,0xc000010,0x1049400, -0x11400c0e,0x4000010,0xb00000,0x11400c0e,0x4000010,0x1071400,0x11400c0e,0xc000010,0xb48000,0x11400c17,0x7c00900,0x230400,0x11400f42,0xc000010,0x448000,0x11400f54, -0xc000010,0x448000,0x11401d83,0x4000000,0x200000,0x11403dab,0x4000000,0xe00000,0x114457a0,0x4000004,0x120000a,0x114457a0,0x4000008,0x81000a,0x114457a0,0x4000008, -0x141000a,0x114457a0,0x4000010,0x87000a,0x114457a0,0xc000010,0x84800a,0x114457a9,0x3802500,0x126246a,0x114457a9,0x7c00d00,0x2530c0a,0x114a3da0,0x24000000,0x810000, -0x114a3da0,0x24000000,0x1410000,0x114a3da0,0x24000008,0x810000,0x114a3da0,0x24000008,0x1410000,0x114a3da0,0x24000010,0x870000,0x114a3da0,0x2c000010,0x848000,0x114a3da6, -0x4000000,0xe00000,0x114a3da6,0x24000000,0xe00000,0x114a3da6,0x24000002,0x1200000,0x114a3da6,0x24000002,0x14e00000,0x114a3da6,0x24000008,0x810000,0x114a3da6,0x24000008, -0x1410000,0x114a3da9,0x7c00900,0x930c00,0x114a3da9,0x7c00900,0xe30c00,0x114a3dab,0x7c00300,0xe30000,0x114a3ea9,0x7000400,0x1200c02,0x114a3fa0,0x4000004,0x1200000, -0x114a3fa9,0x7c00d00,0x2530c00,0x114a42ab,0x4000000,0xe00000,0x114a42ab,0x4000000,0xe0000f,0x114a44ab,0x4000000,0xe00002,0x114a44ab,0x4000000,0xe00003,0x114a44ab, -0x4000000,0x14e00003,0x114a45ab,0x4000000,0xe00002,0x114a45ab,0x4000000,0xe0000d,0x1180090a,0x2802400,0x962460,0x11800c1f,0x2802100,0x962460,0x11800c1f,0x2802500, -0x962460,0x11800f29,0x2802400,0x962460,0x11800f36,0x2802400,0x962460,0x11820700,0x2802400,0x962460,0x11820700,0x2802500,0x962460,0x118a3dac,0x2802400,0x962460, -0x118a3ea9,0x2802400,0x962460,0x11c00904,0x2802400,0x962460,0x11c00908,0x2802400,0x962460,0x11c00c23,0x6800000,0x1329800,0x11c00c27,0xc000010,0xb48000,0x11c00f6b, -0x6800000,0x1329800,0x11c01070,0x6800000,0x1329800,0x11c01174,0x6800000,0x1329800,0x11c01278,0x6800000,0x1329800,0x11c0147c,0x4000000,0x200000,0x11c0147c,0x6800000, 
-0x1329800,0x11c05123,0x7c00100,0x230408,0x20000067,0x1000,0,0x20000b13,0x2802400,0x962460,0x20000b13,0x2802500,0x962460,0x20001b27,0x2802100,0x962460, -0x20001b27,0x2802100,0x962461,0x20001b27,0x2802400,0x962460,0x20001b27,0x2806400,0x962460,0x20001b27,0x2902100,0x962462,0x20001b27,0x4000000,0x200000,0x20001b27, -0x4000000,0x400000,0x20001b27,0x4000000,0x500000,0x20001b27,0x4000000,0x810000,0x20001b27,0x4000000,0xb00000,0x20001b27,0x4000000,0xc0000b,0x20001b27,0x4000000, -0x1410000,0x20001b27,0x4000010,0xb00000,0x20001b27,0x4000010,0xc00000,0x20001b27,0x6800000,0x1329800,0x20001b27,0x6800100,0x462540,0x20001b27,0x6800400,0x962540, -0x20001b27,0x7c00100,0x230400,0x20001b27,0x7c00100,0x230401,0x20002619,0x7c00100,0x220401,0x20002a00,0x4000000,0x1600000,0x20004b67,0,0x1900020,0x20004c67, -0,0x1900020,0x20004d67,0,0x1900020,0x20006d67,0x1000,0,0x20006e67,0x1000,0,0x20026d67,0,0,0x20026e67,0, -0,0x200a4a12,0x7c00100,0x1f304c1,0x200a4a12,0x7c00100,0x20304e1,0x21005600,0x4000000,0x700000,0x21022a00,0x4000000,0x1600000,0x30000419,0x7c00100,0x220400, -0x30000419,0x7c00100,0x220401,0x30000419,0x7c00100,0x250400,0x30000419,0x7c00100,0x250401,0x30000519,0x7c00100,0x220400,0x30000600,0x4000400,0x200400,0x30000600, -0x7c00500,0x230400,0x30000605,0x4000400,0x200000,0x3000080e,0x7c00100,0x220400,0x30000908,0x2000,0x962460,0x30000908,0x7c00100,0x220400,0x30000908,0x7c00100, -0x220401,0x30000908,0x7c00100,0x250400,0x30000908,0x7c00100,0x250401,0x30000a03,0x4000006,0x400000,0x30000c02,0x4000000,0x200000,0x30000c02,0x7c00100,0x230400, -0x30000d22,0x2802100,0x962460,0x30000d22,0x2802400,0x962460,0x30000d22,0x2802500,0x962460,0x30000d22,0x4000000,0x200000,0x30000d22,0x4000010,0x200000,0x30000d22, -0x7c00100,0x230400,0x30000d22,0xc000010,0x248000,0x30000d22,0x80000000,0x218960,0x30000e25,0x2802500,0x962460,0x30000e25,0x7c00100,0x230400,0x30001821,0x2802100, 
-0x962460,0x30001821,0x2806400,0x962460,0x30001821,0x4000000,0x200000,0x30001821,0x6800100,0x962540,0x30001821,0x6800100,0x962541,0x30001821,0x7c00100,0x230400, -0x30001b27,0x2802100,0x962460,0x30001b27,0x2802400,0x962460,0x30001b27,0x4000000,0x200000,0x30001b27,0x4000000,0x400000,0x30001b27,0x7c00100,0x230400,0x30001c1c, -0x2802100,0x1862460,0x30001c1c,0x2802400,0x1862460,0x30001c1c,0x2806400,0x1862460,0x30001c1c,0x4000000,0x200000,0x30001c1c,0x6800100,0x1862400,0x30001c1c,0x6800100, -0x1862540,0x30001c1c,0x7c00100,0x1830000,0x30001c1c,0x7c00100,0x1830001,0x30001c1c,0xc000010,0x448000,0x30001f0b,0x4000000,0x200000,0x30001f0b,0x4000010,0x200000, -0x30001f0b,0x4000010,0x400000,0x30001f0b,0x6800000,0x200000,0x30001f0b,0x7c00100,0x230400,0x30001f0b,0xc000010,0x248000,0x30002006,0x7c00100,0x250400,0x30002128, -0x4000010,0x200000,0x30002128,0x7c00100,0x230400,0x30002128,0xc000010,0x248000,0x3000221d,0x4000000,0x810000,0x3000221d,0x4000000,0x1410000,0x3000221d,0x4000001, -0x445800,0x3000221d,0x7c00100,0x230400,0x30002300,0x4000010,0x400000,0x30002320,0x7c00100,0x230400,0x30002417,0x2802100,0x1862460,0x30002417,0x2802400,0x1862460, -0x30002417,0x2806400,0x1862460,0x30002417,0x2882000,0x1862460,0x30002417,0x4000000,0x200000,0x30002417,0x4000000,0x400000,0x30002417,0x4000000,0x1600000,0x30002417, -0x4000010,0x400000,0x30002417,0x4000010,0x1200000,0x30002417,0x6800000,0x1329800,0x30002417,0x6800100,0x1862540,0x30002417,0x7c00100,0x1830000,0x30002417,0x7d00100, -0x1830000,0x3000251b,0x80000,0xc18820,0x3000251b,0x2802100,0x962460,0x3000251b,0x3c02100,0x962460,0x3000251b,0x4000000,0x200000,0x3000251b,0x4000006,0x500000, -0x3000251b,0x4000010,0x400000,0x3000251b,0x4000010,0xb70000,0x3000251b,0x4000800,0x200000,0x3000251b,0x6800000,0x1329800,0x3000251b,0x7c00100,0x230400,0x3000251b, -0x7c00900,0x230400,0x3000251b,0xc000010,0xb48000,0x3000251b,0x12882000,0x962460,0x30002800,0x4000001,0xc41c0b,0x30002800,0x24000000,0x200000,0x30002800,0x2c000010, 
-0x1248002,0x30002800,0x2c000010,0x15248002,0x30002a00,0x4000000,0x1600000,0x30002b01,0x2000,0x962460,0x30002b01,0x2000,0x8962460,0x30002c00,0x4000000,0x200000, -0x30002c00,0x7c00100,0x14220405,0x30002d19,0x7c00100,0x250400,0x30002e00,0x24000000,0x200000,0x30003000,0x24000000,0x200000,0x30003000,0x24000000,0x4200000,0x30003100, -0x24000000,0x200000,0x30003600,0x24000000,0x200000,0x30003700,0x24000000,0x4200000,0x3000392e,0x24000000,0x200000,0x30005013,0x7c00100,0x2633801,0x30005600,0, -0x918820,0x30020600,0x4000400,0x500400,0x30020701,0x2802400,0x962460,0x30020701,0x2802400,0xc62460,0x300a3a11,0x4020000,0xe00000,0x300a3a11,0x4020000,0xe00002, -0x300a3b11,0x4020000,0xe00002,0x300a3c00,0x4008000,0xe00000,0x300a3c00,0x4010000,0xe00000,0x300a3d11,0x7c00300,0xe30002,0x300a4305,0x7c00100,0xe30400,0x300a4611, -0x7c40300,0xe30000,0x300a4829,0x7c00100,0xe30400,0x300a4829,0x7c00900,0x1230400,0x300a4929,0x4000000,0xe00000,0x30402589,0x4000010,0x400000,0x30402589,0x4000010, -0xb70000,0x30402589,0xc000010,0xb48000,0x304a3dab,0x4000000,0xe00000,0x30800c1f,0x2802100,0x962460,0x30c01c81,0x6800000,0x1329800,0x3100080e,0x7c00120,0x220402, -0x3100080e,0x7c00120,0x250402,0x31005167,0x1000,0,0x3100581e,0x4000000,0x200000,0x3100581e,0x7c00100,0x230400,0x3100590d,0x7c00100,0x230400,0x31005a09, -0x7c00100,0x220400,0x31005a09,0x7c00100,0x250400,0x31005b00,0x4000000,0x200000,0x31005c00,0x80000,0x918820,0x31005c00,0x2802000,0x962460,0x31005c00,0x2802400, -0x962460,0x31005c00,0x4000000,0x200000,0x31005c00,0x4000000,0x200001,0x31005c00,0x6800000,0x962540,0x31005c00,0x6800400,0x962540,0x31005c01,0x2802400,0x962460, -0x31005d00,0x4000020,0x200005,0x31005d00,0x6800020,0x1329805,0x31005d00,0x7c00120,0x220405,0x31005d00,0x7c00120,0x250405,0x31006000,0x82000,0x8962460,0x31006000, -0x180000,0x918820,0x310a5e11,0x7c40300,0xe30000,0x310a5f11,0x7c00300,0xe30001,0x32000419,0x7c00100,0x250400,0x3200080e,0x4000020,0x200000,0x3200080e,0x7c00100, 
-0x220400,0x3200080e,0x7c00100,0x250400,0x32000908,0x7c00100,0x220400,0x32000908,0x7c00100,0x250400,0x32000c02,0x7c00100,0x230400,0x32000e25,0x7c00100,0x230400, -0x32001d0c,0x7c00100,0x220400,0x32002800,0x80000,0x1e18820,0x32002800,0x80020,0x218820,0x32002800,0x4000001,0x445802,0x32002800,0x24000000,0x200000,0x32002800, -0x24000000,0x200002,0x32002800,0x24000020,0x200000,0x32002800,0x2c000010,0x1248002,0x32002919,0x7c00100,0x22040f,0x32002a00,0x4000000,0x1600000,0x32002b01,0x2000, -0x962460,0x32002b01,0x2802000,0x962460,0x32002b01,0x2802020,0x962460,0x32002c00,0x4000000,0x200000,0x32002c00,0x4000020,0x200000,0x32002c00,0x4000020,0x200005, -0x32002c00,0x7c00120,0x220405,0x32002c00,0x7c00120,0x250405,0x32002e00,0x24000020,0x200000,0x32002f00,0x24000020,0x200000,0x32003000,0x24000000,0x200000,0x32003000, -0x24000020,0x200000,0x32003500,0x24000000,0x200000,0x32003600,0x24000020,0x200000,0x32003600,0x24000020,0x14200000,0x32003700,0x24000000,0x200000,0x32003700,0x24000000, -0x4100000,0x32003700,0x24000000,0x4200000,0x32003700,0x24000000,0x14200000,0x32003800,0x24000000,0x810000,0x32003800,0x24000000,0x1410000,0x32005102,0x4000000,0x1500008, -0x32005502,0x7c00100,0x230400,0x32006108,0x7c00100,0x220400,0x32006108,0x7c00100,0x250400,0x3200622a,0x2802100,0x962460,0x3200622a,0x2806000,0x962460,0x3200622a, -0x7c00100,0x230400,0x3200632b,0x2802100,0x962460,0x3200632b,0x2806000,0x962460,0x3200632b,0x7c00100,0x230400,0x3200642c,0x2802100,0x962460,0x3200642c,0x7c00100, -0x230400,0x3200652d,0x2802100,0x962460,0x3200652d,0x7c00100,0x230400,0x32006600,0x24000020,0x200000,0x32006700,0x24000020,0x200000,0x32006800,0x24000020,0x200000, -0x32006800,0x24000020,0x14200000,0x32006900,0x24000020,0x200000,0x32006900,0x24000020,0x810000,0x32006900,0x24000020,0x1410000,0x32006a00,0x24000020,0x200000,0x32006a00, -0x24000020,0x200001,0x32006a00,0x24000020,0x200002,0x32020701,0x2882000,0xc62460,0x32023300,0x4000000,0x100000,0x32026c01,0x12882000,0x962460,0x32026c01,0x12882000, 
-0x8962460,0x32065700,0x4000000,0x810011,0x32065700,0x4000000,0x1410011,0x32086600,0x24000020,0x810000,0x32086600,0x24000020,0x1410000,0x32086900,0x24000020,0x810000, -0x32086900,0x24000020,0x1410000,0x320a3600,0x24000020,0x34200000,0x320a3d11,0x7c00100,0x1230400,0x320a3e14,0x7c00100,0xe30010,0x320a3e14,0x7c00100,0x2530000,0x320a3f16, -0x7c00100,0xe30c10,0x320a4400,0x4000000,0xe00003,0x320a4929,0x4000000,0xe00000,0x320a4f11,0x7c00300,0xe30001,0x320a6b16,0x7c00100,0x2530c00,0x32406385,0xc000010, -0x448000,0x324a3dae,0x4000000,0x14e00000,0x324a3dae,0x7c00100,0x1230400,0x324a3fa9,0x4000002,0x1200c00,0x324a53a6,0x24000000,0xe00000,0x32820701,0x2802000,0x962460, -0x40000419,0x7c00100,0x220400,0x40000519,0x7c00100,0x220400,0x40000600,0x4000400,0x200400,0x4000080e,0x7c00100,0x220400,0x4000080e,0x7c00100,0x250400,0x4000080e, -0x7c00100,0x250402,0x40000c02,0x2802100,0x962460,0x40000c02,0x2802400,0x962460,0x40000c02,0x2802500,0x962460,0x40000c02,0x4000000,0x200000,0x40000c02,0x4000000, -0x1071400,0x40000c02,0x7c00100,0x230400,0x40000c02,0x80000000,0x218960,0x40000d22,0x7c00100,0x230400,0x40000f0a,0x7c00100,0x230400,0x40001004,0x7c00100,0x230400, -0x40001110,0x2802100,0x962460,0x40001110,0x6800100,0x962540,0x4000120f,0x2802100,0x962460,0x4000120f,0x4000000,0x1600000,0x4000120f,0x7c00100,0x230400,0x4000131f, -0x7c00100,0x230400,0x40001423,0x4000000,0x200000,0x40001423,0x4000000,0x1600000,0x40001615,0x2802400,0x962460,0x40001615,0x7c00100,0x230400,0x40002417,0x2802400, -0x1862460,0x40002417,0x4000000,0x200000,0x40002800,0x6800000,0x201c00,0x40002800,0x24000002,0x200000,0x40002c00,0x4000000,0x200002,0x40003000,0x24000000,0x14200000, -0x40003000,0x24000020,0x200000,0x40003700,0x24000000,0x200000,0x40003700,0x24000000,0x4200000,0x40003700,0x24000000,0x14200000,0x40005a09,0x7c00100,0x220400,0x40005a09, -0x7c00100,0x250400,0x40005d00,0x7c00120,0x220405,0x40006f30,0x2802100,0x962460,0x40006f30,0x2802400,0x962460,0x40006f30,0x4000000,0x200000,0x40006f30,0x6800000, 
-0x1329800,0x40006f30,0x6800100,0x962540,0x40006f30,0x7c00100,0x230400,0x40006f30,0xc000010,0xb48000,0x40007034,0x7c00100,0x1830000,0x40007117,0x4000000,0x200000, -0x40007208,0x7c00100,0x220400,0x4000720e,0x7c00100,0x220400,0x4000720e,0x7c00500,0x22040e,0x4000720e,0x7c00500,0x22040f,0x40007219,0x7c00100,0x220400,0x40007219, -0x7c00500,0x220400,0x40007219,0x7c00500,0x22040e,0x40007219,0x7c00500,0x22040f,0x40007300,0x24000000,0x200000,0x40007300,0x24000000,0x14200000,0x40007400,0x4000000, -0x200000,0x40007531,0x7c00100,0x230400,0x40007631,0x7c00100,0x230400,0x40007835,0x4000010,0x400000,0x40007835,0x7c00100,0x230400,0x40007933,0x7c00100,0x230400, -0x40007a32,0x6800000,0x1329800,0x40007a32,0x7c00100,0x230400,0x40007b2f,0x7c00100,0x230400,0x40007c00,0x4000000,0x200000,0x40020701,0x2802400,0x962460,0x40020701, -0x2802400,0xc62460,0x40023300,0x4000000,0x200000,0x40027d01,0x12882000,0x962460,0x400a3700,0x24000000,0x34200000,0x400a3700,0x24000000,0x34e00000,0x400a4400,0x4000000, -0xe0000d,0x400a4412,0x4000000,0xe00002,0x400a4412,0x4000000,0xe00003,0x400a4500,0x4000000,0xe0000d,0x400a5300,0x4000000,0x810010,0x400a5300,0x4000000,0x1410010, -0x404077e0,0x4000000,0x200000,0x404077e3,0x4000000,0x200000,0x404077e3,0x4000000,0x400000,0x40c0147c,0x4000000,0x200000,0x40c05123,0x4000000,0x200000,0x41000419, -0x7c00100,0x220400,0x41000419,0x7c00100,0x250400,0x4100080e,0x7c00100,0x220400,0x4100080e,0x7c00100,0x250400,0x41000908,0x7c00100,0x220400,0x41000908,0x7c00100, -0x250400,0x41000b13,0x2802000,0x962460,0x41000b13,0x2802100,0x962460,0x41000b13,0x4000000,0xb00000,0x41000c02,0x2802100,0x962460,0x41000c02,0x4000000,0x1500000, -0x41000c02,0xc000010,0xb48000,0x41000f0a,0x7c00100,0x230400,0x41001004,0x7c00100,0x230400,0x41001423,0x7c00100,0x230400,0x41001b27,0x4000000,0x500000,0x41001d0c, -0x7c00100,0x220400,0x41001d0c,0x7c00100,0x23040f,0x41001f0b,0x2802100,0x962460,0x41001f0b,0x4000000,0x200000,0x41001f0b,0x7c00100,0x230400,0x41002800,0x24000000, 
-0x200000,0x41002800,0x24000000,0x400000,0x41002919,0x7c00100,0x22040e,0x41002a00,0x4000000,0x1600000,0x41002b01,0x2802020,0x962460,0x41002c00,0x4000000,0x200000, -0x41002c00,0x7c00120,0x220405,0x41003000,0x24000000,0x200000,0x41003700,0x24000000,0x4200000,0x41003700,0x24000000,0x14200000,0x41003700,0x24000000,0x14e00000,0x41005d00, -0x7c00120,0x220405,0x41006600,0x24000020,0x200000,0x41006600,0x24000020,0x810000,0x41006600,0x24000020,0x1410000,0x41007208,0x7c00100,0x22040f,0x41007219,0x7c00100, -0x220400,0x41007300,0x24000000,0x200000,0x41007e0e,0x2802000,0x962460,0x41007e0e,0x4000000,0x200000,0x41007f0e,0x4000000,0x200000,0x41007f0e,0x7c00100,0x230400, -0x41008002,0x7c00100,0x230400,0x41008137,0x2802100,0x962460,0x41008137,0x4000000,0x200000,0x41008137,0x6800100,0x962540,0x41008137,0x7c00100,0x230400,0x41008301, -0x2802000,0x962460,0x41008407,0x4000000,0x200000,0x41008407,0x4000000,0x400000,0x41008407,0x4000000,0xb00000,0x41008407,0x7c00100,0x220400,0x41008407,0x7c00100, -0x250400,0x4100850b,0x7c00100,0x230400,0x4100860b,0x4000000,0x200000,0x4100860b,0x7c00100,0x230400,0x4100870c,0x7c00100,0x220400,0x41008838,0x7c00100,0x220400, -0x41008838,0x7c00100,0x250400,0x41008939,0x2802000,0x962460,0x41008939,0x2802100,0x962460,0x41008939,0x2806000,0x962460,0x41008939,0x4000000,0x200000,0x41008939, -0x4000000,0x400000,0x41008939,0x7c00100,0x230400,0x41008939,0xc000010,0x448000,0x41008a00,0x4000000,0x200000,0x41008b3b,0x4000000,0x1800000,0x41008b3b,0x6800000, -0x1329800,0x41008b3b,0x7c00100,0x1830000,0x41008b3b,0x7e00100,0x1830000,0x41008c3d,0x4000010,0x400000,0x41008c3d,0x7c00100,0x230400,0x41008d0e,0x7c00100,0x22040f, -0x41008d19,0x7c00100,0x220400,0x41008d19,0x7c00100,0x22040f,0x41008e00,0x24000000,0x200000,0x41008e00,0x24000000,0x400000,0x41008e00,0x24000000,0x1710000,0x41008e00, -0x24000006,0x400000,0x41008f3a,0x2802000,0x962460,0x41008f3a,0x2802100,0x962460,0x41008f3a,0x2806000,0x962460,0x41008f3a,0x4000000,0x200000,0x41008f3a,0x6800100, 
-0x962540,0x41008f3a,0x7c00100,0x230400,0x4100903c,0x7c00100,0x230400,0x4100903c,0x7c00100,0x23040f,0x41020701,0x2802000,0x962460,0x41020701,0x2802000,0xc62460, -0x410a3700,0x24000000,0x34200000,0x410a3700,0x24000000,0x34e00000,0x410a4412,0x4000000,0xe00003,0x410a4711,0x7c40300,0xe30000,0x410a4f11,0x7c00300,0xe30001,0x410a9100, -0x4000000,0x800010,0x410a9100,0x4000000,0x810010,0x410a9100,0x4000000,0x870010,0x410a9100,0x4000000,0xb00010,0x410a9100,0x4000000,0xf00010,0x410a9100,0x4000000, -0x1001410,0x410a9100,0x4000000,0x1071010,0x410a9100,0x4000000,0x1071410,0x410a9100,0x4000000,0x1410010,0x414a82ab,0x4000000,0xe00000,0x41808300,0x2802000,0x962460, -0x41c0147c,0x6800000,0x1329800,0x50000419,0x7c00100,0x220400,0x50000419,0x7c00100,0x250400,0x5000080e,0x7c00100,0x220400,0x50000908,0x7c00100,0x220400,0x50000908, -0x7c00100,0x250400,0x50000b13,0x2802500,0x962460,0x50000f0a,0x7c00100,0x230400,0x50001615,0x2802100,0x962460,0x50001615,0x7c00100,0x230400,0x50002b01,0x2802020, -0x962460,0x50002c00,0x4000000,0x200000,0x50002c19,0x7c00100,0x220400,0x50002d19,0x7c00100,0x220400,0x50003000,0x24000000,0x200000,0x50003000,0x24000020,0x200000, -0x50003700,0x24000000,0x4200000,0x50005d00,0x7c00120,0x220405,0x50005d00,0x7c00120,0x250405,0x50006108,0x7c00100,0x220400,0x50006108,0x7c00100,0x250400,0x50006600, -0x24000020,0x200000,0x50007300,0x24000000,0x200000,0x50008301,0x2802400,0x962460,0x50008a00,0x7c00500,0x230400,0x50009257,0x2802400,0x962460,0x50009257,0x4000000, -0x200000,0x50009257,0x4000010,0x1071400,0x50009257,0x6800000,0x1329800,0x50009257,0x7c00100,0x230400,0x50009257,0x7c00500,0x230400,0x50009257,0x7c00900,0x230400, -0x50009257,0xc000010,0xb48000,0x5000933e,0x2802100,0x962460,0x5000933e,0x2802400,0x962460,0x5000933e,0x4000000,0x200000,0x5000933e,0x4000000,0x400000,0x5000933e, -0x4000010,0x400000,0x5000933e,0x6800000,0x1329800,0x5000933e,0x6800100,0x962540,0x5000933e,0x6800100,0x962541,0x5000933e,0x6804400,0x962540,0x5000933e,0x7c00100, 
-0x230400,0x5000933e,0x7c00100,0x230401,0x5000933e,0xc000010,0x448000,0x50009419,0x7c00100,0x220400,0x50009419,0x7c00100,0x250400,0x50009500,0x4000400,0x200400, -0x5000965a,0x4000000,0x500000,0x5000965a,0x7c00100,0x230400,0x5000965a,0xc000010,0xb48000,0x5000975b,0x4000000,0x200000,0x5000975b,0x4000010,0x400000,0x5000975b, -0x7c00100,0x230400,0x50009865,0x7c00100,0x230400,0x50009965,0x4000010,0x400000,0x50009965,0x7c00100,0x230400,0x50409aab,0x4000000,0x200000,0x5100080e,0x7c00100, -0x220400,0x5100080e,0x7c00100,0x250400,0x51000c02,0x2802100,0x962460,0x51000c02,0x4000000,0x1500000,0x51000c02,0x4000020,0x200000,0x51000c02,0x7c00100,0x230400, -0x51000f0a,0x7c00100,0x230400,0x51000f0a,0x7c00500,0x230400,0x51001110,0x2802100,0x962460,0x5100131f,0x2802100,0x962460,0x51001423,0x7c00100,0x230400,0x51001524, -0x2802100,0x962460,0x51001524,0x4000000,0x200000,0x51001524,0x7c00100,0x230400,0x5100171a,0x2802100,0x962460,0x5100171a,0x4000000,0x200000,0x5100171a,0x4000000, -0x1500000,0x5100171a,0x7c00100,0x230400,0x51001b27,0x4000000,0x200000,0x51001b27,0x4000000,0x400000,0x51001b27,0x4000000,0x500000,0x51001b27,0x7c00100,0x230400, -0x51001c1c,0x2802100,0x1862460,0x51001c1c,0x2802400,0x1862460,0x51001c1c,0x2806400,0x1862460,0x51001c1c,0x4000000,0x1800000,0x51001c1c,0x6800000,0x1329800,0x51001c1c, -0x6800000,0x1862400,0x51001c1c,0x6800100,0x1862400,0x51001c1c,0x6800100,0x1862540,0x51001c1c,0x6800400,0x1862400,0x51001c1c,0x7c00100,0x1830000,0x5100251b,0x7c00100, -0x230400,0x51002619,0x7c00100,0x220400,0x51002619,0x7c00100,0x250400,0x51002800,0x80020,0x218820,0x51002c00,0x4000000,0x200000,0x51002d19,0x7c00100,0x230400, -0x51003700,0x24000000,0x4200000,0x51003700,0x24000000,0x4e00000,0x51005201,0x2802400,0x962460,0x51005c00,0x4000000,0x200000,0x51006108,0x7c00100,0x220400,0x51006108, -0x7c00100,0x250400,0x51006600,0x24000020,0x200000,0x51006600,0x24000020,0x810000,0x51006600,0x24000020,0x1410000,0x51007300,0x24000000,0x200000,0x51007300,0x24000020, 
-0x200000,0x51008002,0x7c00100,0x230400,0x51008301,0x2802000,0x962460,0x51008301,0x2802400,0x962460,0x51008a00,0x7c00500,0x230400,0x51008e00,0x24000000,0x200000, -0x51008e00,0x24000000,0x400000,0x51008e00,0x24000000,0x810000,0x51008e00,0x24000000,0x1400000,0x51008e00,0x24000000,0x1410000,0x51008e00,0x24000000,0x1710000,0x51008e00, -0x24000002,0x200000,0x51008e00,0x24000500,0x230400,0x51008e00,0x2c000010,0xb48000,0x51009419,0x7c00100,0x220400,0x51009419,0x7c00100,0x22040e,0x51009419,0x7c00100, -0x22040f,0x51009419,0x7c00100,0x250400,0x51009500,0x4000000,0x200400,0x51009500,0x7c00500,0x230400,0x51009519,0x7c00100,0x220400,0x51009519,0x7c00100,0x22040f, -0x51009519,0x7c00100,0x230400,0x51009519,0x7c00100,0x250400,0x51009b71,0x2802100,0x962460,0x51009b71,0x6800000,0x1329800,0x51009b71,0x6800100,0x962540,0x51009b71, -0x6804400,0x962540,0x51009b71,0x7c00100,0x230400,0x51009c52,0x2802100,0x962460,0x51009c52,0x2802400,0x962460,0x51009c52,0x2802c00,0x962460,0x51009c52,0x4000010, -0x400000,0x51009c52,0x6800000,0x1329800,0x51009c52,0x6800100,0x962540,0x51009c52,0x7c00100,0x230400,0x51009c52,0xc000010,0x448000,0x51009d6d,0x6800000,0x1329800, -0x51009d6d,0x7c00100,0x230400,0x51009d6d,0x7c00500,0x230400,0x51009d6d,0x7c00d00,0x230400,0x51009d6d,0xc000010,0x448000,0x51009e08,0x2802100,0x962460,0x51009f63, -0x4000010,0x400000,0x51009f63,0x6800000,0x1329800,0x51009f63,0x7c00100,0x230400,0x51009f63,0x7c00900,0x230400,0x51009f63,0xc000010,0x448000,0x51009f63,0xc000010, -0xb48000,0x5100a008,0x2000,0x962460,0x5100a008,0x2802400,0x962460,0x5100a008,0x4000000,0x200000,0x5100a008,0x7c00100,0x220400,0x5100a008,0x7c00100,0x230400, -0x5100a008,0x7c00100,0x250400,0x5100a008,0x7c00500,0x230400,0x5100a16f,0x2806400,0x962460,0x5100a16f,0x6800000,0x1329800,0x5100a16f,0x6800100,0x962540,0x5100a16f, -0x7c00100,0x230400,0x5100a16f,0xc000010,0x448000,0x5100a24f,0x2802100,0x962460,0x5100a24f,0x2802400,0x962460,0x5100a24f,0x6800000,0x1329800,0x5100a24f,0x7c00100, 
-0x230400,0x5100a24f,0xc000010,0x448000,0x5100a36e,0x2802100,0x962460,0x5100a36e,0x4000000,0x200000,0x5100a36e,0x6800100,0x962540,0x5100a36e,0x6804400,0x962540, -0x5100a36e,0x7c00100,0x230400,0x5100a442,0x2802100,0x962460,0x5100a442,0x4000000,0x200000,0x5100a442,0x6800000,0x1329800,0x5100a442,0x6800100,0x962540,0x5100a442, -0x7c00100,0x230400,0x5100a442,0xc000010,0x448000,0x5100a500,0x4000000,0x200000,0x5100a600,0x4000000,0x200000,0x5100a601,0x2802000,0x962460,0x5100a76b,0x7c00100, -0x230400,0x5100a868,0x7c00100,0x230400,0x5100a96c,0x4000000,0x200000,0x5100a96c,0x7c00100,0x230400,0x5100aa00,0x4000000,0x4e00000,0x5100ab00,0x4000000,0x4e00000, -0x51086600,0x24000020,0x810000,0x51086600,0x24000020,0x1410000,0x510a4005,0x7c00100,0xe30400,0x510a4711,0x7c40300,0xe30000,0x510a7300,0x24000000,0x34200000,0x510aaa00, -0x4000000,0x34e00000,0x5140a2db,0x4000400,0x400000,0x514a82ab,0x4000000,0xe00000,0x51802b9d,0x2802000,0x962460,0x51c00908,0x2802400,0x962460,0x51c0a008,0x2802400, -0x962460,0x52000f0a,0x2802100,0x962460,0x52000f0a,0x6800100,0x962540,0x52000f0a,0x7c00100,0x230400,0x52001004,0x4000000,0x1600000,0x52001b00,0x4000000,0x200000, -0x52001c1c,0x2802100,0x1862460,0x52001c1c,0x6800100,0x1862400,0x52001c1c,0x6800400,0x1862400,0x52001e12,0x7c00100,0x2230500,0x52001e12,0x7c00100,0x2330520,0x52002128, -0x4000002,0x400000,0x52002128,0x7c00100,0x230400,0x52002a00,0x4000000,0x1500000,0x52002a00,0x4000000,0x1600000,0x52002d00,0x4000000,0x200006,0x52003000,0x24000000, -0x200000,0x52006108,0x7c00100,0x220400,0x52006108,0x7c00100,0x250400,0x52008301,0x2802400,0x962460,0x52008407,0x2802400,0x962460,0x52008407,0x7c00100,0x220400, -0x52008407,0x7c00100,0x250400,0x52008b3b,0x6800000,0x1800000,0x52008b3b,0x7c00100,0x1830000,0x52008e00,0x24000000,0x400000,0x52009419,0x7c00100,0x250400,0x5200975b, -0x4000000,0x200000,0x5200ac7e,0x2802000,0x962460,0x5200ac7e,0x2802100,0x962460,0x5200ac7e,0x2802400,0x962460,0x5200ac7e,0x4000010,0x200000,0x5200ac7e,0x7c00100, 
-0x230400,0x5200ac7e,0xc000010,0x248000,0x5200ad28,0x7c00100,0x230400,0x5200ae6a,0x2802100,0x1862460,0x5200ae6a,0x2802400,0x962460,0x5200ae6a,0x2802400,0x1862460, -0x5200ae6a,0x2806000,0x1862460,0x5200ae6a,0x4000000,0x1800000,0x5200ae6a,0x6800000,0x1329800,0x5200ae6a,0x6800100,0x1862400,0x5200ae6a,0x6800100,0x1862540,0x5200ae6a, -0x7c00100,0x1830000,0x5200ae6a,0x7c00900,0x1830000,0x5200ae6a,0xc000010,0x1848000,0x5200b083,0x4000010,0x400000,0x5200b083,0x7c00100,0x230400,0x5200b083,0xc000010, -0x448000,0x5200b182,0x2802400,0x962460,0x5200b182,0x4000000,0x200000,0x5200b182,0x4000010,0x400000,0x5200b182,0x7c00100,0x230400,0x5200b182,0xc000010,0x448000, -0x5200b30a,0x2802400,0x962460,0x5200b30a,0x4000000,0x200000,0x5200b30a,0x7c00100,0x230400,0x5200b54e,0x2802100,0x962460,0x5200b54e,0x2802400,0x962460,0x5200b54e, -0x4000000,0x200000,0x5200b54e,0x4000010,0x400000,0x5200b54e,0x6800000,0x1329800,0x5200b54e,0x6800100,0x962540,0x5200b54e,0x6804400,0x962540,0x5200b54e,0x7c00100, -0x230400,0x5200b54e,0xc000010,0x448000,0x5200b61c,0x4000000,0x1800000,0x5200b61c,0x6800400,0x1862400,0x5200b61c,0x7c00100,0x1830000,0x5200b61c,0x7c00900,0x1830000, -0x5200b77f,0x2802100,0x1862460,0x5200b77f,0x2802400,0x1862460,0x5200b77f,0x4000000,0x1800000,0x5200b77f,0x4000010,0x1800000,0x5200b77f,0x7c00100,0x1830000,0x5200b77f, -0x7c00500,0x1830000,0x5200b77f,0x7c00900,0x1830000,0x5200b77f,0x7e00100,0x1830000,0x5200b873,0x2802100,0x962460,0x5200b873,0x2806400,0x962460,0x5200b873,0x6800000, -0x1329800,0x5200b873,0x6800100,0x962540,0x5200b873,0x6800400,0x962540,0x5200b873,0x7c00100,0x230400,0x5200b873,0xc000010,0x448000,0x5200b912,0x7c00100,0x2230500, -0x5200b912,0x7c00100,0x2330520,0x5200ba74,0x4000000,0x200000,0x5200ba74,0x4000010,0x400000,0x5200ba74,0x7c00100,0x230400,0x5200bb85,0x4000000,0x200000,0x5200bb85, -0x7c00100,0x230400,0x5200bc75,0x4000000,0x400000,0x5200bc75,0x4000010,0x400000,0x5200bc75,0x7c00100,0x230400,0x5200bd7d,0x4000000,0x200000,0x5200bd7d,0x7c00100, 
-0x230400,0x5200be7a,0x4000000,0x200000,0x5200be7a,0x7c00100,0x230400,0x5200bf58,0x7c00100,0x230400,0x5200c002,0x4000000,0x200000,0x5200c178,0x2802000,0x962460, -0x5200c178,0x2802100,0x962460,0x5200c178,0x2802400,0x962460,0x5200c178,0x2806400,0x962460,0x5200c178,0x4000000,0x200000,0x5200c178,0x6800100,0x962540,0x5200c178, -0x7c00100,0x230400,0x5200c178,0x7c00100,0x230401,0x5200c178,0xc000010,0x448000,0x5200c178,0x80000000,0x218960,0x5200c247,0x7c00100,0x230400,0x5200c247,0x7c00100, -0x830400,0x5200c247,0x7c00100,0x1430400,0x5200c300,0x4000000,0x200003,0x52022d00,0x4000000,0x100006,0x52023700,0x24000000,0x4100000,0x52023700,0x24000000,0x4e00000, -0x52023700,0x24000000,0x14100000,0x52023700,0x24000000,0x14e00000,0x52023700,0x24000000,0x96800000,0x52024400,0x4000000,0x100000,0x52027300,0x24000000,0x100000,0x5202c300, -0x4000000,0x100000,0x5202c300,0x4000000,0x100002,0x5202c300,0x4000000,0x100003,0x5202c300,0x4000000,0x10000d,0x5202c300,0x4000100,0x150400,0x5202c300,0x4000100, -0x15040d,0x5202c300,0x4000100,0x14150400,0x520a1e12,0x7c00100,0x2130480,0x520a3700,0x24000000,0x34e00000,0x520a3800,0x24000000,0x34100000,0x520a4711,0x7c40300,0xe30000, -0x520a4f11,0x7c00300,0xe30001,0x520a7300,0x24000000,0x34100000,0x520ab412,0x7c00100,0x2130480,0x520ac400,0x4000000,0xe00002,0x520ac400,0x4000000,0xe0000d,0x520ac400, -0x4000000,0x34e0000d,0x520ac414,0x4000000,0xe0000d,0x520ac511,0x7c40300,0xe30000,0x5240af8b,0x7c00100,0x230400,0x5240af90,0x4000400,0x200000,0x5240af90,0x6800100, -0x962540,0x5240af92,0x6800400,0x962540,0x5240af92,0x7c00100,0x230400,0x5240b2b1,0x4000000,0x200000,0x5240b2b1,0x4000000,0x1500000,0x5240b2bc,0x4000000,0x200000, -0x5240b2c9,0x4000000,0x200000,0x5240b5de,0x7c00900,0x230400,0x524a44ab,0x4000000,0xe00003,0x5280af8b,0x2802400,0x962460,0x5280af8c,0x2802400,0x962460,0x5280af92, -0x2802400,0x962460,0x5280af94,0x2802400,0x962460,0x5280af96,0x2802400,0x962460,0x52c0b3d5,0x2802400,0x962460,0x52c0b3d9,0x7c00100,0x230400,0x60000c02,0x2802100, 
-0x962460,0x60000c02,0x7c00100,0x230400,0x60000f0a,0x2802100,0x962460,0x60000f0a,0x6800100,0x962540,0x60000f0a,0x7c00100,0x230400,0x6000131f,0x4000000,0x200000, -0x6000171a,0x7c00100,0x230400,0x6000171a,0x7c00100,0x230560,0x60001b27,0x2802100,0x962460,0x60001b27,0x4000000,0xc00000,0x60001b27,0x7c00100,0x230400,0x60001f0b, -0x2802000,0x962460,0x60002919,0x7c00100,0x22040e,0x60002a00,0x4000000,0x1600000,0x60003000,0x24000000,0x14200000,0x60003000,0x24000000,0x14e00000,0x60003700,0x24000000, -0x4200000,0x60003800,0x24000000,0x1710000,0x60005102,0x4000000,0x200000,0x60006108,0x7c00100,0x220400,0x60006108,0x7c00100,0x250400,0x60006600,0x24000020,0x200000, -0x60008301,0x2802000,0x962460,0x6000903c,0x2806000,0x962460,0x6000903c,0x4000000,0x400000,0x60009519,0x7c00100,0x220400,0x60009519,0x7c00100,0x250400,0x6000a008, -0x7c00100,0x220400,0x6000a008,0x7c00100,0x250400,0x6000c300,0x4000000,0x3a703580,0x6000c654,0x2802000,0x962460,0x6000c654,0x4000010,0x200000,0x6000c654,0x7c00100, -0x230400,0x6000c73f,0x2802000,0x962460,0x6000c73f,0x2802100,0x962460,0x6000c73f,0x4000000,0x200000,0x6000c73f,0x6800100,0x962540,0x6000c73f,0x6804000,0x962540, -0x6000c73f,0x7c00100,0x230400,0x6000c80b,0x7c00100,0x230400,0x6000c941,0x2802100,0x962460,0x6000c941,0x2806000,0x962460,0x6000c941,0x4000000,0x200000,0x6000c941, -0x4000010,0x200000,0x6000c941,0x6800000,0x1329800,0x6000c941,0x6800100,0x962540,0x6000c941,0x7c00100,0x230400,0x6000c941,0xc000010,0x448000,0x6000ca82,0x7c00100, -0x230400,0x6000cc00,0x4000000,0x4e00000,0x6000d000,0x4000000,0x200000,0x6002c300,0x4000000,0x100000,0x6002c300,0x4000000,0x10000d,0x6002c300,0x4000100,0x150400, -0x6002c300,0x4000100,0x15040d,0x6002c300,0x4000100,0x14150400,0x600a3000,0x24000000,0x34200000,0x600a3000,0x24000000,0x34e00000,0x600a3700,0x24000000,0x34200000,0x600a3800, -0x24000000,0x34200000,0x600a3800,0x24000000,0xb6800000,0x600a4305,0x7c00100,0xe30400,0x600ac300,0x4000000,0x34100000,0x600ac400,0x4000000,0x14e0000d,0x600ac400,0x4000000, 
-0x34e0000d,0x600acb14,0x7c00100,0xe30000,0x600acb16,0x7c00100,0xe30c00,0x600acc00,0x4000000,0x34e00000,0x600acd00,0x4000000,0x34200000,0x600acd00,0x4000000,0x34e00000, -0x600acd00,0x4000000,0xb6800000,0x600ace00,0x4000000,0x34e00000,0x600ace00,0x4000000,0xb6800000,0x600acf00,0x4000000,0x34e00000,0x600acf00,0x4000000,0xb6800000,0x600ad111, -0x7c40300,0xe30000,0x604ac4ab,0x4000000,0x34e00003,0x61000a03,0x4000000,0x1600000,0x61000c02,0x80000000,0x218960,0x6100120f,0x4000000,0x200000,0x61001a18,0x7c00100, -0x1830000,0x61001d0c,0x7c00100,0x220400,0x61001d0c,0x7c00100,0x250400,0x61006600,0x24000020,0x200000,0x61008407,0x7c00100,0x220400,0x61008407,0x7c00100,0x250400, -0x6100870c,0x7c00100,0x220400,0x61008e00,0x24000000,0x200000,0x61008e00,0x24000000,0x400000,0x61008e00,0x24000002,0x300000,0x6100903c,0x7c00100,0x230400,0x61009519, -0x7c00100,0x220400,0x61009519,0x7c00100,0x250400,0x61009519,0x7c00500,0x22040f,0x61009b71,0x2802100,0x962460,0x61009b71,0x2806400,0x962460,0x61009b71,0x7c00100, -0x230400,0x6100a008,0x2802100,0x962460,0x6100c300,0x4000000,0x20000f,0x6100cd00,0x4000000,0x200000,0x6100d202,0x2802400,0x962460,0x6100d202,0x2802500,0x962460, -0x6100d202,0x7c00100,0x230400,0x6100d302,0x4000020,0x200000,0x6100d302,0x7c00120,0x230405,0x6100d476,0x2802100,0x962460,0x6100d476,0x2802100,0x962461,0x6100d476, -0x2806400,0x962460,0x6100d476,0x4000000,0x400000,0x6100d476,0x6800000,0x1329800,0x6100d476,0x6800100,0x962540,0x6100d476,0x7c00100,0x230400,0x6100d476,0xc000010, -0x448000,0x6100d573,0x2802100,0x962460,0x6100d573,0x2806400,0x962460,0x6100d573,0x6800100,0x962540,0x6100d573,0x7c00100,0x230400,0x6100d573,0x7c00900,0x230400, -0x6100d573,0xc000010,0x448000,0x6100d68d,0x7c00100,0x230400,0x6100d756,0x7c00100,0x230400,0x6100d85c,0x2802400,0x962460,0x6100d85c,0x6800100,0x962540,0x6100d85c, -0x7c00100,0x230400,0x6100d85c,0x7c00500,0x230400,0x6100d997,0x2802100,0x962460,0x6100d997,0x4000000,0x200000,0x6100d997,0x4000000,0x400000,0x6100d997,0x6800000, 
-0x1329800,0x6100d997,0x6800100,0x962540,0x6100d997,0x6804400,0x962540,0x6100d997,0x7c00100,0x230400,0x6100d997,0x7c00100,0x230560,0x6100d997,0xc000010,0x448000, -0x6100da98,0x6800000,0x1329800,0x6100da98,0x7c00100,0x230400,0x6100db71,0x4000000,0x200000,0x6100dc99,0x2802100,0x962460,0x6100dc99,0x2802400,0x962460,0x6100dc99, -0x6800000,0x1329800,0x6100dc99,0x6800100,0x962540,0x6100dc99,0x6804400,0x962540,0x6100dc99,0x7c00100,0x230400,0x610a4711,0x7c40300,0xe30000,0x610a4f11,0x7c00300, -0xe30001,0x610ace00,0x4000000,0x34e00000,0x6140af90,0x6800100,0x962540,0x6140af92,0x7c00100,0x230400,0x6180af8d,0x2802400,0x962460,0x62002a00,0x4000000,0x1600000, -0x63002800,0x80000,0x918820,0x63c00c15,0x80000,0x918820,0x7000080e,0x7c00100,0x250400,0x70000a03,0x4000000,0x200000,0x70000c00,0x80000000,0x218960,0x70000f0a, -0x7c00100,0x230400,0x70001004,0x7c00100,0x230400,0x70001524,0x2802100,0x962460,0x70001524,0x7c00100,0x230400,0x70001615,0x2802100,0x962460,0x7000171a,0x2802100, -0x962460,0x70001821,0x6800000,0x1329800,0x70002320,0x7c00100,0x230400,0x70002a00,0x4000000,0x1500000,0x70002a00,0x4000000,0x1600000,0x70003000,0x24000000,0x200000, -0x70003000,0x24000000,0x14200000,0x70003800,0x24000000,0x4e00000,0x70005201,0x2802400,0x962460,0x7000581e,0x7c00100,0x230400,0x70006108,0x7c00100,0x220400,0x70006108, -0x7c00100,0x250400,0x70006f30,0x7c00100,0x230400,0x70007300,0x24000000,0x200000,0x70007f0e,0x4000000,0x200000,0x70008301,0x2802100,0x962460,0x70008301,0x2802400, -0x962460,0x70008e00,0x24000000,0x200000,0x70008e00,0x24000000,0x400000,0x70008e00,0x24000002,0x400000,0x70008e00,0x24000008,0x1410000,0x70008e00,0x24000010,0x400000, -0x70008e00,0x2c000010,0x448000,0x70009519,0x7c00100,0x220400,0x70009519,0x7c00100,0x230400,0x70009519,0x7c00100,0x250400,0x70009865,0x7c00100,0x230400,0x70009965, -0x4000010,0x400000,0x70009965,0x7c00100,0x230400,0x7000a008,0x7c00100,0x220400,0x7000a008,0x7c00100,0x250400,0x7000a008,0x7c00500,0x22040f,0x7000a50e,0x4000000, 
-0x200000,0x7000b61c,0x2802400,0x1862460,0x7000b61c,0x6800400,0x1862400,0x7000b61c,0x7c00100,0x1830000,0x7000c300,0x4000000,0x100000,0x7000c941,0x2806000,0x962460, -0x7000cc00,0x4000000,0x4e00000,0x7000cd00,0x4000000,0x200000,0x7000cd00,0x4000000,0x4200000,0x7000cd00,0x4000000,0x4e00000,0x7000cd00,0x4000000,0x14200000,0x7000cd00, -0x4000000,0x14e00000,0x7000cd00,0x4000000,0x96800000,0x7000cf00,0x4000000,0x4e00000,0x7000cf00,0x4000000,0x14e00000,0x7000d202,0x2802100,0x962460,0x7000d202,0x7c00100, -0x230400,0x7000d997,0x7c00100,0x230400,0x7000d997,0xc000010,0x248000,0x7000dd86,0x2802400,0x962460,0x7000dd86,0x7c00100,0x230400,0x7000dd86,0xc000010,0x448000, -0x7000de9f,0x4000000,0x200000,0x7000de9f,0x7c00100,0x230400,0x7000e001,0x2000,0x962460,0x7000e001,0x2802400,0x962460,0x7000e187,0x2802000,0x962460,0x7000e187, -0x2802100,0x962460,0x7000e187,0x4000000,0x200000,0x7000e187,0x7c00100,0x230400,0x7000e187,0xc000010,0x448000,0x7000e288,0x7c00100,0x230400,0x7000e300,0x4000000, -0x200000,0x7000e489,0x2802100,0x962460,0x7000e489,0x2802400,0x962460,0x7000e489,0x6800100,0x962540,0x7000e489,0x6800100,0x962541,0x7000e489,0x6804400,0x962540, -0x7000e489,0x7c00100,0x230400,0x7000e489,0x7c00900,0x230400,0x7000e59d,0x2802100,0x962460,0x7000e59d,0x2802400,0x962460,0x7000e59d,0x4000000,0x200000,0x7000e59d, -0x4000010,0x200000,0x7000e59d,0x6800100,0x962540,0x7000e59d,0x6804400,0x962540,0x7000e59d,0x7c00100,0x230400,0x7000e59d,0xc000010,0x448000,0x7000e691,0x2802100, -0x962460,0x7000e691,0x2802400,0x962460,0x7000e691,0x2806400,0x962460,0x7000e691,0x6800000,0x1329800,0x7000e691,0x6800100,0x962540,0x7000e691,0x7c00100,0x230400, -0x7000e700,0x4000400,0x200400,0x7000e70e,0x7c00100,0x220400,0x7000e719,0x7c00100,0x220400,0x7000e719,0x7c00500,0x22040f,0x7000e853,0x7c00100,0x230400,0x7000e9a0, -0x2802400,0x962460,0x7000e9a0,0x4000000,0x200000,0x7000e9a0,0x4000000,0x500000,0x7000e9a0,0x7c00100,0x230400,0x7000ea79,0x2802400,0x962460,0x7000ea79,0x4000000, 
-0x200000,0x7000ea79,0x4000000,0xf00000,0x7000ea79,0x4000010,0x400000,0x7000ea79,0x7c00100,0x230400,0x7000eb8c,0x2802400,0x962460,0x7000eb8c,0x4000000,0x200000, -0x7000eb8c,0x7c00100,0x230400,0x7000eca3,0x2802100,0x962460,0x7000eca3,0x2806400,0x962460,0x7000eca3,0x4000000,0x200000,0x7000eca3,0x6800000,0x1329800,0x7000eca3, -0x6800100,0x962540,0x7000eca3,0x7c00100,0x230400,0x7000eca3,0xc000010,0x448000,0x7000ed95,0x6800000,0x1329800,0x7000ed95,0x7c00100,0x230400,0x7000ed95,0xc000010, -0x448000,0x7000ee1c,0x2802400,0x1862460,0x7000ee1c,0x6800000,0x1329800,0x7000ee1c,0x7c00100,0x1830000,0x7000ee1c,0x7c00900,0x1830000,0x7000ef8f,0x4000000,0x200000, -0x7000ef8f,0x7c00100,0x230400,0x7000f08e,0x4000000,0x200000,0x7000f08e,0x7c00100,0x230400,0x7000f159,0x2802100,0x962460,0x7000f159,0x7c00100,0x230400,0x7000f200, -0x4000000,0x200000,0x7000f200,0x4000000,0x1200000,0x7000f200,0x4000000,0x1710000,0x7000f34b,0x2802100,0x962460,0x7000f34b,0x4000000,0x200000,0x7000f34b,0x4000010, -0x400000,0x7000f34b,0x6800000,0x1329800,0x7000f34b,0x7c00100,0x230400,0x7000f34b,0x7c00900,0x230400,0x7000f34b,0xc000010,0x448000,0x7000f490,0x4000000,0x200000, -0x7000f490,0x7c00100,0x230400,0x7000f5a5,0x7c00100,0x230400,0x7000f67b,0x4000000,0x200000,0x7000f67b,0x4000010,0x200000,0x7000f67b,0x7c00100,0x230400,0x7000f8a6, -0x2802100,0x962460,0x7000f8a6,0x2802400,0x962460,0x7000f8a6,0x2806400,0x962460,0x7000f8a6,0x4000000,0x500000,0x7000f8a6,0x4000010,0xb00000,0x7000f8a6,0x4000800, -0x200000,0x7000f8a6,0x6800100,0x962540,0x7000f8a6,0x6800100,0x962541,0x7000f8a6,0x7c00100,0x230400,0x7000f8a6,0xc000010,0x448000,0x7000f921,0x4000000,0x200000, -0x7000fa00,0x4000000,0x200000,0x7000fb9e,0x2802100,0x962460,0x7000fb9e,0x2802400,0x962460,0x7000fb9e,0x2806400,0x962460,0x7000fb9e,0x4000000,0x200000,0x7000fb9e, -0x6800000,0x1329800,0x7000fb9e,0x6800100,0x962540,0x7000fb9e,0x6800100,0x962541,0x7000fb9e,0x7c00100,0x230400,0x7000fc92,0x4000000,0x200000,0x7000fc92,0x6800000, 
-0x1329800,0x7000fc92,0x7c00100,0x220400,0x7000fc92,0x7c00100,0x230400,0x7000fc92,0x7c00100,0x250400,0x700acd00,0x4000000,0x34e00000,0x700acd00,0x4000000,0xb6800000, -0x700ace00,0x4000000,0x34e00000,0x700acf00,0x4000000,0x34e00000,0x700acf00,0x4000000,0xb6800000,0x7040dfe5,0x4000000,0x200000,0x7040f7e9,0x80000,0x918820,0x7080af90, -0x2802400,0x962460,0x7080dfe5,0x2802400,0x962460,0x70c0e4e7,0x2802100,0x962460,0x70c0e4e7,0x2802400,0x962460,0x70c0e4e7,0x6800100,0x962540,0x8000120f,0x7c00100, -0x230400,0x80001524,0x7c00100,0x230400,0x8000171a,0x7c00100,0x230400,0x80002006,0x7c00100,0x220400,0x80002006,0x7c00100,0x250400,0x80002a00,0x4000000,0x1500000, -0x80002d00,0x4000000,0x200000,0x80005208,0x2802400,0x962460,0x80005c00,0x4000000,0x200000,0x80007300,0x24000000,0x200000,0x80009519,0x7c00100,0x220400,0x80009519, -0x7c00100,0x230400,0x80009519,0x7c00100,0x250400,0x80009865,0x7c00100,0x230400,0x8000a008,0x2802100,0x962460,0x8000b30a,0x4000000,0x500000,0x8000b30a,0x7c00100, -0x230400,0x8000cd00,0x4000000,0x4e00000,0x8000d202,0x2802500,0x962460,0x8000d202,0x7c00100,0x230400,0x8000d68d,0x4000000,0x200000,0x8000d997,0x2802000,0x962460, -0x8000d997,0x2802400,0x962460,0x8000d997,0x4000000,0x400000,0x8000d997,0x4000000,0x500000,0x8000d997,0x7c00100,0x230400,0x8000d997,0xc000010,0x448000,0x8000e489, -0x2802100,0x962460,0x8000e489,0x7c00100,0x230400,0x8000e719,0x7c00100,0x220400,0x8000f8a6,0x2802100,0x962460,0x8000f8a6,0x7c00100,0x230400,0x8000f8a6,0xc000010, -0x448000,0x8000fda1,0x2802100,0x1862460,0x8000fda1,0x2806400,0x1862460,0x8000fda1,0x4000000,0x1800000,0x8000fda1,0x6800000,0x1329800,0x8000fda1,0x6800100,0x1862540, -0x8000fda1,0x7c00100,0x1830000,0x8000fda1,0xc000010,0x448000,0x8000fe9c,0x7c00100,0x230400,0x8000fe9c,0x7c00100,0x830400,0x8000fe9c,0x7c00100,0x1430400,0x8000ff06, -0x7c00100,0x220400,0x80010165,0x7c00100,0x230400,0x800102a2,0x4000000,0x200000,0x800102a2,0x7c00100,0x230400,0x800103a4,0x7c00100,0x230400,0x800103a4,0xc000010, 
-0x448000,0x8001044c,0x4000000,0x200000,0x8001044c,0x7c00100,0x220400,0x8001044c,0x7c00100,0x250400,0x80010670,0x2802000,0x962460,0x80010670,0x4000000,0x200000, -0x80010670,0x4000010,0x400000,0x80010670,0xc000010,0x448000,0x800a4711,0x7c40300,0xe30000,0x800acd00,0x4000000,0x34e00000,0x800acd00,0x4000000,0x7a902460,0x800ace00, -0x4000000,0x34e00000,0x800acf00,0x4000000,0x34e00000,0x800b0011,0x7c40300,0xe30000,0x800b0500,0x4000000,0x34e00000,0x800b0500,0x4000000,0xb6800000,0x90001615,0x7c00100, -0x230400,0x9000171a,0x4000000,0x200000,0x9000171a,0x7c00100,0x230400,0x90003000,0x24000000,0x200000,0x90007f0e,0x4000000,0x200000,0x90008301,0x2802000,0x962460, -0x90008e00,0x24000000,0x400000,0x90009519,0x7c00100,0x250400,0x9000a16f,0x2802100,0x962460,0x9000d200,0x80000000,0x218960,0x9000d202,0x2802000,0x962460,0x9000d202, -0x2802100,0x962460,0x9000d202,0x7c00100,0x230400,0x9000e59d,0x2802100,0x962460,0x900107a7,0x2802100,0x962460,0x900107a7,0x2802400,0x962460,0x900107a7,0x2802c00, -0x962460,0x900107a7,0x4000000,0x1400000,0x900107a7,0x6800000,0x1329800,0x900107a7,0x7c00100,0x220400,0x900107a7,0x7c00100,0x250400,0x900108a8,0x2802100,0x962460, -0x900108a8,0x2806400,0x962460,0x900108a8,0x4000000,0x200000,0x900108a8,0x4000000,0x400000,0x900108a8,0x4000010,0x400000,0x900108a8,0x6800000,0x1329800,0x900108a8, -0x6800100,0x962540,0x900108a8,0x7c00100,0x230400,0x900108a8,0xc000010,0x448000,0x90010908,0x7c00100,0x220400,0x90010a38,0x2802100,0x962460,0x90010ca9,0x2802100, -0x962460,0x90010ca9,0x4000000,0x500000,0x90010ca9,0x4000010,0xb00000,0x90010ca9,0x6800100,0x962540,0x90010ca9,0x7c00100,0x230400,0x90010d1b,0x4000000,0x500000, -0x90010eaa,0x2802100,0x962460,0x90010eaa,0x2802400,0x962460,0x90010eaa,0x2806400,0x962460,0x90010eaa,0x4000000,0x200000,0x90010eaa,0x4000000,0x400000,0x90010eaa, -0x4000010,0x400000,0x90010eaa,0x6800000,0x1329800,0x90010eaa,0x6800100,0x962540,0x90010eaa,0x7c00100,0x230400,0x90010eaa,0xc000010,0x448000,0x90010fab,0x7c00100, 
-0x220400,0x90010fab,0x7c00100,0x250400,0x9002c300,0x4000000,0x100000,0x900ac400,0x4000000,0xe0000d,0x900acd00,0x4000000,0x34e00000,0x900acd00,0x4000000,0xb6800000, -0x900acf00,0x4000000,0x34e00000,0x900b0500,0x4000000,0xe00000,0x900b0500,0x4000000,0x34e00000,0x900b0500,0x4000000,0xb6800000,0x900b0b9a,0x7c00900,0x1230400,0x900b109a, -0x7c00300,0xe30000,0x900b119a,0x7c00300,0xe30000,0x90408e06,0x24000000,0x400000,0xa0001004,0x4000000,0x200000,0xa0001004,0x7c00100,0x230400,0xa000120f,0x2802100, -0x962460,0xa000120f,0x2802400,0x962460,0xa000171a,0x2802100,0x962460,0xa000171a,0x2806400,0x962460,0xa0002a00,0x4000000,0x1600000,0xa0003000,0x24000000,0x200000, -0xa000581e,0x7c00100,0x230400,0xa0007300,0x24000000,0x200000,0xa0008301,0x2802400,0x962460,0xa0008e00,0x24000000,0x400000,0xa000cf00,0x4000000,0x4e00000,0xa0010500, -0x4000000,0x200000,0xa00114af,0x2802100,0x962460,0xa00114af,0x2802400,0x962460,0xa00114af,0x2806400,0x962460,0xa00114af,0x6800000,0x1329800,0xa00114af,0x7c00100, -0x230400,0xa00114af,0x7c00100,0x230560,0xa00116b0,0x2802100,0x962460,0xa00116b0,0x2802800,0x962460,0xa00116b0,0x2806400,0x962460,0xa00116b0,0x4000000,0x400000, -0xa00116b0,0x4000000,0x500000,0xa00116b0,0x4000010,0x400000,0xa00116b0,0x6800100,0x962540,0xa00116b0,0x7c00100,0x230400,0xa00116b0,0x7c00100,0x230560,0xa00116b0, -0xc000010,0x448000,0xa0011722,0x7c00100,0x230400,0xa00118b1,0x2802000,0x962460,0xa00118b1,0x2802100,0x962460,0xa00118b1,0x2806400,0x962460,0xa00118b1,0x4000000, -0x200000,0xa00118b1,0x4000000,0x400000,0xa00118b1,0x4000000,0x500000,0xa00118b1,0x6800100,0x962540,0xa00118b1,0x7c00100,0x230400,0xa00118b1,0x7c00100,0x230560, -0xa00118b1,0xc000010,0x448000,0xa00a4005,0x7c00100,0xe30400,0xa00a4711,0x7c40300,0xe30000,0xa00ac400,0x4000000,0x4e00000,0xa00acb14,0x7c00100,0xe30000,0xa00acf00, -0x4000000,0x34e00000,0xa00b0500,0x4000000,0x34e00000,0xa00b0500,0x4000000,0xb6800000,0xa00b0b96,0x7c00900,0x1230400,0xa00b1211,0x7c40300,0xe30000,0xa00b1314,0x7c00100, 
-0xe30000,0xa00b1596,0x7c00300,0xe30000,0xa040af9c,0x6800400,0x962540,0xb0000a03,0x7c00100,0x220400,0xb0000b13,0x7c00100,0x2633800,0xb0001004,0x2802000,0x962460, -0xb0001110,0x4000000,0x200000,0xb0001524,0x2802000,0x962460,0xb0001615,0x4000000,0x500000,0xb000251b,0x7c00100,0x230400,0xb0007300,0x24000000,0x200000,0xb0008939, -0x4000000,0x200000,0xb0008939,0x7c00100,0x230400,0xb0008e00,0x24000000,0x200000,0xb0008e00,0x24000000,0x400000,0xb0008e00,0x24000010,0x400000,0xb0009257,0x2802000, -0x962460,0xb0009257,0x4000000,0x1600000,0xb0009519,0x7c00100,0x220400,0xb0009519,0x7c00100,0x250400,0xb0009a00,0x4000000,0x200000,0xb000b30a,0x2802000,0x962460, -0xb000b30a,0x7c00100,0x230400,0xb000c178,0x80000000,0x218960,0xb000c300,0x4000000,0x4200000,0xb000d202,0x2802000,0x962460,0xb000d476,0x6800100,0x962540,0xb000d476, -0x7c00100,0x230400,0xb000e300,0x4000000,0x4e00000,0xb000fda1,0x7c00100,0x1830000,0xb0010eaa,0x2802000,0x962460,0xb00116b0,0x7c00100,0x230400,0xb0011900,0x4000000, -0x4e00000,0xb0011ab2,0x2802100,0x962460,0xb0011ab2,0x2802400,0x962460,0xb0011ab2,0x2806400,0x962460,0xb0011ab2,0x4000000,0x200000,0xb0011ab2,0x6800100,0x962540, -0xb0011ab2,0x7c00100,0x230400,0xb0011b0c,0x7c00100,0x250400,0xb0011cb3,0x2802100,0x962460,0xb0011cb3,0x2806400,0x962460,0xb0011cb3,0x6800000,0x1329800,0xb0011cb3, -0x6800100,0x962540,0xb0011cb3,0x7c00100,0x230400,0xb0011db6,0x2802500,0x962460,0xb0011db6,0x6800000,0x1329800,0xb0011db6,0x7c00100,0x230400,0xb0011db6,0x7c00500, -0x230400,0xb0011e00,0x4000000,0x200000,0xb0011e00,0x4000000,0x1500000,0xb0011fb4,0x2802100,0x962460,0xb0011fb4,0x6800100,0x962540,0xb0011fb4,0x7c00100,0x230400, -0xb0011fb4,0xc000010,0x248000,0xb0012000,0x4000000,0x200000,0xb00121b5,0x4000000,0x200000,0xb00121b5,0x4000010,0x400000,0xb00121b5,0x7c00100,0x220400,0xb00121b5, -0x7c00100,0x250400,0xb00121b5,0xc000010,0x448000,0xb00122b8,0x4000000,0x200000,0xb00122b8,0x7c00100,0x230400,0xb00123b7,0x2802400,0x962460,0xb00123b7,0x4000000, 
-0x200000,0xb00123b7,0x7c00100,0x230400,0xb00123b7,0xc000010,0x248000,0xb00a4005,0x7c00100,0xe30400,0xb00a4711,0x7c40300,0xe30000,0xb00acf00,0x4000000,0x34e00000, -0xb00b0500,0x4000000,0x34e00000,0xb00b0500,0x4000000,0x3ce00000,0xb00b0500,0x4000000,0xb6800000,0xb00b109a,0x7c00300,0xe30000,0xb080e47a,0x2802000,0x962460}; - -static const int32_t countPropsVectors=6639; -static const int32_t propsVectorsColumns=3; -static const uint16_t scriptExtensions[234]={ -0x800e,0x8019,8,0x8059,8,2,8,0x8038,8,6,8,0x8019,3,0x800c,2,0x22, -0x25,0x80b6,2,0x22,0x8025,2,0x12,2,0x22,0x54,0x79,0x7b,0xa7,0xb6,0x80b7,2, -0x8022,2,0x8025,2,0x21,2,0x80b6,2,0x25,4,0xa,0xf,0x10,0x15,0x19,0x1a, -0x1f,0x23,0x24,0x89,0x97,0x809e,4,0xa,0xf,0x10,0x15,0x19,0x1a,0x1f,0x23,0x24, -0x89,0x809e,4,0xa,0xf,0x10,0x15,0x1a,0x1f,0x21,0x23,0x24,0x3a,0x89,0x91,0x99, -0x9e,0xa0,0xb2,0x80b3,4,0xa,0xf,0x10,0x15,0x1a,0x1f,0x21,0x23,0x24,0x30,0x3a, -0x89,0x91,0x99,0x9e,0xa0,0xb2,0x80b3,0xa,0x78,0xa0,0x80b2,0xa,0x67,4,0x3a,0x8076, -4,0x6d,0x10,0x80a4,0x10,0x72,0xf,0x809d,0xf,0x76,0x23,0x8089,0x23,0x7a,0x1c,0x34, -0x8076,0x1c,0x7e,0xc,0x8019,0x2a,0x2b,0x2c,0x802d,0x1b,0x805a,0x800a,4,0xa,0x15,0x8089, -0xa,0x8089,4,0x800a,0xa,0x8097,0xa,0x15,0x1a,0x1f,0x23,0x8024,0x8004,0xa,0x19,0x8089, -5,0x11,0x12,0x14,0x16,0x8029,5,0x11,0x12,0x14,0x8016,0x8011,5,0x8011,0x11,0x14, -0x8016,0xa,0xf,0x10,0x78,0x91,0x99,0x9d,0x9e,0xa0,0xa3,0x80b2,0xa,0xf,0x10,0x15, -0x1a,0x78,0x91,0x99,0x9d,0x9e,0xa0,0xa3,0x80b2,0xa,0xf,0x10,0x15,0x78,0x91,0x99, -0x9d,0x9e,0xa0,0xa3,0x80b2,0xa,0x92,0xa,0x8023,0xa,0xd7,0x19,0x1c,0x804f,0x37,0x804e, -0x2f,0x31,0x8053,0x2f,0x8031,2,0x8007,0x89,0x7a,0x8087}; - -static const int32_t indexes[UPROPS_INDEX_COUNT]={0x29fa,0x29fa,0x29fa,0x29fa,0x649c,3,0x7e8b,0x7f00,0x7f00,0x7f00,0xb23b8,0x2a75a31,0,0,0,0}; - -#endif // INCLUDED_FROM_UCHAR_C diff --git a/deps/node/deps/icu-small/source/common/ucharstrie.cpp b/deps/node/deps/icu-small/source/common/ucharstrie.cpp deleted file mode 100644 index e0b33af5..00000000 
--- a/deps/node/deps/icu-small/source/common/ucharstrie.cpp +++ /dev/null @@ -1,414 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2011, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: ucharstrie.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010nov14 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/appendable.h" -#include "unicode/ucharstrie.h" -#include "unicode/uobject.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "uassert.h" - -U_NAMESPACE_BEGIN - -UCharsTrie::~UCharsTrie() { - uprv_free(ownedArray_); -} - -UStringTrieResult -UCharsTrie::current() const { - const UChar *pos=pos_; - if(pos==NULL) { - return USTRINGTRIE_NO_MATCH; - } else { - int32_t node; - return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ? - valueResult(node) : USTRINGTRIE_NO_VALUE; - } -} - -UStringTrieResult -UCharsTrie::firstForCodePoint(UChar32 cp) { - return cp<=0xffff ? - first(cp) : - (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ? - next(U16_TRAIL(cp)) : - USTRINGTRIE_NO_MATCH); -} - -UStringTrieResult -UCharsTrie::nextForCodePoint(UChar32 cp) { - return cp<=0xffff ? - next(cp) : - (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ? - next(U16_TRAIL(cp)) : - USTRINGTRIE_NO_MATCH); -} - -UStringTrieResult -UCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) { - // Branch according to the current unit. - if(length==0) { - length=*pos++; - } - ++length; - // The length of the branch is the number of units to select from. - // The data structure encodes a binary search. 
- while(length>kMaxBranchLinearSubNodeLength) { - if(uchar<*pos++) { - length>>=1; - pos=jumpByDelta(pos); - } else { - length=length-(length>>1); - pos=skipDelta(pos); - } - } - // Drop down to linear search for the last few units. - // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3 - // and divides length by 2. - do { - if(uchar==*pos++) { - UStringTrieResult result; - int32_t node=*pos; - if(node&kValueIsFinal) { - // Leave the final value for getValue() to read. - result=USTRINGTRIE_FINAL_VALUE; - } else { - // Use the non-final value as the jump delta. - ++pos; - // int32_t delta=readValue(pos, node); - int32_t delta; - if(node=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; - } - pos_=pos; - return result; - } - --length; - pos=skipValue(pos); - } while(length>1); - if(uchar==*pos++) { - pos_=pos; - int32_t node=*pos; - return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; - } else { - stop(); - return USTRINGTRIE_NO_MATCH; - } -} - -UStringTrieResult -UCharsTrie::nextImpl(const UChar *pos, int32_t uchar) { - int32_t node=*pos++; - for(;;) { - if(node=kMinValueLead) ? - valueResult(node) : USTRINGTRIE_NO_VALUE; - } else { - // No match. - break; - } - } else if(node&kValueIsFinal) { - // No further matching units. - break; - } else { - // Skip intermediate value. - pos=skipNodeValue(pos, node); - node&=kNodeTypeMask; - } - } - stop(); - return USTRINGTRIE_NO_MATCH; -} - -UStringTrieResult -UCharsTrie::next(int32_t uchar) { - const UChar *pos=pos_; - if(pos==NULL) { - return USTRINGTRIE_NO_MATCH; - } - int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. - if(length>=0) { - // Remaining part of a linear-match node. - if(uchar==*pos++) { - remainingMatchLength_=--length; - pos_=pos; - int32_t node; - return (length<0 && (node=*pos)>=kMinValueLead) ? 
- valueResult(node) : USTRINGTRIE_NO_VALUE; - } else { - stop(); - return USTRINGTRIE_NO_MATCH; - } - } - return nextImpl(pos, uchar); -} - -UStringTrieResult -UCharsTrie::next(ConstChar16Ptr ptr, int32_t sLength) { - const UChar *s=ptr; - if(sLength<0 ? *s==0 : sLength==0) { - // Empty input. - return current(); - } - const UChar *pos=pos_; - if(pos==NULL) { - return USTRINGTRIE_NO_MATCH; - } - int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. - for(;;) { - // Fetch the next input unit, if there is one. - // Continue a linear-match node without rechecking sLength<0. - int32_t uchar; - if(sLength<0) { - for(;;) { - if((uchar=*s++)==0) { - remainingMatchLength_=length; - pos_=pos; - int32_t node; - return (length<0 && (node=*pos)>=kMinValueLead) ? - valueResult(node) : USTRINGTRIE_NO_VALUE; - } - if(length<0) { - remainingMatchLength_=length; - break; - } - if(uchar!=*pos) { - stop(); - return USTRINGTRIE_NO_MATCH; - } - ++pos; - --length; - } - } else { - for(;;) { - if(sLength==0) { - remainingMatchLength_=length; - pos_=pos; - int32_t node; - return (length<0 && (node=*pos)>=kMinValueLead) ? 
- valueResult(node) : USTRINGTRIE_NO_VALUE; - } - uchar=*s++; - --sLength; - if(length<0) { - remainingMatchLength_=length; - break; - } - if(uchar!=*pos) { - stop(); - return USTRINGTRIE_NO_MATCH; - } - ++pos; - --length; - } - } - int32_t node=*pos++; - for(;;) { - if(nodekMaxBranchLinearSubNodeLength) { - ++pos; // ignore the comparison unit - if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) { - return NULL; - } - length=length-(length>>1); - pos=skipDelta(pos); - } - do { - ++pos; // ignore a comparison unit - // handle its value - int32_t node=*pos++; - UBool isFinal=(UBool)(node>>15); - node&=0x7fff; - int32_t value=readValue(pos, node); - pos=skipValue(pos, node); - if(isFinal) { - if(haveUniqueValue) { - if(value!=uniqueValue) { - return NULL; - } - } else { - uniqueValue=value; - haveUniqueValue=TRUE; - } - } else { - if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) { - return NULL; - } - haveUniqueValue=TRUE; - } - } while(--length>1); - return pos+1; // ignore the last comparison unit -} - -UBool -UCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) { - int32_t node=*pos++; - for(;;) { - if(node>15); - int32_t value; - if(isFinal) { - value=readValue(pos, node&0x7fff); - } else { - value=readNodeValue(pos, node); - } - if(haveUniqueValue) { - if(value!=uniqueValue) { - return FALSE; - } - } else { - uniqueValue=value; - haveUniqueValue=TRUE; - } - if(isFinal) { - return TRUE; - } - pos=skipNodeValue(pos, node); - node&=kNodeTypeMask; - } - } -} - -int32_t -UCharsTrie::getNextUChars(Appendable &out) const { - const UChar *pos=pos_; - if(pos==NULL) { - return 0; - } - if(remainingMatchLength_>=0) { - out.appendCodeUnit(*pos); // Next unit of a pending linear-match node. 
- return 1; - } - int32_t node=*pos++; - if(node>=kMinValueLead) { - if(node&kValueIsFinal) { - return 0; - } else { - pos=skipNodeValue(pos, node); - node&=kNodeTypeMask; - } - } - if(nodekMaxBranchLinearSubNodeLength) { - ++pos; // ignore the comparison unit - getNextBranchUChars(jumpByDelta(pos), length>>1, out); - length=length-(length>>1); - pos=skipDelta(pos); - } - do { - out.appendCodeUnit(*pos++); - pos=skipValue(pos); - } while(--length>1); - out.appendCodeUnit(*pos); -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/ucharstriebuilder.cpp b/deps/node/deps/icu-small/source/common/ucharstriebuilder.cpp deleted file mode 100644 index 049997a2..00000000 --- a/deps/node/deps/icu-small/source/common/ucharstriebuilder.cpp +++ /dev/null @@ -1,443 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2012, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: ucharstriebuilder.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010nov14 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/ucharstrie.h" -#include "unicode/ucharstriebuilder.h" -#include "unicode/unistr.h" -#include "unicode/ustring.h" -#include "cmemory.h" -#include "uarrsort.h" -#include "uassert.h" -#include "uhash.h" -#include "ustr_imp.h" - -U_NAMESPACE_BEGIN - -/* - * Note: This builder implementation stores (string, value) pairs with full copies - * of the 16-bit-unit sequences, until the UCharsTrie is built. - * It might(!) take less memory if we collected the data in a temporary, dynamic trie. - */ - -class UCharsTrieElement : public UMemory { -public: - // Use compiler's default constructor, initializes nothing. 
- - void setTo(const UnicodeString &s, int32_t val, UnicodeString &strings, UErrorCode &errorCode); - - UnicodeString getString(const UnicodeString &strings) const { - int32_t length=strings[stringOffset]; - return strings.tempSubString(stringOffset+1, length); - } - int32_t getStringLength(const UnicodeString &strings) const { - return strings[stringOffset]; - } - - UChar charAt(int32_t index, const UnicodeString &strings) const { - return strings[stringOffset+1+index]; - } - - int32_t getValue() const { return value; } - - int32_t compareStringTo(const UCharsTrieElement &o, const UnicodeString &strings) const; - -private: - // The first strings unit contains the string length. - // (Compared with a stringLength field here, this saves 2 bytes per string.) - int32_t stringOffset; - int32_t value; -}; - -void -UCharsTrieElement::setTo(const UnicodeString &s, int32_t val, - UnicodeString &strings, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - int32_t length=s.length(); - if(length>0xffff) { - // Too long: We store the length in 1 unit. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - stringOffset=strings.length(); - strings.append((UChar)length); - value=val; - strings.append(s); -} - -int32_t -UCharsTrieElement::compareStringTo(const UCharsTrieElement &other, const UnicodeString &strings) const { - return getString(strings).compare(other.getString(strings)); -} - -UCharsTrieBuilder::UCharsTrieBuilder(UErrorCode & /*errorCode*/) - : elements(NULL), elementsCapacity(0), elementsLength(0), - uchars(NULL), ucharsCapacity(0), ucharsLength(0) {} - -UCharsTrieBuilder::~UCharsTrieBuilder() { - delete[] elements; - uprv_free(uchars); -} - -UCharsTrieBuilder & -UCharsTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return *this; - } - if(ucharsLength>0) { - // Cannot add elements after building. 
- errorCode=U_NO_WRITE_PERMISSION; - return *this; - } - if(elementsLength==elementsCapacity) { - int32_t newCapacity; - if(elementsCapacity==0) { - newCapacity=1024; - } else { - newCapacity=4*elementsCapacity; - } - UCharsTrieElement *newElements=new UCharsTrieElement[newCapacity]; - if(newElements==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return *this; - } - if(elementsLength>0) { - uprv_memcpy(newElements, elements, (size_t)elementsLength*sizeof(UCharsTrieElement)); - } - delete[] elements; - elements=newElements; - elementsCapacity=newCapacity; - } - elements[elementsLength++].setTo(s, value, strings, errorCode); - if(U_SUCCESS(errorCode) && strings.isBogus()) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } - return *this; -} - -U_CDECL_BEGIN - -static int32_t U_CALLCONV -compareElementStrings(const void *context, const void *left, const void *right) { - const UnicodeString *strings=static_cast(context); - const UCharsTrieElement *leftElement=static_cast(left); - const UCharsTrieElement *rightElement=static_cast(right); - return leftElement->compareStringTo(*rightElement, *strings); -} - -U_CDECL_END - -UCharsTrie * -UCharsTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { - buildUChars(buildOption, errorCode); - UCharsTrie *newTrie=NULL; - if(U_SUCCESS(errorCode)) { - newTrie=new UCharsTrie(uchars, uchars+(ucharsCapacity-ucharsLength)); - if(newTrie==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } else { - uchars=NULL; // The new trie now owns the array. 
- ucharsCapacity=0; - } - } - return newTrie; -} - -UnicodeString & -UCharsTrieBuilder::buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result, - UErrorCode &errorCode) { - buildUChars(buildOption, errorCode); - if(U_SUCCESS(errorCode)) { - result.setTo(FALSE, uchars+(ucharsCapacity-ucharsLength), ucharsLength); - } - return result; -} - -void -UCharsTrieBuilder::buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - if(uchars!=NULL && ucharsLength>0) { - // Already built. - return; - } - if(ucharsLength==0) { - if(elementsLength==0) { - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - if(strings.isBogus()) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement), - compareElementStrings, &strings, - FALSE, // need not be a stable sort - &errorCode); - if(U_FAILURE(errorCode)) { - return; - } - // Duplicate strings are not allowed. 
- UnicodeString prev=elements[0].getString(strings); - for(int32_t i=1; i(uprv_malloc(capacity*2)); - if(uchars==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - ucharsCapacity=0; - return; - } - ucharsCapacity=capacity; - } - StringTrieBuilder::build(buildOption, elementsLength, errorCode); - if(uchars==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } -} - -int32_t -UCharsTrieBuilder::getElementStringLength(int32_t i) const { - return elements[i].getStringLength(strings); -} - -UChar -UCharsTrieBuilder::getElementUnit(int32_t i, int32_t unitIndex) const { - return elements[i].charAt(unitIndex, strings); -} - -int32_t -UCharsTrieBuilder::getElementValue(int32_t i) const { - return elements[i].getValue(); -} - -int32_t -UCharsTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const { - const UCharsTrieElement &firstElement=elements[first]; - const UCharsTrieElement &lastElement=elements[last]; - int32_t minStringLength=firstElement.getStringLength(strings); - while(++unitIndex0); - return i; -} - -int32_t -UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const { - while(unit==elements[i].charAt(unitIndex, strings)) { - ++i; - } - return i; -} - -UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode) - : LinearMatchNode(len, nextNode), s(units) { - hash=hash*37u+ustr_hashUCharsN(units, len); -} - -UBool -UCharsTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!LinearMatchNode::operator==(other)) { - return FALSE; - } - const UCTLinearMatchNode &o=(const UCTLinearMatchNode &)other; - return 0==u_memcmp(s, o.s, length); -} - -void -UCharsTrieBuilder::UCTLinearMatchNode::write(StringTrieBuilder &builder) { - UCharsTrieBuilder &b=(UCharsTrieBuilder &)builder; - next->write(builder); - b.write(s, length); - offset=b.writeValueAndType(hasValue, value, b.getMinLinearMatch()+length-1); 
-} - -StringTrieBuilder::Node * -UCharsTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, - Node *nextNode) const { - return new UCTLinearMatchNode( - elements[i].getString(strings).getBuffer()+unitIndex, - length, - nextNode); -} - -UBool -UCharsTrieBuilder::ensureCapacity(int32_t length) { - if(uchars==NULL) { - return FALSE; // previous memory allocation had failed - } - if(length>ucharsCapacity) { - int32_t newCapacity=ucharsCapacity; - do { - newCapacity*=2; - } while(newCapacity<=length); - UChar *newUChars=static_cast(uprv_malloc(newCapacity*2)); - if(newUChars==NULL) { - // unable to allocate memory - uprv_free(uchars); - uchars=NULL; - ucharsCapacity=0; - return FALSE; - } - u_memcpy(newUChars+(newCapacity-ucharsLength), - uchars+(ucharsCapacity-ucharsLength), ucharsLength); - uprv_free(uchars); - uchars=newUChars; - ucharsCapacity=newCapacity; - } - return TRUE; -} - -int32_t -UCharsTrieBuilder::write(int32_t unit) { - int32_t newLength=ucharsLength+1; - if(ensureCapacity(newLength)) { - ucharsLength=newLength; - uchars[ucharsCapacity-ucharsLength]=(UChar)unit; - } - return ucharsLength; -} - -int32_t -UCharsTrieBuilder::write(const UChar *s, int32_t length) { - int32_t newLength=ucharsLength+length; - if(ensureCapacity(newLength)) { - ucharsLength=newLength; - u_memcpy(uchars+(ucharsCapacity-ucharsLength), s, length); - } - return ucharsLength; -} - -int32_t -UCharsTrieBuilder::writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) { - return write(elements[i].getString(strings).getBuffer()+unitIndex, length); -} - -int32_t -UCharsTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) { - if(0<=i && i<=UCharsTrie::kMaxOneUnitValue) { - return write(i|(isFinal<<15)); - } - UChar intUnits[3]; - int32_t length; - if(i<0 || i>UCharsTrie::kMaxTwoUnitValue) { - intUnits[0]=(UChar)(UCharsTrie::kThreeUnitValueLead); - intUnits[1]=(UChar)((uint32_t)i>>16); - intUnits[2]=(UChar)i; - length=3; - // } else 
if(i<=UCharsTrie::kMaxOneUnitValue) { - // intUnits[0]=(UChar)(i); - // length=1; - } else { - intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitValueLead+(i>>16)); - intUnits[1]=(UChar)i; - length=2; - } - intUnits[0]=(UChar)(intUnits[0]|(isFinal<<15)); - return write(intUnits, length); -} - -int32_t -UCharsTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) { - if(!hasValue) { - return write(node); - } - UChar intUnits[3]; - int32_t length; - if(value<0 || value>UCharsTrie::kMaxTwoUnitNodeValue) { - intUnits[0]=(UChar)(UCharsTrie::kThreeUnitNodeValueLead); - intUnits[1]=(UChar)((uint32_t)value>>16); - intUnits[2]=(UChar)value; - length=3; - } else if(value<=UCharsTrie::kMaxOneUnitNodeValue) { - intUnits[0]=(UChar)((value+1)<<6); - length=1; - } else { - intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitNodeValueLead+((value>>10)&0x7fc0)); - intUnits[1]=(UChar)value; - length=2; - } - intUnits[0]|=(UChar)node; - return write(intUnits, length); -} - -int32_t -UCharsTrieBuilder::writeDeltaTo(int32_t jumpTarget) { - int32_t i=ucharsLength-jumpTarget; - U_ASSERT(i>=0); - if(i<=UCharsTrie::kMaxOneUnitDelta) { - return write(i); - } - UChar intUnits[3]; - int32_t length; - if(i<=UCharsTrie::kMaxTwoUnitDelta) { - intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitDeltaLead+(i>>16)); - length=1; - } else { - intUnits[0]=(UChar)(UCharsTrie::kThreeUnitDeltaLead); - intUnits[1]=(UChar)(i>>16); - length=2; - } - intUnits[length++]=(UChar)i; - return write(intUnits, length); -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/ucharstrieiterator.cpp b/deps/node/deps/icu-small/source/common/ucharstrieiterator.cpp deleted file mode 100644 index b3132241..00000000 --- a/deps/node/deps/icu-small/source/common/ucharstrieiterator.cpp +++ /dev/null @@ -1,215 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2011, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: ucharstrieiterator.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010nov15 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/ucharstrie.h" -#include "unicode/unistr.h" -#include "uvectr32.h" - -U_NAMESPACE_BEGIN - -UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, - UErrorCode &errorCode) - : uchars_(trieUChars), - pos_(uchars_), initialPos_(uchars_), - remainingMatchLength_(-1), initialRemainingMatchLength_(-1), - skipValue_(FALSE), - maxLength_(maxStringLength), value_(0), stack_(NULL) { - if(U_FAILURE(errorCode)) { - return; - } - // stack_ is a pointer so that it's easy to turn ucharstrie.h into - // a public API header for which we would want it to depend only on - // other public headers. - // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway - // via the UnicodeString and UVector32 implementations, so this additional - // cost is minimal. 
- stack_=new UVector32(errorCode); - if(stack_==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } -} - -UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength, - UErrorCode &errorCode) - : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_), - remainingMatchLength_(trie.remainingMatchLength_), - initialRemainingMatchLength_(trie.remainingMatchLength_), - skipValue_(FALSE), - maxLength_(maxStringLength), value_(0), stack_(NULL) { - if(U_FAILURE(errorCode)) { - return; - } - stack_=new UVector32(errorCode); - if(U_FAILURE(errorCode)) { - return; - } - if(stack_==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. - if(length>=0) { - // Pending linear-match node, append remaining UChars to str_. - ++length; - if(maxLength_>0 && length>maxLength_) { - length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. - } - str_.append(pos_, length); - pos_+=length; - remainingMatchLength_-=length; - } -} - -UCharsTrie::Iterator::~Iterator() { - delete stack_; -} - -UCharsTrie::Iterator & -UCharsTrie::Iterator::reset() { - pos_=initialPos_; - remainingMatchLength_=initialRemainingMatchLength_; - skipValue_=FALSE; - int32_t length=remainingMatchLength_+1; // Remaining match length. - if(maxLength_>0 && length>maxLength_) { - length=maxLength_; - } - str_.truncate(length); - pos_+=length; - remainingMatchLength_-=length; - stack_->setSize(0); - return *this; -} - -UBool -UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } - -UBool -UCharsTrie::Iterator::next(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return FALSE; - } - const UChar *pos=pos_; - if(pos==NULL) { - if(stack_->isEmpty()) { - return FALSE; - } - // Pop the state off the stack and continue with the next outbound edge of - // the branch node. 
- int32_t stackSize=stack_->size(); - int32_t length=stack_->elementAti(stackSize-1); - pos=uchars_+stack_->elementAti(stackSize-2); - stack_->setSize(stackSize-2); - str_.truncate(length&0xffff); - length=(int32_t)((uint32_t)length>>16); - if(length>1) { - pos=branchNext(pos, length, errorCode); - if(pos==NULL) { - return TRUE; // Reached a final value. - } - } else { - str_.append(*pos++); - } - } - if(remainingMatchLength_>=0) { - // We only get here if we started in a pending linear-match node - // with more than maxLength remaining units. - return truncateAndStop(); - } - for(;;) { - int32_t node=*pos++; - if(node>=kMinValueLead) { - if(skipValue_) { - pos=skipNodeValue(pos, node); - node&=kNodeTypeMask; - skipValue_=FALSE; - } else { - // Deliver value for the string so far. - UBool isFinal=(UBool)(node>>15); - if(isFinal) { - value_=readValue(pos, node&0x7fff); - } else { - value_=readNodeValue(pos, node); - } - if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) { - pos_=NULL; - } else { - // We cannot skip the value right here because it shares its - // lead unit with a match node which we have to evaluate - // next time. - // Instead, keep pos_ on the node lead unit itself. - pos_=pos-1; - skipValue_=TRUE; - } - return TRUE; - } - } - if(maxLength_>0 && str_.length()==maxLength_) { - return truncateAndStop(); - } - if(node0 && str_.length()+length>maxLength_) { - str_.append(pos, maxLength_-str_.length()); - return truncateAndStop(); - } - str_.append(pos, length); - pos+=length; - } - } -} - -// Branch node, needs to take the first outbound edge and push state for the rest. -const UChar * -UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) { - while(length>kMaxBranchLinearSubNodeLength) { - ++pos; // ignore the comparison unit - // Push state for the greater-or-equal edge. 
- stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode); - stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode); - // Follow the less-than edge. - length>>=1; - pos=jumpByDelta(pos); - } - // List of key-value pairs where values are either final values or jump deltas. - // Read the first (key, value) pair. - UChar trieUnit=*pos++; - int32_t node=*pos++; - UBool isFinal=(UBool)(node>>15); - int32_t value=readValue(pos, node&=0x7fff); - pos=skipValue(pos, node); - stack_->addElement((int32_t)(pos-uchars_), errorCode); - stack_->addElement(((length-1)<<16)|str_.length(), errorCode); - str_.append(trieUnit); - if(isFinal) { - pos_=NULL; - value_=value; - return NULL; - } else { - return pos+value; - } -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/uchriter.cpp b/deps/node/deps/icu-small/source/common/uchriter.cpp deleted file mode 100644 index 822168f5..00000000 --- a/deps/node/deps/icu-small/source/common/uchriter.cpp +++ /dev/null @@ -1,367 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1998-2012, International Business Machines Corporation and -* others. All Rights Reserved. -****************************************************************************** -*/ - -#include "utypeinfo.h" // for 'typeid' to work - -#include "unicode/uchriter.h" -#include "unicode/ustring.h" -#include "unicode/utf16.h" -#include "ustr_imp.h" - -U_NAMESPACE_BEGIN - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCharCharacterIterator) - -UCharCharacterIterator::UCharCharacterIterator() - : CharacterIterator(), - text(0) -{ - // never default construct! -} - -UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, - int32_t length) - : CharacterIterator(textPtr != 0 ? (length>=0 ? 
length : u_strlen(textPtr)) : 0), - text(textPtr) -{ -} - -UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, - int32_t length, - int32_t position) - : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0, position), - text(textPtr) -{ -} - -UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, - int32_t length, - int32_t textBegin, - int32_t textEnd, - int32_t position) - : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0, textBegin, textEnd, position), - text(textPtr) -{ -} - -UCharCharacterIterator::UCharCharacterIterator(const UCharCharacterIterator& that) -: CharacterIterator(that), - text(that.text) -{ -} - -UCharCharacterIterator& -UCharCharacterIterator::operator=(const UCharCharacterIterator& that) { - CharacterIterator::operator=(that); - text = that.text; - return *this; -} - -UCharCharacterIterator::~UCharCharacterIterator() { -} - -UBool -UCharCharacterIterator::operator==(const ForwardCharacterIterator& that) const { - if (this == &that) { - return TRUE; - } - if (typeid(*this) != typeid(that)) { - return FALSE; - } - - UCharCharacterIterator& realThat = (UCharCharacterIterator&)that; - - return text == realThat.text - && textLength == realThat.textLength - && pos == realThat.pos - && begin == realThat.begin - && end == realThat.end; -} - -int32_t -UCharCharacterIterator::hashCode() const { - return ustr_hashUCharsN(text, textLength) ^ pos ^ begin ^ end; -} - -CharacterIterator* -UCharCharacterIterator::clone() const { - return new UCharCharacterIterator(*this); -} - -UChar -UCharCharacterIterator::first() { - pos = begin; - if(pos < end) { - return text[pos]; - } else { - return DONE; - } -} - -UChar -UCharCharacterIterator::firstPostInc() { - pos = begin; - if(pos < end) { - return text[pos++]; - } else { - return DONE; - } -} - -UChar -UCharCharacterIterator::last() { - pos = end; - if(pos > begin) { - return text[--pos]; - } else { - return DONE; - } -} - 
-UChar -UCharCharacterIterator::setIndex(int32_t position) { - if(position < begin) { - pos = begin; - } else if(position > end) { - pos = end; - } else { - pos = position; - } - if(pos < end) { - return text[pos]; - } else { - return DONE; - } -} - -UChar -UCharCharacterIterator::current() const { - if (pos >= begin && pos < end) { - return text[pos]; - } else { - return DONE; - } -} - -UChar -UCharCharacterIterator::next() { - if (pos + 1 < end) { - return text[++pos]; - } else { - /* make current() return DONE */ - pos = end; - return DONE; - } -} - -UChar -UCharCharacterIterator::nextPostInc() { - if (pos < end) { - return text[pos++]; - } else { - return DONE; - } -} - -UBool -UCharCharacterIterator::hasNext() { - return (UBool)(pos < end ? TRUE : FALSE); -} - -UChar -UCharCharacterIterator::previous() { - if (pos > begin) { - return text[--pos]; - } else { - return DONE; - } -} - -UBool -UCharCharacterIterator::hasPrevious() { - return (UBool)(pos > begin ? TRUE : FALSE); -} - -UChar32 -UCharCharacterIterator::first32() { - pos = begin; - if(pos < end) { - int32_t i = pos; - UChar32 c; - U16_NEXT(text, i, end, c); - return c; - } else { - return DONE; - } -} - -UChar32 -UCharCharacterIterator::first32PostInc() { - pos = begin; - if(pos < end) { - UChar32 c; - U16_NEXT(text, pos, end, c); - return c; - } else { - return DONE; - } -} - -UChar32 -UCharCharacterIterator::last32() { - pos = end; - if(pos > begin) { - UChar32 c; - U16_PREV(text, begin, pos, c); - return c; - } else { - return DONE; - } -} - -UChar32 -UCharCharacterIterator::setIndex32(int32_t position) { - if(position < begin) { - position = begin; - } else if(position > end) { - position = end; - } - if(position < end) { - U16_SET_CP_START(text, begin, position); - int32_t i = this->pos = position; - UChar32 c; - U16_NEXT(text, i, end, c); - return c; - } else { - this->pos = position; - return DONE; - } -} - -UChar32 -UCharCharacterIterator::current32() const { - if (pos >= begin && pos < end) { 
- UChar32 c; - U16_GET(text, begin, pos, end, c); - return c; - } else { - return DONE; - } -} - -UChar32 -UCharCharacterIterator::next32() { - if (pos < end) { - U16_FWD_1(text, pos, end); - if(pos < end) { - int32_t i = pos; - UChar32 c; - U16_NEXT(text, i, end, c); - return c; - } - } - /* make current() return DONE */ - pos = end; - return DONE; -} - -UChar32 -UCharCharacterIterator::next32PostInc() { - if (pos < end) { - UChar32 c; - U16_NEXT(text, pos, end, c); - return c; - } else { - return DONE; - } -} - -UChar32 -UCharCharacterIterator::previous32() { - if (pos > begin) { - UChar32 c; - U16_PREV(text, begin, pos, c); - return c; - } else { - return DONE; - } -} - -int32_t -UCharCharacterIterator::move(int32_t delta, CharacterIterator::EOrigin origin) { - switch(origin) { - case kStart: - pos = begin + delta; - break; - case kCurrent: - pos += delta; - break; - case kEnd: - pos = end + delta; - break; - default: - break; - } - - if(pos < begin) { - pos = begin; - } else if(pos > end) { - pos = end; - } - - return pos; -} - -int32_t -UCharCharacterIterator::move32(int32_t delta, CharacterIterator::EOrigin origin) { - // this implementation relies on the "safe" version of the UTF macros - // (or the trustworthiness of the caller) - switch(origin) { - case kStart: - pos = begin; - if(delta > 0) { - U16_FWD_N(text, pos, end, delta); - } - break; - case kCurrent: - if(delta > 0) { - U16_FWD_N(text, pos, end, delta); - } else { - U16_BACK_N(text, begin, pos, -delta); - } - break; - case kEnd: - pos = end; - if(delta < 0) { - U16_BACK_N(text, begin, pos, -delta); - } - break; - default: - break; - } - - return pos; -} - -void UCharCharacterIterator::setText(ConstChar16Ptr newText, - int32_t newTextLength) { - text = newText; - if(newText == 0 || newTextLength < 0) { - newTextLength = 0; - } - end = textLength = newTextLength; - pos = begin = 0; -} - -void -UCharCharacterIterator::getText(UnicodeString& result) { - result = UnicodeString(text, textLength); -} - 
-U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/ucln.h b/deps/node/deps/icu-small/source/common/ucln.h deleted file mode 100644 index fe6666ef..00000000 --- a/deps/node/deps/icu-small/source/common/ucln.h +++ /dev/null @@ -1,91 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2001-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ucln.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001July05 -* created by: George Rhoten -*/ - -#ifndef __UCLN_H__ -#define __UCLN_H__ - -#include "unicode/utypes.h" - -/** These are the functions used to register a library's memory cleanup - * functions. Each library should define a single library register function - * to call this API. In the i18n library, it is ucln_i18n_registerCleanup(). - * - * None of the cleanup functions should use a mutex to clean up an API's - * allocated memory because a cleanup function is not meant to be thread safe, - * and plenty of data cannot be reference counted in order to make sure that - * no one else needs the allocated data. - * - * In order to make a cleanup function get called when u_cleanup is called, - * You should add your function to the library specific cleanup function. - * If the cleanup function is not in the common library, the code that - * allocates the memory should call the library specific cleanup function. - * For instance, in the i18n library, any memory allocated statically must - * call ucln_i18n_registerCleanup() from the ucln_in.h header. These library - * cleanup functions are needed in order to prevent a circular dependency - * between the common library and any other library. 
- * - * The order of the cleanup is very important. In general, an API that - * depends on a second API should be cleaned up before the second API. - * For instance, the default converter in ustring depends upon the converter - * API. So the default converter should be closed before the converter API - * has its cache flushed. This will prevent any memory leaks due to - * reference counting. - * - * Please see common/ucln_cmn.{h,c} and i18n/ucln_in.{h,c} for examples. - */ - -/** - * Data Type for cleanup function selector. These roughly correspond to libraries. - */ -typedef enum ECleanupLibraryType { - UCLN_START = -1, - UCLN_UPLUG, /* ICU plugins */ - UCLN_CUSTOM, /* Custom is for anyone else. */ - UCLN_CTESTFW, - UCLN_TOOLUTIL, - UCLN_LAYOUTEX, - UCLN_LAYOUT, - UCLN_IO, - UCLN_I18N, - UCLN_COMMON /* This must be the last one to cleanup. */ -} ECleanupLibraryType; - -/** - * Data type for cleanup function pointer - */ -U_CDECL_BEGIN -typedef UBool U_CALLCONV cleanupFunc(void); -typedef void U_CALLCONV initFunc(UErrorCode *); -U_CDECL_END - -/** - * Register a cleanup function - * @param type which library to register for. - * @param func the function pointer - */ -U_CAPI void U_EXPORT2 ucln_registerCleanup(ECleanupLibraryType type, - cleanupFunc *func); - -/** - * Request cleanup for one specific library. - * Not thread safe. - * @param type which library to cleanup - */ -U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type); - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucln_cmn.cpp b/deps/node/deps/icu-small/source/common/ucln_cmn.cpp deleted file mode 100644 index 7e541a1a..00000000 --- a/deps/node/deps/icu-small/source/common/ucln_cmn.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2001-2014, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* file name: ucln_cmn.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001July05 -* created by: George Rhoten -*/ - -#include "unicode/utypes.h" -#include "unicode/uclean.h" -#include "cmemory.h" -#include "mutex.h" -#include "uassert.h" -#include "ucln.h" -#include "ucln_cmn.h" -#include "utracimp.h" -#include "umutex.h" - -/** Auto-client for UCLN_COMMON **/ -#define UCLN_TYPE_IS_COMMON -#include "ucln_imp.h" - -static cleanupFunc *gCommonCleanupFunctions[UCLN_COMMON_COUNT]; -static cleanupFunc *gLibCleanupFunctions[UCLN_COMMON]; - - -/************************************************ - The cleanup order is important in this function. - Please be sure that you have read ucln.h - ************************************************/ -U_CAPI void U_EXPORT2 -u_cleanup(void) -{ - UTRACE_ENTRY_OC(UTRACE_U_CLEANUP); - umtx_lock(NULL); /* Force a memory barrier, so that we are sure to see */ - umtx_unlock(NULL); /* all state left around by any other threads. */ - - ucln_lib_cleanup(); - - cmemory_cleanup(); /* undo any heap functions set by u_setMemoryFunctions(). */ - UTRACE_EXIT(); /* Must be before utrace_cleanup(), which turns off tracing. 
*/ -/*#if U_ENABLE_TRACING*/ - utrace_cleanup(); -/*#endif*/ -} - -U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType) -{ - if (gLibCleanupFunctions[libType]) - { - gLibCleanupFunctions[libType](); - gLibCleanupFunctions[libType] = NULL; - } -} - -U_CFUNC void -ucln_common_registerCleanup(ECleanupCommonType type, - cleanupFunc *func) -{ - U_ASSERT(UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT); - if (UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT) - { - icu::Mutex m; // See ticket 10295 for discussion. - gCommonCleanupFunctions[type] = func; - } -#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL)) - ucln_registerAutomaticCleanup(); -#endif -} - -// Note: ucln_registerCleanup() is called with the ICU global mutex locked. -// Be aware if adding anything to the function. -// See ticket 10295 for discussion. - -U_CAPI void U_EXPORT2 -ucln_registerCleanup(ECleanupLibraryType type, - cleanupFunc *func) -{ - U_ASSERT(UCLN_START < type && type < UCLN_COMMON); - if (UCLN_START < type && type < UCLN_COMMON) - { - gLibCleanupFunctions[type] = func; - } -} - -U_CFUNC UBool ucln_lib_cleanup(void) { - int32_t libType = UCLN_START; - int32_t commonFunc = UCLN_COMMON_START; - - for (libType++; libType(libType)); - } - - for (commonFunc++; commonFunc - -/** - * Auto cleanup of ICU libraries - * There are several methods in per library cleanup of icu libraries: - * 1) Compiler/Platform based cleanup: - * a) Windows MSVC uses DllMain() - * b) GCC uses destructor function attribute - * c) Sun Studio, AIX VA, and HP-UX aCC uses a linker option to set the exit function - * 2) Using atexit() - * 3) Implementing own automatic cleanup functions - * - * For option 1, ensure that UCLN_NO_AUTO_CLEANUP is set to 0 by using --enable-auto-cleanup - * configure option or by otherwise setting UCLN_NO_AUTO_CLEANUP to 0 - * For option 2, follow option 1 and also define UCLN_AUTO_ATEXIT - * For option 3, follow option 1 and also define 
UCLN_AUTO_LOCAL (see below for more information) - */ - -#if !UCLN_NO_AUTO_CLEANUP - -/* - * The following declarations are for when UCLN_AUTO_LOCAL or UCLN_AUTO_ATEXIT - * are defined. They are commented out because they are static and will be defined - * later. The information is still here to provide some guidance for the developer - * who chooses to use UCLN_AUTO_LOCAL. - */ -/** - * Give the library an opportunity to register an automatic cleanup. - * This may be called more than once. - */ -/*static void ucln_registerAutomaticCleanup();*/ -/** - * Unregister an automatic cleanup, if possible. Called from cleanup. - */ -/*static void ucln_unRegisterAutomaticCleanup();*/ - -#ifdef UCLN_TYPE_IS_COMMON -# define UCLN_CLEAN_ME_UP u_cleanup() -#else -# define UCLN_CLEAN_ME_UP ucln_cleanupOne(UCLN_TYPE) -#endif - -/* ------------ automatic cleanup: registration. Choose ONE ------- */ -#if defined(UCLN_AUTO_LOCAL) -/* To use: - * 1. define UCLN_AUTO_LOCAL, - * 2. create ucln_local_hook.c containing implementations of - * static void ucln_registerAutomaticCleanup() - * static void ucln_unRegisterAutomaticCleanup() - */ -#include "ucln_local_hook.c" - -#elif defined(UCLN_AUTO_ATEXIT) -/* - * Use the ANSI C 'atexit' function. Note that this mechanism does not - * guarantee the order of cleanup relative to other users of ICU! - */ -static UBool gAutoCleanRegistered = FALSE; - -static void ucln_atexit_handler() -{ - UCLN_CLEAN_ME_UP; -} - -static void ucln_registerAutomaticCleanup() -{ - if(!gAutoCleanRegistered) { - gAutoCleanRegistered = TRUE; - atexit(&ucln_atexit_handler); - } -} - -static void ucln_unRegisterAutomaticCleanup () { -} -/* ------------end of automatic cleanup: registration. ------- */ - -#elif defined (UCLN_FINI) -/** - * If UCLN_FINI is defined, it is the (versioned, etc) name of a cleanup - * entrypoint. 
Add a stub to call ucln_cleanupOne - * Used on AIX, Solaris, and HP-UX - */ -U_CAPI void U_EXPORT2 UCLN_FINI (void); - -U_CAPI void U_EXPORT2 UCLN_FINI () -{ - /* This function must be defined, if UCLN_FINI is defined, else link error. */ - UCLN_CLEAN_ME_UP; -} - -/* Windows: DllMain */ -#elif U_PLATFORM_HAS_WIN32_API -/* - * ICU's own DllMain. - */ - -/* these are from putil.c */ -/* READ READ READ READ! Are you getting compilation errors from windows.h? - Any source file which includes this (ucln_imp.h) header MUST - be defined with language extensions ON. */ -#ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -#endif -# define VC_EXTRALEAN -# define NOUSER -# define NOSERVICE -# define NOIME -# define NOMCX -# include -/* - * This is a stub DllMain function with icu specific process handling code. - */ -BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) -{ - BOOL status = TRUE; - - switch(fdwReason) { - case DLL_PROCESS_ATTACH: - /* ICU does not trap process attach, but must pass these through properly. */ - /* ICU specific process attach could go here */ - break; - - case DLL_PROCESS_DETACH: - /* Here is the one we actually care about. */ - - UCLN_CLEAN_ME_UP; - - break; - - case DLL_THREAD_ATTACH: - /* ICU does not trap thread attach, but must pass these through properly. */ - /* ICU specific thread attach could go here */ - break; - - case DLL_THREAD_DETACH: - /* ICU does not trap thread detach, but must pass these through properly. */ - /* ICU specific thread detach could go here */ - break; - - } - return status; -} - -#elif defined(__GNUC__) -/* GCC - use __attribute((destructor)) */ -static void ucln_destructor() __attribute__((destructor)) ; - -static void ucln_destructor() -{ - UCLN_CLEAN_ME_UP; -} - -#endif - -#endif /* UCLN_NO_AUTO_CLEANUP */ - -#else -#error This file can only be included once. 
-#endif diff --git a/deps/node/deps/icu-small/source/common/ucmndata.cpp b/deps/node/deps/icu-small/source/common/ucmndata.cpp deleted file mode 100644 index ba2310bb..00000000 --- a/deps/node/deps/icu-small/source/common/ucmndata.cpp +++ /dev/null @@ -1,393 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************/ - - -/*------------------------------------------------------------------------------ - * - * UCommonData An abstract interface for dealing with ICU Common Data Files. - * ICU Common Data Files are a grouping of a number of individual - * data items (resources, converters, tables, anything) into a - * single file or dll. The combined format includes a table of - * contents for locating the individual items by name. - * - * Two formats for the table of contents are supported, which is - * why there is an abstract inteface involved. 
- * - */ - -#include "unicode/utypes.h" -#include "unicode/udata.h" -#include "cstring.h" -#include "ucmndata.h" -#include "udatamem.h" - -#if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP) -# include -#endif - -U_CFUNC uint16_t -udata_getHeaderSize(const DataHeader *udh) { - if(udh==NULL) { - return 0; - } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) { - /* same endianness */ - return udh->dataHeader.headerSize; - } else { - /* opposite endianness */ - uint16_t x=udh->dataHeader.headerSize; - return (uint16_t)((x<<8)|(x>>8)); - } -} - -U_CFUNC uint16_t -udata_getInfoSize(const UDataInfo *info) { - if(info==NULL) { - return 0; - } else if(info->isBigEndian==U_IS_BIG_ENDIAN) { - /* same endianness */ - return info->size; - } else { - /* opposite endianness */ - uint16_t x=info->size; - return (uint16_t)((x<<8)|(x>>8)); - } -} - -/*-----------------------------------------------------------------------------* - * * - * Pointer TOCs. TODO: This form of table-of-contents should be removed * - * because DLLs must be relocated on loading to correct the * - * pointer values and this operation makes shared memory * - * mapping of the data much less likely to work. * - * * - *-----------------------------------------------------------------------------*/ -typedef struct { - const char *entryName; - const DataHeader *pHeader; -} PointerTOCEntry; - - -typedef struct { - uint32_t count; - uint32_t reserved; - /** - * Variable-length array declared with length 1 to disable bounds checkers. - * The actual array length is in the count field. - */ - PointerTOCEntry entry[1]; -} PointerTOC; - - -/* definition of OffsetTOC struct types moved to ucmndata.h */ - -/*-----------------------------------------------------------------------------* - * * - * entry point lookup implementations * - * * - *-----------------------------------------------------------------------------*/ - -#ifndef MIN -#define MIN(a,b) (((a)<(b)) ? 
(a) : (b)) -#endif - -/** - * Compare strings where we know the shared prefix length, - * and advance the prefix length as we find that the strings share even more characters. - */ -static int32_t -strcmpAfterPrefix(const char *s1, const char *s2, int32_t *pPrefixLength) { - int32_t pl=*pPrefixLength; - int32_t cmp=0; - s1+=pl; - s2+=pl; - for(;;) { - int32_t c1=(uint8_t)*s1++; - int32_t c2=(uint8_t)*s2++; - cmp=c1-c2; - if(cmp!=0 || c1==0) { /* different or done */ - break; - } - ++pl; /* increment shared same-prefix length */ - } - *pPrefixLength=pl; - return cmp; -} - -static int32_t -offsetTOCPrefixBinarySearch(const char *s, const char *names, - const UDataOffsetTOCEntry *toc, int32_t count) { - int32_t start=0; - int32_t limit=count; - /* - * Remember the shared prefix between s, start and limit, - * and don't compare that shared prefix again. - * The shared prefix should get longer as we narrow the [start, limit[ range. - */ - int32_t startPrefixLength=0; - int32_t limitPrefixLength=0; - if(count==0) { - return -1; - } - /* - * Prime the prefix lengths so that we don't keep prefixLength at 0 until - * both the start and limit indexes have moved. - * At the same time, we find if s is one of the start and (limit-1) names, - * and if not, exclude them from the actual binary search. 
- */ - if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, &startPrefixLength)) { - return 0; - } - ++start; - --limit; - if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, &limitPrefixLength)) { - return limit; - } - while(starttoc; - if (toc != NULL) { - retVal = toc->count; - } - return retVal; -} - -static const DataHeader * U_CALLCONV -offsetTOCLookupFn(const UDataMemory *pData, - const char *tocEntryName, - int32_t *pLength, - UErrorCode *pErrorCode) { - (void)pErrorCode; - const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; - if(toc!=NULL) { - const char *base=(const char *)toc; - int32_t number, count=(int32_t)toc->count; - - /* perform a binary search for the data in the common data's table of contents */ -#if defined (UDATA_DEBUG_DUMP) - /* list the contents of the TOC each time .. not recommended */ - for(number=0; numberentry[number].nameOffset]); - } -#endif - number=offsetTOCPrefixBinarySearch(tocEntryName, base, toc->entry, count); - if(number>=0) { - /* found it */ - const UDataOffsetTOCEntry *entry=toc->entry+number; -#ifdef UDATA_DEBUG - fprintf(stderr, "%s: Found.\n", tocEntryName); -#endif - if((number+1) < count) { - *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset); - } else { - *pLength = -1; - } - return (const DataHeader *)(base+entry->dataOffset); - } else { -#ifdef UDATA_DEBUG - fprintf(stderr, "%s: Not found.\n", tocEntryName); -#endif - return NULL; - } - } else { -#ifdef UDATA_DEBUG - fprintf(stderr, "returning header\n"); -#endif - - return pData->pHeader; - } -} - - -static uint32_t U_CALLCONV pointerTOCEntryCount(const UDataMemory *pData) { - const PointerTOC *toc = (PointerTOC *)pData->toc; - return (uint32_t)((toc != NULL) ? 
(toc->count) : 0); -} - -static const DataHeader * U_CALLCONV pointerTOCLookupFn(const UDataMemory *pData, - const char *name, - int32_t *pLength, - UErrorCode *pErrorCode) { - (void)pErrorCode; - if(pData->toc!=NULL) { - const PointerTOC *toc = (PointerTOC *)pData->toc; - int32_t number, count=(int32_t)toc->count; - -#if defined (UDATA_DEBUG_DUMP) - /* list the contents of the TOC each time .. not recommended */ - for(number=0; numberentry[number].entryName); - } -#endif - number=pointerTOCPrefixBinarySearch(name, toc->entry, count); - if(number>=0) { - /* found it */ -#ifdef UDATA_DEBUG - fprintf(stderr, "%s: Found.\n", toc->entry[number].entryName); -#endif - *pLength=-1; - return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader); - } else { -#ifdef UDATA_DEBUG - fprintf(stderr, "%s: Not found.\n", name); -#endif - return NULL; - } - } else { - return pData->pHeader; - } -} -U_CDECL_END - - -static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount}; -static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount}; - - - -/*----------------------------------------------------------------------* - * * - * checkCommonData Validate the format of a common data file. * - * Fill in the virtual function ptr based on TOC type * - * If the data is invalid, close the UDataMemory * - * and set the appropriate error code. 
* - * * - *----------------------------------------------------------------------*/ -U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) { - if (U_FAILURE(*err)) { - return; - } - - if(udm==NULL || udm->pHeader==NULL) { - *err=U_INVALID_FORMAT_ERROR; - } else if(!(udm->pHeader->dataHeader.magic1==0xda && - udm->pHeader->dataHeader.magic2==0x27 && - udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN && - udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY) - ) { - /* header not valid */ - *err=U_INVALID_FORMAT_ERROR; - } - else if (udm->pHeader->info.dataFormat[0]==0x43 && - udm->pHeader->info.dataFormat[1]==0x6d && - udm->pHeader->info.dataFormat[2]==0x6e && - udm->pHeader->info.dataFormat[3]==0x44 && - udm->pHeader->info.formatVersion[0]==1 - ) { - /* dataFormat="CmnD" */ - udm->vFuncs = &CmnDFuncs; - udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); - } - else if(udm->pHeader->info.dataFormat[0]==0x54 && - udm->pHeader->info.dataFormat[1]==0x6f && - udm->pHeader->info.dataFormat[2]==0x43 && - udm->pHeader->info.dataFormat[3]==0x50 && - udm->pHeader->info.formatVersion[0]==1 - ) { - /* dataFormat="ToCP" */ - udm->vFuncs = &ToCPFuncs; - udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); - } - else { - /* dataFormat not recognized */ - *err=U_INVALID_FORMAT_ERROR; - } - - if (U_FAILURE(*err)) { - /* If the data is no good and we memory-mapped it ourselves, - * close the memory mapping so it doesn't leak. Note that this has - * no effect on non-memory mapped data, other than clearing fields in udm. - */ - udata_close(udm); - } -} - -/* - * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package - * header but not its sub-items. - * This function will be needed for automatic runtime swapping. - * Sub-items should not be swapped to limit the swapping to the parts of the - * package that are actually used. 
- * - * Since lengths of items are implicit in the order and offsets of their - * ToC entries, and since offsets are relative to the start of the ToC, - * a swapped version may need to generate a different data structure - * with pointers to the original data items and with their lengths - * (-1 for the last one if it is not known), and maybe even pointers to the - * swapped versions of the items. - * These pointers to swapped versions would establish a cache; - * instead, each open data item could simply own the storage for its swapped - * data. This fits better with the current design. - * - * markus 2003sep18 Jitterbug 2235 - */ diff --git a/deps/node/deps/icu-small/source/common/ucmndata.h b/deps/node/deps/icu-small/source/common/ucmndata.h deleted file mode 100644 index 16844414..00000000 --- a/deps/node/deps/icu-small/source/common/ucmndata.h +++ /dev/null @@ -1,117 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************/ - - -/*---------------------------------------------------------------------------------- - * - * UCommonData An abstract interface for dealing with ICU Common Data Files. - * ICU Common Data Files are a grouping of a number of individual - * data items (resources, converters, tables, anything) into a - * single file or dll. The combined format includes a table of - * contents for locating the individual items by name. - * - * Two formats for the table of contents are supported, which is - * why there is an abstract inteface involved. - * - * These functions are part of the ICU internal implementation, and - * are not inteded to be used directly by applications. 
- */ - -#ifndef __UCMNDATA_H__ -#define __UCMNDATA_H__ - -#include "unicode/udata.h" -#include "umapfile.h" - - -#define COMMON_DATA_NAME U_ICUDATA_NAME - -typedef struct { - uint16_t headerSize; - uint8_t magic1; - uint8_t magic2; -} MappedData; - - -typedef struct { - MappedData dataHeader; - UDataInfo info; -} DataHeader; - -typedef struct { - uint32_t nameOffset; - uint32_t dataOffset; -} UDataOffsetTOCEntry; - -typedef struct { - uint32_t count; - /** - * Variable-length array declared with length 1 to disable bounds checkers. - * The actual array length is in the count field. - */ - UDataOffsetTOCEntry entry[1]; -} UDataOffsetTOC; - -/** - * Get the header size from a const DataHeader *udh. - * Handles opposite-endian data. - * - * @internal - */ -U_CFUNC uint16_t -udata_getHeaderSize(const DataHeader *udh); - -/** - * Get the UDataInfo.size from a const UDataInfo *info. - * Handles opposite-endian data. - * - * @internal - */ -U_CFUNC uint16_t -udata_getInfoSize(const UDataInfo *info); - -U_CDECL_BEGIN -/* - * "Virtual" functions for data lookup. - * To call one, given a UDataMemory *p, the code looks like this: - * p->vFuncs.Lookup(p, tocEntryName, pErrorCode); - * (I sure do wish this was written in C++, not C) - */ - -typedef const DataHeader * -(U_CALLCONV * LookupFn)(const UDataMemory *pData, - const char *tocEntryName, - int32_t *pLength, - UErrorCode *pErrorCode); - -typedef uint32_t -(U_CALLCONV * NumEntriesFn)(const UDataMemory *pData); - -U_CDECL_END - -typedef struct { - LookupFn Lookup; - NumEntriesFn NumEntries; -} commonDataFuncs; - - -/* - * Functions to check whether a UDataMemory refers to memory containing - * a recognizable header and table of contents a Common Data Format - * - * If a valid header and TOC are found, - * set the CommonDataFuncs function dispatch vector in the UDataMemory - * to point to the right functions for the TOC type. - * otherwise - * set an errorcode. 
- */ -U_CFUNC void udata_checkCommonData(UDataMemory *pData, UErrorCode *pErrorCode); - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnv.cpp b/deps/node/deps/icu-small/source/common/ucnv.cpp deleted file mode 100644 index abf302ea..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv.cpp +++ /dev/null @@ -1,2910 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1998-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* ucnv.c: -* Implements APIs for the ICU's codeset conversion library; -* mostly calls through internal functions; -* created by Bertrand A. Damiba -* -* Modification History: -* -* Date Name Description -* 04/04/99 helena Fixed internal header inclusion. -* 05/09/00 helena Added implementation to handle fallback mappings. -* 06/20/2000 helena OS/400 port changes; mostly typecast. 
-*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ustring.h" -#include "unicode/ucnv.h" -#include "unicode/ucnv_err.h" -#include "unicode/uset.h" -#include "unicode/utf.h" -#include "unicode/utf16.h" -#include "putilimp.h" -#include "cmemory.h" -#include "cstring.h" -#include "uassert.h" -#include "utracimp.h" -#include "ustr_imp.h" -#include "ucnv_imp.h" -#include "ucnv_cnv.h" -#include "ucnv_bld.h" - -/* size of intermediate and preflighting buffers in ucnv_convert() */ -#define CHUNK_SIZE 1024 - -typedef struct UAmbiguousConverter { - const char *name; - const UChar variant5c; -} UAmbiguousConverter; - -static const UAmbiguousConverter ambiguousConverters[]={ - { "ibm-897_P100-1995", 0xa5 }, - { "ibm-942_P120-1999", 0xa5 }, - { "ibm-943_P130-1999", 0xa5 }, - { "ibm-946_P100-1995", 0xa5 }, - { "ibm-33722_P120-1999", 0xa5 }, - { "ibm-1041_P100-1995", 0xa5 }, - /*{ "ibm-54191_P100-2006", 0xa5 },*/ - /*{ "ibm-62383_P100-2007", 0xa5 },*/ - /*{ "ibm-891_P100-1995", 0x20a9 },*/ - { "ibm-944_P100-1995", 0x20a9 }, - { "ibm-949_P110-1999", 0x20a9 }, - { "ibm-1363_P110-1997", 0x20a9 }, - { "ISO_2022,locale=ko,version=0", 0x20a9 }, - { "ibm-1088_P100-1995", 0x20a9 } -}; - -/*Calls through createConverter */ -U_CAPI UConverter* U_EXPORT2 -ucnv_open (const char *name, - UErrorCode * err) -{ - UConverter *r; - - if (err == NULL || U_FAILURE (*err)) { - return NULL; - } - - r = ucnv_createConverter(NULL, name, err); - return r; -} - -U_CAPI UConverter* U_EXPORT2 -ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) -{ - return ucnv_createConverterFromPackage(packageName, converterName, err); -} - -/*Extracts the UChar* to a char* and calls through createConverter */ -U_CAPI UConverter* U_EXPORT2 -ucnv_openU (const UChar * name, - UErrorCode * err) -{ - char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; - - if (err == NULL || U_FAILURE(*err)) - return NULL; - if (name == NULL) - return ucnv_open (NULL, err); - 
if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - return ucnv_open(u_austrcpy(asciiName, name), err); -} - -/* Copy the string that is represented by the UConverterPlatform enum - * @param platformString An output buffer - * @param platform An enum representing a platform - * @return the length of the copied string. - */ -static int32_t -ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) -{ - switch (pltfrm) - { - case UCNV_IBM: - uprv_strcpy(platformString, "ibm-"); - return 4; - case UCNV_UNKNOWN: - break; - } - - /* default to empty string */ - *platformString = 0; - return 0; -} - -/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls - *through createConverter*/ -U_CAPI UConverter* U_EXPORT2 -ucnv_openCCSID (int32_t codepage, - UConverterPlatform platform, - UErrorCode * err) -{ - char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; - int32_t myNameLen; - - if (err == NULL || U_FAILURE (*err)) - return NULL; - - /* ucnv_copyPlatformString could return "ibm-" or "cp" */ - myNameLen = ucnv_copyPlatformString(myName, platform); - T_CString_integerToString(myName + myNameLen, codepage, 10); - - return ucnv_createConverter(NULL, myName, err); -} - -/* Creating a temporary stack-based object that can be used in one thread, -and created from a converter that is shared across threads. 
-*/ - -U_CAPI UConverter* U_EXPORT2 -ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) -{ - UConverter *localConverter, *allocatedConverter; - int32_t stackBufferSize; - int32_t bufferSizeNeeded; - char *stackBufferChars = (char *)stackBuffer; - UErrorCode cbErr; - UConverterToUnicodeArgs toUArgs = { - sizeof(UConverterToUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - UConverterFromUnicodeArgs fromUArgs = { - sizeof(UConverterFromUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - - UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); - - if (status == NULL || U_FAILURE(*status)){ - UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR); - return NULL; - } - - if (cnv == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - UTRACE_EXIT_STATUS(*status); - return NULL; - } - - UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", - ucnv_getName(cnv, status), cnv, stackBuffer); - - if (cnv->sharedData->impl->safeClone != NULL) { - /* call the custom safeClone function for sizing */ - bufferSizeNeeded = 0; - cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); - if (U_FAILURE(*status)) { - UTRACE_EXIT_STATUS(*status); - return NULL; - } - } - else - { - /* inherent sizing */ - bufferSizeNeeded = sizeof(UConverter); - } - - if (pBufferSize == NULL) { - stackBufferSize = 1; - pBufferSize = &stackBufferSize; - } else { - stackBufferSize = *pBufferSize; - if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ - *pBufferSize = bufferSizeNeeded; - UTRACE_EXIT_VALUE(bufferSizeNeeded); - return NULL; - } - } - - - /* Pointers on 64-bit platforms need to be aligned - * on a 64-bit boundary in memory. 
- */ - if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { - int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); - if(stackBufferSize > offsetUp) { - stackBufferSize -= offsetUp; - stackBufferChars += offsetUp; - } else { - /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ - stackBufferSize = 1; - } - } - - stackBuffer = (void *)stackBufferChars; - - /* Now, see if we must allocate any memory */ - if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) - { - /* allocate one here...*/ - localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); - - if(localConverter == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - UTRACE_EXIT_STATUS(*status); - return NULL; - } - *status = U_SAFECLONE_ALLOCATED_WARNING; - - /* record the fact that memory was allocated */ - *pBufferSize = bufferSizeNeeded; - } else { - /* just use the stack buffer */ - localConverter = (UConverter*) stackBuffer; - allocatedConverter = NULL; - } - - uprv_memset(localConverter, 0, bufferSizeNeeded); - - /* Copy initial state */ - uprv_memcpy(localConverter, cnv, sizeof(UConverter)); - localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; - - /* copy the substitution string */ - if (cnv->subChars == (uint8_t *)cnv->subUChars) { - localConverter->subChars = (uint8_t *)localConverter->subUChars; - } else { - localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); - if (localConverter->subChars == NULL) { - uprv_free(allocatedConverter); - UTRACE_EXIT_STATUS(*status); - return NULL; - } - uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); - } - - /* now either call the safeclone fcn or not */ - if (cnv->sharedData->impl->safeClone != NULL) { - /* call the custom safeClone function */ - localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); - } - - if(localConverter==NULL || 
U_FAILURE(*status)) { - if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { - uprv_free(allocatedConverter->subChars); - } - uprv_free(allocatedConverter); - UTRACE_EXIT_STATUS(*status); - return NULL; - } - - /* increment refcount of shared data if needed */ - if (cnv->sharedData->isReferenceCounted) { - ucnv_incrementRefCount(cnv->sharedData); - } - - if(localConverter == (UConverter*)stackBuffer) { - /* we're using user provided data - set to not destroy */ - localConverter->isCopyLocal = TRUE; - } - - /* allow callback functions to handle any memory allocation */ - toUArgs.converter = fromUArgs.converter = localConverter; - cbErr = U_ZERO_ERROR; - cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); - cbErr = U_ZERO_ERROR; - cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); - - UTRACE_EXIT_PTR_STATUS(localConverter, *status); - return localConverter; -} - - - -/*Decreases the reference counter in the shared immutable section of the object - *and frees the mutable part*/ - -U_CAPI void U_EXPORT2 -ucnv_close (UConverter * converter) -{ - UErrorCode errorCode = U_ZERO_ERROR; - - UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); - - if (converter == NULL) - { - UTRACE_EXIT(); - return; - } - - UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", - ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); - - /* In order to speed up the close, only call the callbacks when they have been changed. - This performance check will only work when the callbacks are set within a shared library - or from user code that statically links this code. 
*/ - /* first, notify the callback functions that the converter is closed */ - if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { - UConverterToUnicodeArgs toUArgs = { - sizeof(UConverterToUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - - toUArgs.converter = converter; - errorCode = U_ZERO_ERROR; - converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); - } - if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { - UConverterFromUnicodeArgs fromUArgs = { - sizeof(UConverterFromUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - fromUArgs.converter = converter; - errorCode = U_ZERO_ERROR; - converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); - } - - if (converter->sharedData->impl->close != NULL) { - converter->sharedData->impl->close(converter); - } - - if (converter->subChars != (uint8_t *)converter->subUChars) { - uprv_free(converter->subChars); - } - - if (converter->sharedData->isReferenceCounted) { - ucnv_unloadSharedDataIfReady(converter->sharedData); - } - - if(!converter->isCopyLocal){ - uprv_free(converter); - } - - UTRACE_EXIT(); -} - -/*returns a single Name from the list, will return NULL if out of bounds - */ -U_CAPI const char* U_EXPORT2 -ucnv_getAvailableName (int32_t n) -{ - if (0 <= n && n <= 0xffff) { - UErrorCode err = U_ZERO_ERROR; - const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); - if (U_SUCCESS(err)) { - return name; - } - } - return NULL; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_countAvailable () -{ - UErrorCode err = U_ZERO_ERROR; - return ucnv_bld_countAvailableConverters(&err); -} - -U_CAPI void U_EXPORT2 -ucnv_getSubstChars (const UConverter * converter, - char *mySubChar, - int8_t * len, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return; - - if (converter->subCharLen <= 0) { - /* Unicode string or empty string from 
ucnv_setSubstString(). */ - *len = 0; - return; - } - - if (*len < converter->subCharLen) /*not enough space in subChars */ - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - - uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ - *len = converter->subCharLen; /*store # of bytes copied to buffer */ -} - -U_CAPI void U_EXPORT2 -ucnv_setSubstChars (UConverter * converter, - const char *mySubChar, - int8_t len, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return; - - /*Makes sure that the subChar is within the codepages char length boundaries */ - if ((len > converter->sharedData->staticData->maxBytesPerChar) - || (len < converter->sharedData->staticData->minBytesPerChar)) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ - converter->subCharLen = len; /*sets the new len */ - - /* - * There is currently (2001Feb) no separate API to set/get subChar1. - * In order to always have subChar written after it is explicitly set, - * we set subChar1 to 0. - */ - converter->subChar1 = 0; - - return; -} - -U_CAPI void U_EXPORT2 -ucnv_setSubstString(UConverter *cnv, - const UChar *s, - int32_t length, - UErrorCode *err) { - UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; - char chars[UCNV_ERROR_BUFFER_LENGTH]; - - UConverter *clone; - uint8_t *subChars; - int32_t cloneSize, length8; - - /* Let the following functions check all arguments. 
*/ - cloneSize = sizeof(cloneBuffer); - clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); - ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); - length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); - ucnv_close(clone); - if (U_FAILURE(*err)) { - return; - } - - if (cnv->sharedData->impl->writeSub == NULL -#if !UCONFIG_NO_LEGACY_CONVERSION - || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && - ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) -#endif - ) { - /* The converter is not stateful. Store the charset bytes as a fixed string. */ - subChars = (uint8_t *)chars; - } else { - /* - * The converter has a non-default writeSub() function, indicating - * that it is stateful. - * Store the Unicode string for on-the-fly conversion for correct - * state handling. - */ - if (length > UCNV_ERROR_BUFFER_LENGTH) { - /* - * Should not occur. The converter should output at least one byte - * per UChar, which means that ucnv_fromUChars() should catch all - * overflows. - */ - *err = U_BUFFER_OVERFLOW_ERROR; - return; - } - subChars = (uint8_t *)s; - if (length < 0) { - length = u_strlen(s); - } - length8 = length * U_SIZEOF_UCHAR; - } - - /* - * For storing the substitution string, select either the small buffer inside - * UConverter or allocate a subChars buffer. - */ - if (length8 > UCNV_MAX_SUBCHAR_LEN) { - /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ - if (cnv->subChars == (uint8_t *)cnv->subUChars) { - /* Allocate a new buffer for the string. */ - cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); - if (cnv->subChars == NULL) { - cnv->subChars = (uint8_t *)cnv->subUChars; - *err = U_MEMORY_ALLOCATION_ERROR; - return; - } - uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); - } - } - - /* Copy the substitution string into the UConverter or its subChars buffer. 
*/ - if (length8 == 0) { - cnv->subCharLen = 0; - } else { - uprv_memcpy(cnv->subChars, subChars, length8); - if (subChars == (uint8_t *)chars) { - cnv->subCharLen = (int8_t)length8; - } else /* subChars == s */ { - cnv->subCharLen = (int8_t)-length; - } - } - - /* See comment in ucnv_setSubstChars(). */ - cnv->subChar1 = 0; -} - -/*resets the internal states of a converter - *goal : have the same behaviour than a freshly created converter - */ -static void _reset(UConverter *converter, UConverterResetChoice choice, - UBool callCallback) { - if(converter == NULL) { - return; - } - - if(callCallback) { - /* first, notify the callback functions that the converter is reset */ - UErrorCode errorCode; - - if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { - UConverterToUnicodeArgs toUArgs = { - sizeof(UConverterToUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - toUArgs.converter = converter; - errorCode = U_ZERO_ERROR; - converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); - } - if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { - UConverterFromUnicodeArgs fromUArgs = { - sizeof(UConverterFromUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - fromUArgs.converter = converter; - errorCode = U_ZERO_ERROR; - converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); - } - } - - /* now reset the converter itself */ - if(choice<=UCNV_RESET_TO_UNICODE) { - converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; - converter->mode = 0; - converter->toULength = 0; - converter->invalidCharLength = converter->UCharErrorBufferLength = 0; - converter->preToULength = 0; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - converter->fromUnicodeStatus = 0; - converter->fromUChar32 = 0; - converter->invalidUCharLength = 
converter->charErrorBufferLength = 0; - converter->preFromUFirstCP = U_SENTINEL; - converter->preFromULength = 0; - } - - if (converter->sharedData->impl->reset != NULL) { - /* call the custom reset function */ - converter->sharedData->impl->reset(converter, choice); - } -} - -U_CAPI void U_EXPORT2 -ucnv_reset(UConverter *converter) -{ - _reset(converter, UCNV_RESET_BOTH, TRUE); -} - -U_CAPI void U_EXPORT2 -ucnv_resetToUnicode(UConverter *converter) -{ - _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); -} - -U_CAPI void U_EXPORT2 -ucnv_resetFromUnicode(UConverter *converter) -{ - _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); -} - -U_CAPI int8_t U_EXPORT2 -ucnv_getMaxCharSize (const UConverter * converter) -{ - return converter->maxBytesPerUChar; -} - - -U_CAPI int8_t U_EXPORT2 -ucnv_getMinCharSize (const UConverter * converter) -{ - return converter->sharedData->staticData->minBytesPerChar; -} - -U_CAPI const char* U_EXPORT2 -ucnv_getName (const UConverter * converter, UErrorCode * err) - -{ - if (U_FAILURE (*err)) - return NULL; - if(converter->sharedData->impl->getName){ - const char* temp= converter->sharedData->impl->getName(converter); - if(temp) - return temp; - } - return converter->sharedData->staticData->name; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_getCCSID(const UConverter * converter, - UErrorCode * err) -{ - int32_t ccsid; - if (U_FAILURE (*err)) - return -1; - - ccsid = converter->sharedData->staticData->codepage; - if (ccsid == 0) { - /* Rare case. This is for cases like gb18030, - which doesn't have an IBM canonical name, but does have an IBM alias. 
*/ - const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); - if (U_SUCCESS(*err) && standardName) { - const char *ccsidStr = uprv_strchr(standardName, '-'); - if (ccsidStr) { - ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ - } - } - } - return ccsid; -} - - -U_CAPI UConverterPlatform U_EXPORT2 -ucnv_getPlatform (const UConverter * converter, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return UCNV_UNKNOWN; - - return (UConverterPlatform)converter->sharedData->staticData->platform; -} - -U_CAPI void U_EXPORT2 - ucnv_getToUCallBack (const UConverter * converter, - UConverterToUCallback *action, - const void **context) -{ - *action = converter->fromCharErrorBehaviour; - *context = converter->toUContext; -} - -U_CAPI void U_EXPORT2 - ucnv_getFromUCallBack (const UConverter * converter, - UConverterFromUCallback *action, - const void **context) -{ - *action = converter->fromUCharErrorBehaviour; - *context = converter->fromUContext; -} - -U_CAPI void U_EXPORT2 -ucnv_setToUCallBack (UConverter * converter, - UConverterToUCallback newAction, - const void* newContext, - UConverterToUCallback *oldAction, - const void** oldContext, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return; - if (oldAction) *oldAction = converter->fromCharErrorBehaviour; - converter->fromCharErrorBehaviour = newAction; - if (oldContext) *oldContext = converter->toUContext; - converter->toUContext = newContext; -} - -U_CAPI void U_EXPORT2 -ucnv_setFromUCallBack (UConverter * converter, - UConverterFromUCallback newAction, - const void* newContext, - UConverterFromUCallback *oldAction, - const void** oldContext, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return; - if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; - converter->fromUCharErrorBehaviour = newAction; - if (oldContext) *oldContext = converter->fromUContext; - converter->fromUContext = newContext; -} - -static void -_updateOffsets(int32_t *offsets, int32_t length, - 
int32_t sourceIndex, int32_t errorInputLength) { - int32_t *limit; - int32_t delta, offset; - - if(sourceIndex>=0) { - /* - * adjust each offset by adding the previous sourceIndex - * minus the length of the input sequence that caused an - * error, if any - */ - delta=sourceIndex-errorInputLength; - } else { - /* - * set each offset to -1 because this conversion function - * does not handle offsets - */ - delta=-1; - } - - limit=offsets+length; - if(delta==0) { - /* most common case, nothing to do */ - } else if(delta>0) { - /* add the delta to each offset (but not if the offset is <0) */ - while(offsets=0) { - *offsets=offset+delta; - } - ++offsets; - } - } else /* delta<0 */ { - /* - * set each offset to -1 because this conversion function - * does not handle offsets - * or the error input sequence started in a previous buffer - */ - while(offsetsconverter; - s=pArgs->source; - t=pArgs->target; - offsets=pArgs->offsets; - - /* get the converter implementation function */ - sourceIndex=0; - if(offsets==NULL) { - fromUnicode=cnv->sharedData->impl->fromUnicode; - } else { - fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets; - if(fromUnicode==NULL) { - /* there is no WithOffsets implementation */ - fromUnicode=cnv->sharedData->impl->fromUnicode; - /* we will write -1 for each offset */ - sourceIndex=-1; - } - } - - if(cnv->preFromULength>=0) { - /* normal mode */ - realSource=NULL; - - /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ - realSourceLimit=NULL; - realFlush=FALSE; - realSourceIndex=0; - } else { - /* - * Previous m:n conversion stored source units from a partial match - * and failed to consume all of them. - * We need to "replay" them from a temporary buffer and convert them first. 
- */ - realSource=pArgs->source; - realSourceLimit=pArgs->sourceLimit; - realFlush=pArgs->flush; - realSourceIndex=sourceIndex; - - uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); - pArgs->source=replay; - pArgs->sourceLimit=replay-cnv->preFromULength; - pArgs->flush=FALSE; - sourceIndex=-1; - - cnv->preFromULength=0; - } - - /* - * loop for conversion and error handling - * - * loop { - * convert - * loop { - * update offsets - * handle end of input - * handle errors/call callback - * } - * } - */ - for(;;) { - if(U_SUCCESS(*err)) { - /* convert */ - fromUnicode(pArgs, err); - - /* - * set a flag for whether the converter - * successfully processed the end of the input - * - * need not check cnv->preFromULength==0 because a replay (<0) will cause - * sflush && pArgs->source==pArgs->sourceLimit && - cnv->fromUChar32==0); - } else { - /* handle error from ucnv_convertEx() */ - converterSawEndOfInput=FALSE; - } - - /* no callback called yet for this iteration */ - calledCallback=FALSE; - - /* no sourceIndex adjustment for conversion, only for callback output */ - errorInputLength=0; - - /* - * loop for offsets and error handling - * - * iterates at most 3 times: - * 1. to clean up after the conversion function - * 2. after the callback - * 3. 
after the callback again if there was truncated input - */ - for(;;) { - /* update offsets if we write any */ - if(offsets!=NULL) { - int32_t length=(int32_t)(pArgs->target-t); - if(length>0) { - _updateOffsets(offsets, length, sourceIndex, errorInputLength); - - /* - * if a converter handles offsets and updates the offsets - * pointer at the end, then pArgs->offset should not change - * here; - * however, some converters do not handle offsets at all - * (sourceIndex<0) or may not update the offsets pointer - */ - pArgs->offsets=offsets+=length; - } - - if(sourceIndex>=0) { - sourceIndex+=(int32_t)(pArgs->source-s); - } - } - - if(cnv->preFromULength<0) { - /* - * switch the source to new replay units (cannot occur while replaying) - * after offset handling and before end-of-input and callback handling - */ - if(realSource==NULL) { - realSource=pArgs->source; - realSourceLimit=pArgs->sourceLimit; - realFlush=pArgs->flush; - realSourceIndex=sourceIndex; - - uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); - pArgs->source=replay; - pArgs->sourceLimit=replay-cnv->preFromULength; - pArgs->flush=FALSE; - if((sourceIndex+=cnv->preFromULength)<0) { - sourceIndex=-1; - } - - cnv->preFromULength=0; - } else { - /* see implementation note before _fromUnicodeWithCallback() */ - U_ASSERT(realSource==NULL); - *err=U_INTERNAL_PROGRAM_ERROR; - } - } - - /* update pointers */ - s=pArgs->source; - t=pArgs->target; - - if(U_SUCCESS(*err)) { - if(ssourceLimit) { - /* - * continue with the conversion loop while there is still input left - * (continue converting by breaking out of only the inner loop) - */ - break; - } else if(realSource!=NULL) { - /* switch back from replaying to the real source and continue */ - pArgs->source=realSource; - pArgs->sourceLimit=realSourceLimit; - pArgs->flush=realFlush; - sourceIndex=realSourceIndex; - - realSource=NULL; - break; - } else if(pArgs->flush && cnv->fromUChar32!=0) { - /* - * the entire input stream is consumed - * 
and there is a partial, truncated input sequence left - */ - - /* inject an error and continue with callback handling */ - *err=U_TRUNCATED_CHAR_FOUND; - calledCallback=FALSE; /* new error condition */ - } else { - /* input consumed */ - if(pArgs->flush) { - /* - * return to the conversion loop once more if the flush - * flag is set and the conversion function has not - * successfully processed the end of the input yet - * - * (continue converting by breaking out of only the inner loop) - */ - if(!converterSawEndOfInput) { - break; - } - - /* reset the converter without calling the callback function */ - _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE); - } - - /* done successfully */ - return; - } - } - - /* U_FAILURE(*err) */ - { - UErrorCode e; - - if( calledCallback || - (e=*err)==U_BUFFER_OVERFLOW_ERROR || - (e!=U_INVALID_CHAR_FOUND && - e!=U_ILLEGAL_CHAR_FOUND && - e!=U_TRUNCATED_CHAR_FOUND) - ) { - /* - * the callback did not or cannot resolve the error: - * set output pointers and return - * - * the check for buffer overflow is redundant but it is - * a high-runner case and hopefully documents the intent - * well - * - * if we were replaying, then the replay buffer must be - * copied back into the UConverter - * and the real arguments must be restored - */ - if(realSource!=NULL) { - int32_t length; - - U_ASSERT(cnv->preFromULength==0); - - length=(int32_t)(pArgs->sourceLimit-pArgs->source); - if(length>0) { - u_memcpy(cnv->preFromU, pArgs->source, length); - cnv->preFromULength=(int8_t)-length; - } - - pArgs->source=realSource; - pArgs->sourceLimit=realSourceLimit; - pArgs->flush=realFlush; - } - - return; - } - } - - /* callback handling */ - { - UChar32 codePoint; - - /* get and write the code point */ - codePoint=cnv->fromUChar32; - errorInputLength=0; - U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint); - cnv->invalidUCharLength=(int8_t)errorInputLength; - - /* set the converter state to deal with the next character */ - 
cnv->fromUChar32=0; - - /* call the callback function */ - cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, - cnv->invalidUCharBuffer, errorInputLength, codePoint, - *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL, - err); - } - - /* - * loop back to the offset handling - * - * this flag will indicate after offset handling - * that a callback was called; - * if the callback did not resolve the error, then we return - */ - calledCallback=TRUE; - } - } -} - -/* - * Output the fromUnicode overflow buffer. - * Call this function if(cnv->charErrorBufferLength>0). - * @return TRUE if overflow - */ -static UBool -ucnv_outputOverflowFromUnicode(UConverter *cnv, - char **target, const char *targetLimit, - int32_t **pOffsets, - UErrorCode *err) { - int32_t *offsets; - char *overflow, *t; - int32_t i, length; - - t=*target; - if(pOffsets!=NULL) { - offsets=*pOffsets; - } else { - offsets=NULL; - } - - overflow=(char *)cnv->charErrorBuffer; - length=cnv->charErrorBufferLength; - i=0; - while(icharErrorBufferLength=(int8_t)j; - *target=t; - if(offsets!=NULL) { - *pOffsets=offsets; - } - *err=U_BUFFER_OVERFLOW_ERROR; - return TRUE; - } - - /* copy the overflow contents to the target */ - *t++=overflow[i++]; - if(offsets!=NULL) { - *offsets++=-1; /* no source index available for old output */ - } - } - - /* the overflow buffer is completely copied to the target */ - cnv->charErrorBufferLength=0; - *target=t; - if(offsets!=NULL) { - *pOffsets=offsets; - } - return FALSE; -} - -U_CAPI void U_EXPORT2 -ucnv_fromUnicode(UConverter *cnv, - char **target, const char *targetLimit, - const UChar **source, const UChar *sourceLimit, - int32_t *offsets, - UBool flush, - UErrorCode *err) { - UConverterFromUnicodeArgs args; - const UChar *s; - char *t; - - /* check parameters */ - if(err==NULL || U_FAILURE(*err)) { - return; - } - - if(cnv==NULL || target==NULL || source==NULL) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - s=*source; - t=*target; - - if ((const void 
*)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { - /* - Prevent code from going into an infinite loop in case we do hit this - limit. The limit pointer is expected to be on a UChar * boundary. - This also prevents the next argument check from failing. - */ - sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); - } - - /* - * All these conditions should never happen. - * - * 1) Make sure that the limits are >= to the address source or target - * - * 2) Make sure that the buffer sizes do not exceed the number range for - * int32_t because some functions use the size (in units or bytes) - * rather than comparing pointers, and because offsets are int32_t values. - * - * size_t is guaranteed to be unsigned and large enough for the job. - * - * Return with an error instead of adjusting the limits because we would - * not be able to maintain the semantics that either the source must be - * consumed or the target filled (unless an error occurs). - * An adjustment would be targetLimit=t+0x7fffffff; for example. - * - * 3) Make sure that the user didn't incorrectly cast a UChar * pointer - * to a char * pointer and provide an incomplete UChar code unit. 
- */ - if (sourceLimit(size_t)0x3fffffff && sourceLimit>s) || - ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || - (((const char *)sourceLimit-(const char *)s) & 1) != 0) - { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* output the target overflow buffer */ - if( cnv->charErrorBufferLength>0 && - ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) - ) { - /* U_BUFFER_OVERFLOW_ERROR */ - return; - } - /* *target may have moved, therefore stop using t */ - - if(!flush && s==sourceLimit && cnv->preFromULength>=0) { - /* the overflow buffer is emptied and there is no new input: we are done */ - return; - } - - /* - * Do not simply return with a buffer overflow error if - * !flush && t==targetLimit - * because it is possible that the source will not generate any output. - * For example, the skip callback may be called; - * it does not output anything. - */ - - /* prepare the converter arguments */ - args.converter=cnv; - args.flush=flush; - args.offsets=offsets; - args.source=s; - args.sourceLimit=sourceLimit; - args.target=*target; - args.targetLimit=targetLimit; - args.size=sizeof(args); - - _fromUnicodeWithCallback(&args, err); - - *source=args.source; - *target=args.target; -} - -/* ucnv_toUnicode() --------------------------------------------------------- */ - -static void -_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { - UConverterToUnicode toUnicode; - UConverter *cnv; - const char *s; - UChar *t; - int32_t *offsets; - int32_t sourceIndex; - int32_t errorInputLength; - UBool converterSawEndOfInput, calledCallback; - - /* variables for m:n conversion */ - char replay[UCNV_EXT_MAX_BYTES]; - const char *realSource, *realSourceLimit; - int32_t realSourceIndex; - UBool realFlush; - - cnv=pArgs->converter; - s=pArgs->source; - t=pArgs->target; - offsets=pArgs->offsets; - - /* get the converter implementation function */ - sourceIndex=0; - if(offsets==NULL) { - 
toUnicode=cnv->sharedData->impl->toUnicode; - } else { - toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; - if(toUnicode==NULL) { - /* there is no WithOffsets implementation */ - toUnicode=cnv->sharedData->impl->toUnicode; - /* we will write -1 for each offset */ - sourceIndex=-1; - } - } - - if(cnv->preToULength>=0) { - /* normal mode */ - realSource=NULL; - - /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ - realSourceLimit=NULL; - realFlush=FALSE; - realSourceIndex=0; - } else { - /* - * Previous m:n conversion stored source units from a partial match - * and failed to consume all of them. - * We need to "replay" them from a temporary buffer and convert them first. - */ - realSource=pArgs->source; - realSourceLimit=pArgs->sourceLimit; - realFlush=pArgs->flush; - realSourceIndex=sourceIndex; - - uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); - pArgs->source=replay; - pArgs->sourceLimit=replay-cnv->preToULength; - pArgs->flush=FALSE; - sourceIndex=-1; - - cnv->preToULength=0; - } - - /* - * loop for conversion and error handling - * - * loop { - * convert - * loop { - * update offsets - * handle end of input - * handle errors/call callback - * } - * } - */ - for(;;) { - if(U_SUCCESS(*err)) { - /* convert */ - toUnicode(pArgs, err); - - /* - * set a flag for whether the converter - * successfully processed the end of the input - * - * need not check cnv->preToULength==0 because a replay (<0) will cause - * sflush && pArgs->source==pArgs->sourceLimit && - cnv->toULength==0); - } else { - /* handle error from getNextUChar() or ucnv_convertEx() */ - converterSawEndOfInput=FALSE; - } - - /* no callback called yet for this iteration */ - calledCallback=FALSE; - - /* no sourceIndex adjustment for conversion, only for callback output */ - errorInputLength=0; - - /* - * loop for offsets and error handling - * - * iterates at most 3 times: - * 1. to clean up after the conversion function - * 2. after the callback - * 3. 
after the callback again if there was truncated input - */ - for(;;) { - /* update offsets if we write any */ - if(offsets!=NULL) { - int32_t length=(int32_t)(pArgs->target-t); - if(length>0) { - _updateOffsets(offsets, length, sourceIndex, errorInputLength); - - /* - * if a converter handles offsets and updates the offsets - * pointer at the end, then pArgs->offset should not change - * here; - * however, some converters do not handle offsets at all - * (sourceIndex<0) or may not update the offsets pointer - */ - pArgs->offsets=offsets+=length; - } - - if(sourceIndex>=0) { - sourceIndex+=(int32_t)(pArgs->source-s); - } - } - - if(cnv->preToULength<0) { - /* - * switch the source to new replay units (cannot occur while replaying) - * after offset handling and before end-of-input and callback handling - */ - if(realSource==NULL) { - realSource=pArgs->source; - realSourceLimit=pArgs->sourceLimit; - realFlush=pArgs->flush; - realSourceIndex=sourceIndex; - - uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); - pArgs->source=replay; - pArgs->sourceLimit=replay-cnv->preToULength; - pArgs->flush=FALSE; - if((sourceIndex+=cnv->preToULength)<0) { - sourceIndex=-1; - } - - cnv->preToULength=0; - } else { - /* see implementation note before _fromUnicodeWithCallback() */ - U_ASSERT(realSource==NULL); - *err=U_INTERNAL_PROGRAM_ERROR; - } - } - - /* update pointers */ - s=pArgs->source; - t=pArgs->target; - - if(U_SUCCESS(*err)) { - if(ssourceLimit) { - /* - * continue with the conversion loop while there is still input left - * (continue converting by breaking out of only the inner loop) - */ - break; - } else if(realSource!=NULL) { - /* switch back from replaying to the real source and continue */ - pArgs->source=realSource; - pArgs->sourceLimit=realSourceLimit; - pArgs->flush=realFlush; - sourceIndex=realSourceIndex; - - realSource=NULL; - break; - } else if(pArgs->flush && cnv->toULength>0) { - /* - * the entire input stream is consumed - * and there is a partial, 
truncated input sequence left - */ - - /* inject an error and continue with callback handling */ - *err=U_TRUNCATED_CHAR_FOUND; - calledCallback=FALSE; /* new error condition */ - } else { - /* input consumed */ - if(pArgs->flush) { - /* - * return to the conversion loop once more if the flush - * flag is set and the conversion function has not - * successfully processed the end of the input yet - * - * (continue converting by breaking out of only the inner loop) - */ - if(!converterSawEndOfInput) { - break; - } - - /* reset the converter without calling the callback function */ - _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); - } - - /* done successfully */ - return; - } - } - - /* U_FAILURE(*err) */ - { - UErrorCode e; - - if( calledCallback || - (e=*err)==U_BUFFER_OVERFLOW_ERROR || - (e!=U_INVALID_CHAR_FOUND && - e!=U_ILLEGAL_CHAR_FOUND && - e!=U_TRUNCATED_CHAR_FOUND && - e!=U_ILLEGAL_ESCAPE_SEQUENCE && - e!=U_UNSUPPORTED_ESCAPE_SEQUENCE) - ) { - /* - * the callback did not or cannot resolve the error: - * set output pointers and return - * - * the check for buffer overflow is redundant but it is - * a high-runner case and hopefully documents the intent - * well - * - * if we were replaying, then the replay buffer must be - * copied back into the UConverter - * and the real arguments must be restored - */ - if(realSource!=NULL) { - int32_t length; - - U_ASSERT(cnv->preToULength==0); - - length=(int32_t)(pArgs->sourceLimit-pArgs->source); - if(length>0) { - uprv_memcpy(cnv->preToU, pArgs->source, length); - cnv->preToULength=(int8_t)-length; - } - - pArgs->source=realSource; - pArgs->sourceLimit=realSourceLimit; - pArgs->flush=realFlush; - } - - return; - } - } - - /* copy toUBytes[] to invalidCharBuffer[] */ - errorInputLength=cnv->invalidCharLength=cnv->toULength; - if(errorInputLength>0) { - uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); - } - - /* set the converter state to deal with the next character */ - cnv->toULength=0; - - /* call the 
callback function */ - if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { - cnv->toUCallbackReason = UCNV_UNASSIGNED; - } - cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, - cnv->invalidCharBuffer, errorInputLength, - cnv->toUCallbackReason, - err); - cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ - - /* - * loop back to the offset handling - * - * this flag will indicate after offset handling - * that a callback was called; - * if the callback did not resolve the error, then we return - */ - calledCallback=TRUE; - } - } -} - -/* - * Output the toUnicode overflow buffer. - * Call this function if(cnv->UCharErrorBufferLength>0). - * @return TRUE if overflow - */ -static UBool -ucnv_outputOverflowToUnicode(UConverter *cnv, - UChar **target, const UChar *targetLimit, - int32_t **pOffsets, - UErrorCode *err) { - int32_t *offsets; - UChar *overflow, *t; - int32_t i, length; - - t=*target; - if(pOffsets!=NULL) { - offsets=*pOffsets; - } else { - offsets=NULL; - } - - overflow=cnv->UCharErrorBuffer; - length=cnv->UCharErrorBufferLength; - i=0; - while(iUCharErrorBufferLength=(int8_t)j; - *target=t; - if(offsets!=NULL) { - *pOffsets=offsets; - } - *err=U_BUFFER_OVERFLOW_ERROR; - return TRUE; - } - - /* copy the overflow contents to the target */ - *t++=overflow[i++]; - if(offsets!=NULL) { - *offsets++=-1; /* no source index available for old output */ - } - } - - /* the overflow buffer is completely copied to the target */ - cnv->UCharErrorBufferLength=0; - *target=t; - if(offsets!=NULL) { - *pOffsets=offsets; - } - return FALSE; -} - -U_CAPI void U_EXPORT2 -ucnv_toUnicode(UConverter *cnv, - UChar **target, const UChar *targetLimit, - const char **source, const char *sourceLimit, - int32_t *offsets, - UBool flush, - UErrorCode *err) { - UConverterToUnicodeArgs args; - const char *s; - UChar *t; - - /* check parameters */ - if(err==NULL || U_FAILURE(*err)) { - return; - } - - if(cnv==NULL || target==NULL || source==NULL) { - 
*err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - s=*source; - t=*target; - - if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { - /* - Prevent code from going into an infinite loop in case we do hit this - limit. The limit pointer is expected to be on a UChar * boundary. - This also prevents the next argument check from failing. - */ - targetLimit = (const UChar *)(((const char *)targetLimit) - 1); - } - - /* - * All these conditions should never happen. - * - * 1) Make sure that the limits are >= to the address source or target - * - * 2) Make sure that the buffer sizes do not exceed the number range for - * int32_t because some functions use the size (in units or bytes) - * rather than comparing pointers, and because offsets are int32_t values. - * - * size_t is guaranteed to be unsigned and large enough for the job. - * - * Return with an error instead of adjusting the limits because we would - * not be able to maintain the semantics that either the source must be - * consumed or the target filled (unless an error occurs). - * An adjustment would be sourceLimit=t+0x7fffffff; for example. - * - * 3) Make sure that the user didn't incorrectly cast a UChar * pointer - * to a char * pointer and provide an incomplete UChar code unit. 
- */ - if (sourceLimit(size_t)0x7fffffff && sourceLimit>s) || - ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || - (((const char *)targetLimit-(const char *)t) & 1) != 0 - ) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* output the target overflow buffer */ - if( cnv->UCharErrorBufferLength>0 && - ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) - ) { - /* U_BUFFER_OVERFLOW_ERROR */ - return; - } - /* *target may have moved, therefore stop using t */ - - if(!flush && s==sourceLimit && cnv->preToULength>=0) { - /* the overflow buffer is emptied and there is no new input: we are done */ - return; - } - - /* - * Do not simply return with a buffer overflow error if - * !flush && t==targetLimit - * because it is possible that the source will not generate any output. - * For example, the skip callback may be called; - * it does not output anything. - */ - - /* prepare the converter arguments */ - args.converter=cnv; - args.flush=flush; - args.offsets=offsets; - args.source=s; - args.sourceLimit=sourceLimit; - args.target=*target; - args.targetLimit=targetLimit; - args.size=sizeof(args); - - _toUnicodeWithCallback(&args, err); - - *source=args.source; - *target=args.target; -} - -/* ucnv_to/fromUChars() ----------------------------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -ucnv_fromUChars(UConverter *cnv, - char *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode) { - const UChar *srcLimit; - char *originalDest, *destLimit; - int32_t destLength; - - /* check arguments */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if( cnv==NULL || - destCapacity<0 || (destCapacity>0 && dest==NULL) || - srcLength<-1 || (srcLength!=0 && src==NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* initialize */ - ucnv_resetFromUnicode(cnv); - originalDest=dest; - if(srcLength==-1) { - srcLength=u_strlen(src); - } - if(srcLength>0) { - 
srcLimit=src+srcLength; - destCapacity=pinCapacity(dest, destCapacity); - destLimit=dest+destCapacity; - - /* perform the conversion */ - ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); - destLength=(int32_t)(dest-originalDest); - - /* if an overflow occurs, then get the preflighting length */ - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - char buffer[1024]; - - destLimit=buffer+sizeof(buffer); - do { - dest=buffer; - *pErrorCode=U_ZERO_ERROR; - ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); - destLength+=(int32_t)(dest-buffer); - } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); - } - } else { - destLength=0; - } - - return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -ucnv_toUChars(UConverter *cnv, - UChar *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode) { - const char *srcLimit; - UChar *originalDest, *destLimit; - int32_t destLength; - - /* check arguments */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if( cnv==NULL || - destCapacity<0 || (destCapacity>0 && dest==NULL) || - srcLength<-1 || (srcLength!=0 && src==NULL)) - { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* initialize */ - ucnv_resetToUnicode(cnv); - originalDest=dest; - if(srcLength==-1) { - srcLength=(int32_t)uprv_strlen(src); - } - if(srcLength>0) { - srcLimit=src+srcLength; - destCapacity=pinCapacity(dest, destCapacity); - destLimit=dest+destCapacity; - - /* perform the conversion */ - ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); - destLength=(int32_t)(dest-originalDest); - - /* if an overflow occurs, then get the preflighting length */ - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) - { - UChar buffer[1024]; - - destLimit=buffer+UPRV_LENGTHOF(buffer); - do { - dest=buffer; - *pErrorCode=U_ZERO_ERROR; - ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, 
pErrorCode); - destLength+=(int32_t)(dest-buffer); - } - while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); - } - } else { - destLength=0; - } - - return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode); -} - -/* ucnv_getNextUChar() ------------------------------------------------------ */ - -U_CAPI UChar32 U_EXPORT2 -ucnv_getNextUChar(UConverter *cnv, - const char **source, const char *sourceLimit, - UErrorCode *err) { - UConverterToUnicodeArgs args; - UChar buffer[U16_MAX_LENGTH]; - const char *s; - UChar32 c; - int32_t i, length; - - /* check parameters */ - if(err==NULL || U_FAILURE(*err)) { - return 0xffff; - } - - if(cnv==NULL || source==NULL) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return 0xffff; - } - - s=*source; - if(sourceLimit(size_t)0x7fffffff && sourceLimit>s)) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return 0xffff; - } - - c=U_SENTINEL; - - /* flush the target overflow buffer */ - if(cnv->UCharErrorBufferLength>0) { - UChar *overflow; - - overflow=cnv->UCharErrorBuffer; - i=0; - length=cnv->UCharErrorBufferLength; - U16_NEXT(overflow, i, length, c); - - /* move the remaining overflow contents up to the beginning */ - if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { - uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, - cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); - } - - if(!U16_IS_LEAD(c) || itoULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { - c=cnv->sharedData->impl->getNextUChar(&args, err); - *source=s=args.source; - if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { - /* reset the converter without calling the callback function */ - _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); - return 0xffff; /* no output */ - } else if(U_SUCCESS(*err) && c>=0) { - return c; - /* - * else fall through to use _toUnicode() because - * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all - * U_FAILURE: call _toUnicode() for callback handling (do not output c) - */ - } - } - - /* convert to one UChar in 
buffer[0], or handle getNextUChar() errors */ - _toUnicodeWithCallback(&args, err); - - if(*err==U_BUFFER_OVERFLOW_ERROR) { - *err=U_ZERO_ERROR; - } - - i=0; - length=(int32_t)(args.target-buffer); - } else { - /* write the lead surrogate from the overflow buffer */ - buffer[0]=(UChar)c; - args.target=buffer+1; - i=0; - length=1; - } - - /* buffer contents starts at i and ends before length */ - - if(U_FAILURE(*err)) { - c=0xffff; /* no output */ - } else if(length==0) { - /* no input or only state changes */ - *err=U_INDEX_OUTOFBOUNDS_ERROR; - /* no need to reset explicitly because _toUnicodeWithCallback() did it */ - c=0xffff; /* no output */ - } else { - c=buffer[0]; - i=1; - if(!U16_IS_LEAD(c)) { - /* consume c=buffer[0], done */ - } else { - /* got a lead surrogate, see if a trail surrogate follows */ - UChar c2; - - if(cnv->UCharErrorBufferLength>0) { - /* got overflow output from the conversion */ - if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { - /* got a trail surrogate, too */ - c=U16_GET_SUPPLEMENTARY(c, c2); - - /* move the remaining overflow contents up to the beginning */ - if((--cnv->UCharErrorBufferLength)>0) { - uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, - cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); - } - } else { - /* c is an unpaired lead surrogate, just return it */ - } - } else if(args.sourceUCharErrorBufferLength)>0) { - uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, - length*U_SIZEOF_UCHAR); - } - cnv->UCharErrorBufferLength=(int8_t)(length+delta); - - cnv->UCharErrorBuffer[0]=buffer[i++]; - if(delta>1) { - cnv->UCharErrorBuffer[1]=buffer[i]; - } - } - - *source=args.source; - return c; -} - -/* ucnv_convert() and siblings ---------------------------------------------- */ - -U_CAPI void U_EXPORT2 -ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, - char **target, const char *targetLimit, - const char **source, const char *sourceLimit, - UChar *pivotStart, UChar **pivotSource, - UChar 
**pivotTarget, const UChar *pivotLimit, - UBool reset, UBool flush, - UErrorCode *pErrorCode) { - UChar pivotBuffer[CHUNK_SIZE]; - const UChar *myPivotSource; - UChar *myPivotTarget; - const char *s; - char *t; - - UConverterToUnicodeArgs toUArgs; - UConverterFromUnicodeArgs fromUArgs; - UConverterConvert convert; - - /* error checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } - - if( targetCnv==NULL || sourceCnv==NULL || - source==NULL || *source==NULL || - target==NULL || *target==NULL || targetLimit==NULL - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - s=*source; - t=*target; - if((sourceLimit!=NULL && sourceLimit(size_t)0x7fffffff && sourceLimit>s)) || - ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if(pivotStart==NULL) { - if(!flush) { - /* streaming conversion requires an explicit pivot buffer */ - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* use the stack pivot buffer */ - myPivotSource=myPivotTarget=pivotStart=pivotBuffer; - pivotSource=(UChar **)&myPivotSource; - pivotTarget=&myPivotTarget; - pivotLimit=pivotBuffer+CHUNK_SIZE; - } else if( pivotStart>=pivotLimit || - pivotSource==NULL || *pivotSource==NULL || - pivotTarget==NULL || *pivotTarget==NULL || - pivotLimit==NULL - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if(sourceLimit==NULL) { - /* get limit of single-byte-NUL-terminated source string */ - sourceLimit=uprv_strchr(*source, 0); - } - - if(reset) { - ucnv_resetToUnicode(sourceCnv); - ucnv_resetFromUnicode(targetCnv); - *pivotSource=*pivotTarget=pivotStart; - } else if(targetCnv->charErrorBufferLength>0) { - /* output the targetCnv overflow buffer */ - if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { - /* U_BUFFER_OVERFLOW_ERROR */ - return; - } - /* *target has moved, therefore stop using t */ - - if( !flush && - targetCnv->preFromULength>=0 && 
*pivotSource==*pivotTarget && - sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit - ) { - /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */ - return; - } - } - - /* Is direct-UTF-8 conversion available? */ - if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && - targetCnv->sharedData->impl->fromUTF8!=NULL - ) { - convert=targetCnv->sharedData->impl->fromUTF8; - } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && - sourceCnv->sharedData->impl->toUTF8!=NULL - ) { - convert=sourceCnv->sharedData->impl->toUTF8; - } else { - convert=NULL; - } - - /* - * If direct-UTF-8 conversion is available, then we use a smaller - * pivot buffer for error handling and partial matches - * so that we quickly return to direct conversion. - * - * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. - * - * We could reduce the pivot buffer size further, at the cost of - * buffer overflows from callbacks. - * The pivot buffer should not be smaller than the maximum number of - * fromUnicode extension table input UChars - * (for m:n conversion, see - * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) - * or 2 for surrogate pairs. - * - * Too small a buffer can cause thrashing between pivoting and direct - * conversion, with function call overhead outweighing the benefits - * of direct conversion. 
- */ - if(convert!=NULL && (pivotLimit-pivotStart)>32) { - pivotLimit=pivotStart+32; - } - - /* prepare the converter arguments */ - fromUArgs.converter=targetCnv; - fromUArgs.flush=FALSE; - fromUArgs.offsets=NULL; - fromUArgs.target=*target; - fromUArgs.targetLimit=targetLimit; - fromUArgs.size=sizeof(fromUArgs); - - toUArgs.converter=sourceCnv; - toUArgs.flush=flush; - toUArgs.offsets=NULL; - toUArgs.source=s; - toUArgs.sourceLimit=sourceLimit; - toUArgs.targetLimit=pivotLimit; - toUArgs.size=sizeof(toUArgs); - - /* - * TODO: Consider separating this function into two functions, - * extracting exactly the conversion loop, - * for readability and to reduce the set of visible variables. - * - * Otherwise stop using s and t from here on. - */ - s=t=NULL; - - /* - * conversion loop - * - * The sequence of steps in the loop may appear backward, - * but the principle is simple: - * In the chain of - * source - sourceCnv overflow - pivot - targetCnv overflow - target - * empty out later buffers before refilling them from earlier ones. - * - * The targetCnv overflow buffer is flushed out only once before the loop. - */ - for(;;) { - /* - * if(pivot not empty or error or replay or flush fromUnicode) { - * fromUnicode(pivot -> target); - * } - * - * For pivoting conversion; and for direct conversion for - * error callback handling and flushing the replay buffer. - */ - if( *pivotSource<*pivotTarget || - U_FAILURE(*pErrorCode) || - targetCnv->preFromULength<0 || - fromUArgs.flush - ) { - fromUArgs.source=*pivotSource; - fromUArgs.sourceLimit=*pivotTarget; - _fromUnicodeWithCallback(&fromUArgs, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - /* target overflow, or conversion error */ - *pivotSource=(UChar *)fromUArgs.source; - break; - } - - /* - * _fromUnicodeWithCallback() must have consumed the pivot contents - * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() - */ - } - - /* The pivot buffer is empty; reset it so we start at pivotStart. 
*/ - *pivotSource=*pivotTarget=pivotStart; - - /* - * if(sourceCnv overflow buffer not empty) { - * move(sourceCnv overflow buffer -> pivot); - * continue; - * } - */ - /* output the sourceCnv overflow buffer */ - if(sourceCnv->UCharErrorBufferLength>0) { - if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { - /* U_BUFFER_OVERFLOW_ERROR */ - *pErrorCode=U_ZERO_ERROR; - } - continue; - } - - /* - * check for end of input and break if done - * - * Checking both flush and fromUArgs.flush ensures that the converters - * have been called with the flush flag set if the ucnv_convertEx() - * caller set it. - */ - if( toUArgs.source==sourceLimit && - sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && - (!flush || fromUArgs.flush) - ) { - /* done successfully */ - break; - } - - /* - * use direct conversion if available - * but not if continuing a partial match - * or flushing the toUnicode replay buffer - */ - if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { - if(*pErrorCode==U_USING_DEFAULT_WARNING) { - /* remove a warning that may be set by this function */ - *pErrorCode=U_ZERO_ERROR; - } - convert(&fromUArgs, &toUArgs, pErrorCode); - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - break; - } else if(U_FAILURE(*pErrorCode)) { - if(sourceCnv->toULength>0) { - /* - * Fall through to calling _toUnicodeWithCallback() - * for callback handling. - * - * The pivot buffer will be reset with - * *pivotSource=*pivotTarget=pivotStart; - * which indicates a toUnicode error to the caller - * (*pivotSource==pivotStart shows no pivot UChars consumed). - */ - } else { - /* - * Indicate a fromUnicode error to the caller - * (*pivotSource>pivotStart shows some pivot UChars consumed). - */ - *pivotSource=*pivotTarget=pivotStart+1; - /* - * Loop around to calling _fromUnicodeWithCallbacks() - * for callback handling. 
- */ - continue; - } - } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { - /* - * No error, but the implementation requested to temporarily - * fall back to pivoting. - */ - *pErrorCode=U_ZERO_ERROR; - /* - * The following else branches are almost identical to the end-of-input - * handling in _toUnicodeWithCallback(). - * Avoid calling it just for the end of input. - */ - } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ - /* - * the entire input stream is consumed - * and there is a partial, truncated input sequence left - */ - - /* inject an error and continue with callback handling */ - *pErrorCode=U_TRUNCATED_CHAR_FOUND; - } else { - /* input consumed */ - if(flush) { - /* reset the converters without calling the callback functions */ - _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); - _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); - } - - /* done successfully */ - break; - } - } - - /* - * toUnicode(source -> pivot); - * - * For pivoting conversion; and for direct conversion for - * error callback handling, continuing partial matches - * and flushing the replay buffer. - * - * The pivot buffer is empty and reset. - */ - toUArgs.target=pivotStart; /* ==*pivotTarget */ - /* toUArgs.targetLimit=pivotLimit; already set before the loop */ - _toUnicodeWithCallback(&toUArgs, pErrorCode); - *pivotTarget=toUArgs.target; - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - /* pivot overflow: continue with the conversion loop */ - *pErrorCode=U_ZERO_ERROR; - } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { - /* conversion error, or there was nothing left to convert */ - break; - } - /* - * else: - * _toUnicodeWithCallback() wrote into the pivot buffer, - * continue with fromUnicode conversion. - * - * Set the fromUnicode flush flag if we flush and if toUnicode has - * processed the end of the input. 
- */ - if( flush && toUArgs.source==sourceLimit && - sourceCnv->preToULength>=0 && - sourceCnv->UCharErrorBufferLength==0 - ) { - fromUArgs.flush=TRUE; - } - } - - /* - * The conversion loop is exited when one of the following is true: - * - the entire source text has been converted successfully to the target buffer - * - a target buffer overflow occurred - * - a conversion error occurred - */ - - *source=toUArgs.source; - *target=fromUArgs.target; - - /* terminate the target buffer if possible */ - if(flush && U_SUCCESS(*pErrorCode)) { - if(*target!=targetLimit) { - **target=0; - if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { - *pErrorCode=U_ZERO_ERROR; - } - } else { - *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; - } - } -} - -/* internal implementation of ucnv_convert() etc. with preflighting */ -static int32_t -ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode) { - UChar pivotBuffer[CHUNK_SIZE]; - UChar *pivot, *pivot2; - - char *myTarget; - const char *sourceLimit; - const char *targetLimit; - int32_t targetLength=0; - - /* set up */ - if(sourceLength<0) { - sourceLimit=uprv_strchr(source, 0); - } else { - sourceLimit=source+sourceLength; - } - - /* if there is no input data, we're done */ - if(source==sourceLimit) { - return u_terminateChars(target, targetCapacity, 0, pErrorCode); - } - - pivot=pivot2=pivotBuffer; - myTarget=target; - targetLength=0; - - if(targetCapacity>0) { - /* perform real conversion */ - targetLimit=target+targetCapacity; - ucnv_convertEx(outConverter, inConverter, - &myTarget, targetLimit, - &source, sourceLimit, - pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, - FALSE, - TRUE, - pErrorCode); - targetLength=(int32_t)(myTarget-target); - } - - /* - * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing - * to it but continue the conversion in order to store in 
targetCapacity - * the number of bytes that was required. - */ - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) - { - char targetBuffer[CHUNK_SIZE]; - - targetLimit=targetBuffer+CHUNK_SIZE; - do { - *pErrorCode=U_ZERO_ERROR; - myTarget=targetBuffer; - ucnv_convertEx(outConverter, inConverter, - &myTarget, targetLimit, - &source, sourceLimit, - pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, - FALSE, - TRUE, - pErrorCode); - targetLength+=(int32_t)(myTarget-targetBuffer); - } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); - - /* done with preflighting, set warnings and errors as appropriate */ - return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); - } - - /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ - return targetLength; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_convert(const char *toConverterName, const char *fromConverterName, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode) { - UConverter in, out; /* stack-allocated */ - UConverter *inConverter, *outConverter; - int32_t targetLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if( source==NULL || sourceLength<-1 || - targetCapacity<0 || (targetCapacity>0 && target==NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* if there is no input data, we're done */ - if(sourceLength==0 || (sourceLength<0 && *source==0)) { - return u_terminateChars(target, targetCapacity, 0, pErrorCode); - } - - /* create the converters */ - inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - ucnv_close(inConverter); - return 0; - } - - targetLength=ucnv_internalConvert(outConverter, inConverter, - target, targetCapacity, - source, sourceLength, - pErrorCode); - - 
ucnv_close(inConverter); - ucnv_close(outConverter); - - return targetLength; -} - -/* @internal */ -static int32_t -ucnv_convertAlgorithmic(UBool convertToAlgorithmic, - UConverterType algorithmicType, - UConverter *cnv, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode) { - UConverter algoConverterStatic; /* stack-allocated */ - UConverter *algoConverter, *to, *from; - int32_t targetLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if( cnv==NULL || source==NULL || sourceLength<-1 || - targetCapacity<0 || (targetCapacity>0 && target==NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* if there is no input data, we're done */ - if(sourceLength==0 || (sourceLength<0 && *source==0)) { - return u_terminateChars(target, targetCapacity, 0, pErrorCode); - } - - /* create the algorithmic converter */ - algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, - "", 0, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* reset the other converter */ - if(convertToAlgorithmic) { - /* cnv->Unicode->algo */ - ucnv_resetToUnicode(cnv); - to=algoConverter; - from=cnv; - } else { - /* algo->Unicode->cnv */ - ucnv_resetFromUnicode(cnv); - from=algoConverter; - to=cnv; - } - - targetLength=ucnv_internalConvert(to, from, - target, targetCapacity, - source, sourceLength, - pErrorCode); - - ucnv_close(algoConverter); - - return targetLength; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_toAlgorithmic(UConverterType algorithmicType, - UConverter *cnv, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode) { - return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, - target, targetCapacity, - source, sourceLength, - pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -ucnv_fromAlgorithmic(UConverter *cnv, - UConverterType algorithmicType, - char *target, int32_t targetCapacity, - const char 
*source, int32_t sourceLength, - UErrorCode *pErrorCode) { - return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, - target, targetCapacity, - source, sourceLength, - pErrorCode); -} - -U_CAPI UConverterType U_EXPORT2 -ucnv_getType(const UConverter* converter) -{ - int8_t type = converter->sharedData->staticData->conversionType; -#if !UCONFIG_NO_LEGACY_CONVERSION - if(type == UCNV_MBCS) { - return ucnv_MBCSGetType(converter); - } -#endif - return (UConverterType)type; -} - -U_CAPI void U_EXPORT2 -ucnv_getStarters(const UConverter* converter, - UBool starters[256], - UErrorCode* err) -{ - if (err == NULL || U_FAILURE(*err)) { - return; - } - - if(converter->sharedData->impl->getStarters != NULL) { - converter->sharedData->impl->getStarters(converter, starters, err); - } else { - *err = U_ILLEGAL_ARGUMENT_ERROR; - } -} - -static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) -{ - UErrorCode errorCode; - const char *name; - int32_t i; - - if(cnv==NULL) { - return NULL; - } - - errorCode=U_ZERO_ERROR; - name=ucnv_getName(cnv, &errorCode); - if(U_FAILURE(errorCode)) { - return NULL; - } - - for(i=0; ivariant5c; - for(i=0; iuseFallback = usesFallback; -} - -U_CAPI UBool U_EXPORT2 -ucnv_usesFallback(const UConverter *cnv) -{ - return cnv->useFallback; -} - -U_CAPI void U_EXPORT2 -ucnv_getInvalidChars (const UConverter * converter, - char *errBytes, - int8_t * len, - UErrorCode * err) -{ - if (err == NULL || U_FAILURE(*err)) - { - return; - } - if (len == NULL || errBytes == NULL || converter == NULL) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if (*len < converter->invalidCharLength) - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - if ((*len = converter->invalidCharLength) > 0) - { - uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); - } -} - -U_CAPI void U_EXPORT2 -ucnv_getInvalidUChars (const UConverter * converter, - UChar *errChars, - int8_t * len, - UErrorCode * err) -{ - if (err == NULL || U_FAILURE(*err)) - { - 
return; - } - if (len == NULL || errChars == NULL || converter == NULL) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if (*len < converter->invalidUCharLength) - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - if ((*len = converter->invalidUCharLength) > 0) - { - u_memcpy (errChars, converter->invalidUCharBuffer, *len); - } -} - -#define SIG_MAX_LEN 5 - -U_CAPI const char* U_EXPORT2 -ucnv_detectUnicodeSignature( const char* source, - int32_t sourceLength, - int32_t* signatureLength, - UErrorCode* pErrorCode) { - int32_t dummy; - - /* initial 0xa5 bytes: make sure that if we read preFromUFirstCP >= 0){ - return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; - }else if(cnv->preFromULength < 0){ - return -cnv->preFromULength ; - }else if(cnv->fromUChar32 > 0){ - return 1; - } - return 0; - -} - -U_CAPI int32_t U_EXPORT2 -ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ - - if(status == NULL || U_FAILURE(*status)){ - return -1; - } - if(cnv == NULL){ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return -1; - } - - if(cnv->preToULength > 0){ - return cnv->preToULength ; - }else if(cnv->preToULength < 0){ - return -cnv->preToULength; - }else if(cnv->toULength > 0){ - return cnv->toULength; - } - return 0; -} - -U_CAPI UBool U_EXPORT2 -ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ - if (U_FAILURE(*status)) { - return FALSE; - } - - if (cnv == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - - switch (ucnv_getType(cnv)) { - case UCNV_SBCS: - case UCNV_DBCS: - case UCNV_UTF32_BigEndian: - case UCNV_UTF32_LittleEndian: - case UCNV_UTF32: - case UCNV_US_ASCII: - return TRUE; - default: - return FALSE; - } -} -#endif - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/node/deps/icu-small/source/common/ucnv2022.cpp b/deps/node/deps/icu-small/source/common/ucnv2022.cpp deleted file mode 100644 index 4a35ff85..00000000 --- 
a/deps/node/deps/icu-small/source/common/ucnv2022.cpp +++ /dev/null @@ -1,3978 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnv2022.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000feb03 -* created by: Markus W. Scherer -* -* Change history: -* -* 06/29/2000 helena Major rewrite of the callback APIs. -* 08/08/2000 Ram Included support for ISO-2022-JP-2 -* Changed implementation of toUnicode -* function -* 08/21/2000 Ram Added support for ISO-2022-KR -* 08/29/2000 Ram Seperated implementation of EBCDIC to -* ucnvebdc.c -* 09/20/2000 Ram Added support for ISO-2022-CN -* Added implementations for getNextUChar() -* for specific 2022 country variants. -* 10/31/2000 Ram Implemented offsets logic functions -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/uset.h" -#include "unicode/ucnv_err.h" -#include "unicode/ucnv_cb.h" -#include "unicode/utf16.h" -#include "ucnv_imp.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "ucnvmbcs.h" -#include "cstring.h" -#include "cmemory.h" -#include "uassert.h" - -#ifdef U_ENABLE_GENERIC_ISO_2022 -/* - * I am disabling the generic ISO-2022 converter after proposing to do so on - * the icu mailing list two days ago. - * - * Reasons: - * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of - * its designation sequences, single shifts with return to the previous state, - * switch-with-no-return to UTF-16BE or similar, etc. 
- * This is unlike the language-specific variants like ISO-2022-JP which - * require a much smaller repertoire of ISO-2022 features. - * These variants continue to be supported. - * 2. I believe that no one is really using the generic ISO-2022 converter - * but rather always one of the language-specific variants. - * Note that ICU's generic ISO-2022 converter has always output one escape - * sequence followed by UTF-8 for the whole stream. - * 3. Switching between subcharsets is extremely slow, because each time - * the previous converter is closed and a new one opened, - * without any kind of caching, least-recently-used list, etc. - * 4. The code is currently buggy, and given the above it does not seem - * reasonable to spend the time on maintenance. - * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings. - * This means, for example, that when ISO-8859-7 is designated, the following - * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff. - * The ICU ISO-2022 converter does not handle this - and has no information - * about which subconverter would have to be shifted vs. which is designed - * for 7-bit ISO-2022. - * - * Markus Scherer 2003-dec-03 - */ -#endif - -#if !UCONFIG_ONLY_HTML_CONVERSION -static const char SHIFT_IN_STR[] = "\x0F"; -// static const char SHIFT_OUT_STR[] = "\x0E"; -#endif - -#define CR 0x0D -#define LF 0x0A -#define H_TAB 0x09 -#define V_TAB 0x0B -#define SPACE 0x20 - -enum { - HWKANA_START=0xff61, - HWKANA_END=0xff9f -}; - -/* - * 94-character sets with native byte values A1..FE are encoded in ISO 2022 - * as bytes 21..7E. (Subtract 0x80.) - * 96-character sets with native byte values A0..FF are encoded in ISO 2022 - * as bytes 20..7F. (Subtract 0x80.) - * Do not encode C1 control codes with native bytes 80..9F - * as bytes 00..1F (C0 control codes). 
- */ -enum { - GR94_START=0xa1, - GR94_END=0xfe, - GR96_START=0xa0, - GR96_END=0xff -}; - -/* - * ISO 2022 control codes must not be converted from Unicode - * because they would mess up the byte stream. - * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b - * corresponding to SO, SI, and ESC. - */ -#define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0) - -/* for ISO-2022-JP and -CN implementations */ -typedef enum { - /* shared values */ - INVALID_STATE=-1, - ASCII = 0, - - SS2_STATE=0x10, - SS3_STATE, - - /* JP */ - ISO8859_1 = 1 , - ISO8859_7 = 2 , - JISX201 = 3, - JISX208 = 4, - JISX212 = 5, - GB2312 =6, - KSC5601 =7, - HWKANA_7BIT=8, /* Halfwidth Katakana 7 bit */ - - /* CN */ - /* the first few enum constants must keep their values because they correspond to myConverterArray[] */ - GB2312_1=1, - ISO_IR_165=2, - CNS_11643=3, - - /* - * these are used in StateEnum and ISO2022State variables, - * but CNS_11643 must be used to index into myConverterArray[] - */ - CNS_11643_0=0x20, - CNS_11643_1, - CNS_11643_2, - CNS_11643_3, - CNS_11643_4, - CNS_11643_5, - CNS_11643_6, - CNS_11643_7 -} StateEnum; - -/* is the StateEnum charset value for a DBCS charset? */ -#if UCONFIG_ONLY_HTML_CONVERSION -#define IS_JP_DBCS(cs) (JISX208==(cs)) -#else -#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) -#endif - -#define CSM(cs) ((uint16_t)1<<(cs)) - -/* - * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence - * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x - * - * Note: The converter uses some leniency: - * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in - * all versions, not just JIS7 and JIS8. - * - ICU does not distinguish between different versions of JIS X 0208. 
- */ -#if UCONFIG_ONLY_HTML_CONVERSION -enum { MAX_JA_VERSION=0 }; -#else -enum { MAX_JA_VERSION=4 }; -#endif -static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ - CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), -#if !UCONFIG_ONLY_HTML_CONVERSION - CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), - CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), - CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), - CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) -#endif -}; - -typedef enum { - ASCII1=0, - LATIN1, - SBCS, - DBCS, - MBCS, - HWKANA -}Cnv2022Type; - -typedef struct ISO2022State { - int8_t cs[4]; /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */ - int8_t g; /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */ - int8_t prevG; /* g before single shift (SS2 or SS3) */ -} ISO2022State; - -#define UCNV_OPTIONS_VERSION_MASK 0xf -#define UCNV_2022_MAX_CONVERTERS 10 - -typedef struct{ - UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS]; - UConverter *currentConverter; - Cnv2022Type currentType; - ISO2022State toU2022State, fromU2022State; - uint32_t key; - uint32_t version; -#ifdef U_ENABLE_GENERIC_ISO_2022 - UBool isFirstBuffer; -#endif - UBool isEmptySegment; - char name[30]; - char locale[3]; -}UConverterDataISO2022; - -/* Protos */ -/* ISO-2022 ----------------------------------------------------------------- */ - -/*Forward declaration */ -U_CFUNC void U_CALLCONV -ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args, - UErrorCode * err); -U_CFUNC void U_CALLCONV -ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args, - UErrorCode * err); - -#define ESC_2022 0x1B /*ESC*/ - -typedef enum -{ - INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/ - VALID_NON_TERMINAL_2022 = 0, 
/*so far corresponds to a valid iso 2022 escape sequence*/ - VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/ - VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/ -} UCNV_TableStates_2022; - -/* -* The way these state transition arrays work is: -* ex : ESC$B is the sequence for JISX208 -* a) First Iteration: char is ESC -* i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index -* int x = normalize_esq_chars_2022[27] which is equal to 1 -* ii) Search for this value in escSeqStateTable_Key_2022[] -* value of x is stored at escSeqStateTable_Key_2022[0] -* iii) Save this index as offset -* iv) Get state of this sequence from escSeqStateTable_Value_2022[] -* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022 -* b) Switch on this state and continue to next char -* i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index -* which is normalize_esq_chars_2022[36] == 4 -* ii) x is currently 1(from above) -* x<<=5 -- x is now 32 -* x+=normalize_esq_chars_2022[36] -* now x is 36 -* iii) Search for this value in escSeqStateTable_Key_2022[] -* value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2 -* iv) Get state of this sequence from escSeqStateTable_Value_2022[] -* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022 -* c) Switch on this state and continue to next char -* i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index -* ii) x is currently 36 (from above) -* x<<=5 -- x is now 1152 -* x+=normalize_esq_chars_2022[66] -* now x is 1161 -* iii) Search for this value in escSeqStateTable_Key_2022[] -* value of x is stored at escSeqStateTable_Key_2022[21], so offset is 21 -* iv) Get state of this sequence from escSeqStateTable_Value_2022[21] -* escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022 -* v) Get the 
converter name form escSeqStateTable_Result_2022[21] which is JISX208 -*/ - - -/*Below are the 3 arrays depicting a state transition table*/ -static const int8_t normalize_esq_chars_2022[256] = { -/* 0 1 2 3 4 5 6 7 8 9 */ - - 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,4 ,7 ,29 ,0 - ,2 ,24 ,26 ,27 ,0 ,3 ,23 ,6 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,5 ,8 ,9 ,10 ,11 ,12 - ,13 ,14 ,15 ,16 ,17 ,18 ,19 ,20 ,25 ,28 - ,0 ,0 ,21 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,22 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 - ,0 ,0 ,0 ,0 ,0 ,0 -}; - -#ifdef U_ENABLE_GENERIC_ISO_2022 -/* - * When the generic ISO-2022 converter is completely removed, not just disabled - * per #ifdef, then the following state table and the associated tables that are - * dimensioned with MAX_STATES_2022 should be trimmed. - * - * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of - * the associated escape sequences starting with ESC ( B should be removed. - * This includes the ones with key values 1097 and all of the ones above 1000000. - * - * For the latter, the tables can simply be truncated. - * For the former, since the tables must be kept parallel, it is probably best - * to simply duplicate an adjacent table cell, parallel in all tables. - * - * It may make sense to restructure the tables, especially by using small search - * tables for the variants instead of indexing them parallel to the table here. 
- */ -#endif - -#define MAX_STATES_2022 74 -static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = { -/* 0 1 2 3 4 5 6 7 8 9 */ - - 1 ,34 ,36 ,39 ,55 ,57 ,60 ,61 ,1093 ,1096 - ,1097 ,1098 ,1099 ,1100 ,1101 ,1102 ,1103 ,1104 ,1105 ,1106 - ,1109 ,1154 ,1157 ,1160 ,1161 ,1176 ,1178 ,1179 ,1254 ,1257 - ,1768 ,1773 ,1957 ,35105 ,36933 ,36936 ,36937 ,36938 ,36939 ,36940 - ,36942 ,36943 ,36944 ,36945 ,36946 ,36947 ,36948 ,37640 ,37642 ,37644 - ,37646 ,37711 ,37744 ,37745 ,37746 ,37747 ,37748 ,40133 ,40136 ,40138 - ,40139 ,40140 ,40141 ,1123363 ,35947624 ,35947625 ,35947626 ,35947627 ,35947629 ,35947630 - ,35947631 ,35947635 ,35947636 ,35947638 -}; - -#ifdef U_ENABLE_GENERIC_ISO_2022 - -static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = { - /* 0 1 2 3 4 5 6 7 8 9 */ - - NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1" - ,"latin1" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"JISX0201" ,"JISX0201" ,"latin1" - ,"latin1" ,NULL ,"JISX-208" ,"ibm-5478" ,"JISX-208" ,NULL ,NULL ,NULL ,NULL ,"UTF8" - ,"ISO-8859-1" ,"ISO-8859-7" ,"JIS-X-208" ,NULL ,"ibm-955" ,"ibm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383" - ,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-5478" ,"ibm-949" ,"ISO-IR-165" - ,"CNS-11643-1992,1" ,"CNS-11643-1992,2" ,"CNS-11643-1992,3" ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6" ,"CNS-11643-1992,7" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" - ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089" - ,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1" -}; - -#endif - -static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = { -/* 0 1 2 3 4 5 6 7 8 9 */ - VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 
,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 - ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 - ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 - ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 - ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 - ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 - ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 - ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 -}; - -/* Type def for refactoring changeState_2022 code*/ -typedef enum{ -#ifdef U_ENABLE_GENERIC_ISO_2022 - ISO_2022=0, -#endif - ISO_2022_JP=1, -#if !UCONFIG_ONLY_HTML_CONVERSION - ISO_2022_KR=2, - ISO_2022_CN=3 -#endif -} Variant2022; - -/*********** ISO 2022 Converter Protos ***********/ -static void U_CALLCONV -_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); - -static void U_CALLCONV - _ISO2022Close(UConverter *converter); - -static void U_CALLCONV -_ISO2022Reset(UConverter *converter, 
UConverterResetChoice choice); - -U_CDECL_BEGIN -static const char * U_CALLCONV -_ISO2022getName(const UConverter* cnv); -U_CDECL_END - -static void U_CALLCONV -_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err); - -U_CDECL_BEGIN -static UConverter * U_CALLCONV -_ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status); - -U_CDECL_END - -#ifdef U_ENABLE_GENERIC_ISO_2022 -static void U_CALLCONV -T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err); -#endif - -namespace { - -/*const UConverterSharedData _ISO2022Data;*/ -extern const UConverterSharedData _ISO2022JPData; - -#if !UCONFIG_ONLY_HTML_CONVERSION -extern const UConverterSharedData _ISO2022KRData; -extern const UConverterSharedData _ISO2022CNData; -#endif - -} // namespace - -/*************** Converter implementations ******************/ - -/* The purpose of this function is to get around gcc compiler warnings. 
*/ -static inline void -fromUWriteUInt8(UConverter *cnv, - const char *bytes, int32_t length, - uint8_t **target, const char *targetLimit, - int32_t **offsets, - int32_t sourceIndex, - UErrorCode *pErrorCode) -{ - char *targetChars = (char *)*target; - ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit, - offsets, sourceIndex, pErrorCode); - *target = (uint8_t*)targetChars; - -} - -static inline void -setInitialStateToUnicodeKR(UConverter* /*converter*/, UConverterDataISO2022 *myConverterData){ - if(myConverterData->version == 1) { - UConverter *cnv = myConverterData->currentConverter; - - cnv->toUnicodeStatus=0; /* offset */ - cnv->mode=0; /* state */ - cnv->toULength=0; /* byteIndex */ - } -} - -static inline void -setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){ - /* in ISO-2022-KR the designator sequence appears only once - * in a file so we append it only once - */ - if( converter->charErrorBufferLength==0){ - - converter->charErrorBufferLength = 4; - converter->charErrorBuffer[0] = 0x1b; - converter->charErrorBuffer[1] = 0x24; - converter->charErrorBuffer[2] = 0x29; - converter->charErrorBuffer[3] = 0x43; - } - if(myConverterData->version == 1) { - UConverter *cnv = myConverterData->currentConverter; - - cnv->fromUChar32=0; - cnv->fromUnicodeStatus=1; /* prevLength */ - } -} - -static void U_CALLCONV -_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ - - char myLocale[6]={' ',' ',' ',' ',' ',' '}; - - cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022)); - if(cnv->extraInfo != NULL) { - UConverterNamePieces stackPieces; - UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; - UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo; - uint32_t version; - - stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable; - - uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022)); - myConverterData->currentType = ASCII1; - 
cnv->fromUnicodeStatus =FALSE; - if(pArgs->locale){ - uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale)); - } - version = pArgs->options & UCNV_OPTIONS_VERSION_MASK; - myConverterData->version = version; - if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && - (myLocale[2]=='_' || myLocale[2]=='\0')) - { - /* open the required converters and cache them */ - if(version>MAX_JA_VERSION) { - // ICU 55 fails to open a converter for an unsupported version. - // Previously, it fell back to version 0, but that would yield - // unexpected behavior. - *errorCode = U_MISSING_RESOURCE_ERROR; - return; - } - if(jpCharsetMasks[version]&CSM(ISO8859_7)) { - myConverterData->myConverterArray[ISO8859_7] = - ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode); - } - myConverterData->myConverterArray[JISX208] = - ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode); - if(jpCharsetMasks[version]&CSM(JISX212)) { - myConverterData->myConverterArray[JISX212] = - ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode); - } - if(jpCharsetMasks[version]&CSM(GB2312)) { - myConverterData->myConverterArray[GB2312] = - ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */ - } - if(jpCharsetMasks[version]&CSM(KSC5601)) { - myConverterData->myConverterArray[KSC5601] = - ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode); - } - - /* set the function pointers to appropriate funtions */ - cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); - uprv_strcpy(myConverterData->locale,"ja"); - - (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version="); - size_t len = uprv_strlen(myConverterData->name); - myConverterData->name[len]=(char)(myConverterData->version+(int)'0'); - myConverterData->name[len+1]='\0'; - } -#if !UCONFIG_ONLY_HTML_CONVERSION - else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && - (myLocale[2]=='_' || myLocale[2]=='\0')) - { - 
if(version>1) { - // ICU 55 fails to open a converter for an unsupported version. - // Previously, it fell back to version 0, but that would yield - // unexpected behavior. - *errorCode = U_MISSING_RESOURCE_ERROR; - return; - } - const char *cnvName; - if(version==1) { - cnvName="icu-internal-25546"; - } else { - cnvName="ibm-949"; - myConverterData->version=version=0; - } - if(pArgs->onlyTestIsLoadable) { - ucnv_canCreateConverter(cnvName, errorCode); /* errorCode carries result */ - uprv_free(cnv->extraInfo); - cnv->extraInfo=NULL; - return; - } else { - myConverterData->currentConverter=ucnv_open(cnvName, errorCode); - if (U_FAILURE(*errorCode)) { - _ISO2022Close(cnv); - return; - } - - if(version==1) { - (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1"); - uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4); - cnv->subCharLen = myConverterData->currentConverter->subCharLen; - }else{ - (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0"); - } - - /* initialize the state variables */ - setInitialStateToUnicodeKR(cnv, myConverterData); - setInitialStateFromUnicodeKR(cnv, myConverterData); - - /* set the function pointers to appropriate funtions */ - cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData; - uprv_strcpy(myConverterData->locale,"ko"); - } - } - else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&& - (myLocale[2]=='_' || myLocale[2]=='\0')) - { - if(version>2) { - // ICU 55 fails to open a converter for an unsupported version. - // Previously, it fell back to version 0, but that would yield - // unexpected behavior. 
- *errorCode = U_MISSING_RESOURCE_ERROR; - return; - } - - /* open the required converters and cache them */ - myConverterData->myConverterArray[GB2312_1] = - ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); - if(version==1) { - myConverterData->myConverterArray[ISO_IR_165] = - ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode); - } - myConverterData->myConverterArray[CNS_11643] = - ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode); - - - /* set the function pointers to appropriate funtions */ - cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData; - uprv_strcpy(myConverterData->locale,"cn"); - - if (version==0){ - myConverterData->version = 0; - (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0"); - }else if (version==1){ - myConverterData->version = 1; - (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1"); - }else { - myConverterData->version = 2; - (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2"); - } - } -#endif // !UCONFIG_ONLY_HTML_CONVERSION - else{ -#ifdef U_ENABLE_GENERIC_ISO_2022 - myConverterData->isFirstBuffer = TRUE; - - /* append the UTF-8 escape sequence */ - cnv->charErrorBufferLength = 3; - cnv->charErrorBuffer[0] = 0x1b; - cnv->charErrorBuffer[1] = 0x25; - cnv->charErrorBuffer[2] = 0x42; - - cnv->sharedData=(UConverterSharedData*)&_ISO2022Data; - /* initialize the state variables */ - uprv_strcpy(myConverterData->name,"ISO_2022"); -#else - *errorCode = U_MISSING_RESOURCE_ERROR; - // Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard - // data loading error code. 
- return; -#endif - } - - cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar; - - if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) { - _ISO2022Close(cnv); - } - } else { - *errorCode = U_MEMORY_ALLOCATION_ERROR; - } -} - - -static void U_CALLCONV -_ISO2022Close(UConverter *converter) { - UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo); - UConverterSharedData **array = myData->myConverterArray; - int32_t i; - - if (converter->extraInfo != NULL) { - /*close the array of converter pointers and free the memory*/ - for (i=0; icurrentConverter); - - if(!converter->isExtraLocal){ - uprv_free (converter->extraInfo); - converter->extraInfo = NULL; - } - } -} - -static void U_CALLCONV -_ISO2022Reset(UConverter *converter, UConverterResetChoice choice) { - UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo); - if(choice<=UCNV_RESET_TO_UNICODE) { - uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State)); - myConverterData->key = 0; - myConverterData->isEmptySegment = FALSE; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State)); - } -#ifdef U_ENABLE_GENERIC_ISO_2022 - if(myConverterData->locale[0] == 0){ - if(choice<=UCNV_RESET_TO_UNICODE) { - myConverterData->isFirstBuffer = TRUE; - myConverterData->key = 0; - if (converter->mode == UCNV_SO){ - ucnv_close (myConverterData->currentConverter); - myConverterData->currentConverter=NULL; - } - converter->mode = UCNV_SI; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - /* re-append UTF-8 escape sequence */ - converter->charErrorBufferLength = 3; - converter->charErrorBuffer[0] = 0x1b; - converter->charErrorBuffer[1] = 0x28; - converter->charErrorBuffer[2] = 0x42; - } - } - else -#endif - { - /* reset the state variables */ - if(myConverterData->locale[0] == 'k'){ - if(choice<=UCNV_RESET_TO_UNICODE) { - setInitialStateToUnicodeKR(converter, myConverterData); - } - 
if(choice!=UCNV_RESET_TO_UNICODE) { - setInitialStateFromUnicodeKR(converter, myConverterData); - } - } - } -} - -U_CDECL_BEGIN - -static const char * U_CALLCONV -_ISO2022getName(const UConverter* cnv){ - if(cnv->extraInfo){ - UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo; - return myData->name; - } - return NULL; -} - -U_CDECL_END - - -/*************** to unicode *******************/ -/**************************************************************************** - * Recognized escape sequences are - * (B ASCII - * .A ISO-8859-1 - * .F ISO-8859-7 - * (J JISX-201 - * (I JISX-201 - * $B JISX-208 - * $@ JISX-208 - * $(D JISX-212 - * $A GB2312 - * $(C KSC5601 - */ -static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= { -/* 0 1 2 3 4 5 6 7 8 9 */ - INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE - ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STATE - ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE - ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STATE - ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE - ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE - ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE - ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE -}; - -#if !UCONFIG_ONLY_HTML_CONVERSION -/*************** to unicode *******************/ -static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { -/* 0 1 2 3 
4 5 6 7 8 9 */ - INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE - ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE - ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE - ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE - ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 - ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE - ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE - ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE -}; -#endif - - -static UCNV_TableStates_2022 -getKey_2022(char c,int32_t* key,int32_t* offset){ - int32_t togo; - int32_t low = 0; - int32_t hi = MAX_STATES_2022; - int32_t oldmid=0; - - togo = normalize_esq_chars_2022[(uint8_t)c]; - if(togo == 0) { - /* not a valid character anywhere in an escape sequence */ - *key = 0; - *offset = 0; - return INVALID_2022; - } - togo = (*key << 5) + togo; - - while (hi != low) /*binary search*/{ - - int32_t mid = (hi+low) >> 1; /*Finds median*/ - - if (mid == oldmid) - break; - - if (escSeqStateTable_Key_2022[mid] > togo){ - hi = mid; - } - else if (escSeqStateTable_Key_2022[mid] < togo){ - low = mid; - } - else /*we found it*/{ - *key = togo; - *offset = mid; - return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid]; - } - oldmid = mid; - - } - - *key = 0; - *offset = 0; - return INVALID_2022; -} - -/*runs through a state machine to 
determine the escape sequence - codepage correspondance - */ -static void -changeState_2022(UConverter* _this, - const char** source, - const char* sourceLimit, - Variant2022 var, - UErrorCode* err){ - UCNV_TableStates_2022 value; - UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo); - uint32_t key = myData2022->key; - int32_t offset = 0; - int8_t initialToULength = _this->toULength; - char c; - - value = VALID_NON_TERMINAL_2022; - while (*source < sourceLimit) { - c = *(*source)++; - _this->toUBytes[_this->toULength++]=(uint8_t)c; - value = getKey_2022(c,(int32_t *) &key, &offset); - - switch (value){ - - case VALID_NON_TERMINAL_2022 : - /* continue with the loop */ - break; - - case VALID_TERMINAL_2022: - key = 0; - goto DONE; - - case INVALID_2022: - goto DONE; - - case VALID_MAYBE_TERMINAL_2022: -#ifdef U_ENABLE_GENERIC_ISO_2022 - /* ESC ( B is ambiguous only for ISO_2022 itself */ - if(var == ISO_2022) { - /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */ - _this->toULength = 0; - - /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */ - - /* continue with the loop */ - value = VALID_NON_TERMINAL_2022; - break; - } else -#endif - { - /* not ISO_2022 itself, finish here */ - value = VALID_TERMINAL_2022; - key = 0; - goto DONE; - } - } - } - -DONE: - myData2022->key = key; - - if (value == VALID_NON_TERMINAL_2022) { - /* indicate that the escape sequence is incomplete: key!=0 */ - return; - } else if (value == INVALID_2022 ) { - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - } else /* value == VALID_TERMINAL_2022 */ { - switch(var){ -#ifdef U_ENABLE_GENERIC_ISO_2022 - case ISO_2022: - { - const char *chosenConverterName = escSeqStateTable_Result_2022[offset]; - if(chosenConverterName == NULL) { - /* SS2 or SS3 */ - *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; - _this->toUCallbackReason = UCNV_UNASSIGNED; - return; - } - - _this->mode = UCNV_SI; - 
ucnv_close(myData2022->currentConverter); - myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err); - if(U_SUCCESS(*err)) { - myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP; - _this->mode = UCNV_SO; - } - break; - } -#endif - case ISO_2022_JP: - { - StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset]; - switch(tempState) { - case INVALID_STATE: - *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; - break; - case SS2_STATE: - if(myData2022->toU2022State.cs[2]!=0) { - if(myData2022->toU2022State.g<2) { - myData2022->toU2022State.prevG=myData2022->toU2022State.g; - } - myData2022->toU2022State.g=2; - } else { - /* illegal to have SS2 before a matching designator */ - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - } - break; - /* case SS3_STATE: not used in ISO-2022-JP-x */ - case ISO8859_1: - case ISO8859_7: - if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { - *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; - } else { - /* G2 charset for SS2 */ - myData2022->toU2022State.cs[2]=(int8_t)tempState; - } - break; - default: - if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { - *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; - } else { - /* G0 charset */ - myData2022->toU2022State.cs[0]=(int8_t)tempState; - } - break; - } - } - break; -#if !UCONFIG_ONLY_HTML_CONVERSION - case ISO_2022_CN: - { - StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; - switch(tempState) { - case INVALID_STATE: - *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; - break; - case SS2_STATE: - if(myData2022->toU2022State.cs[2]!=0) { - if(myData2022->toU2022State.g<2) { - myData2022->toU2022State.prevG=myData2022->toU2022State.g; - } - myData2022->toU2022State.g=2; - } else { - /* illegal to have SS2 before a matching designator */ - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - } - break; - case SS3_STATE: - if(myData2022->toU2022State.cs[3]!=0) { - if(myData2022->toU2022State.g<2) { - myData2022->toU2022State.prevG=myData2022->toU2022State.g; - } - 
myData2022->toU2022State.g=3; - } else { - /* illegal to have SS3 before a matching designator */ - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - } - break; - case ISO_IR_165: - if(myData2022->version==0) { - *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; - break; - } - U_FALLTHROUGH; - case GB2312_1: - U_FALLTHROUGH; - case CNS_11643_1: - myData2022->toU2022State.cs[1]=(int8_t)tempState; - break; - case CNS_11643_2: - myData2022->toU2022State.cs[2]=(int8_t)tempState; - break; - default: - /* other CNS 11643 planes */ - if(myData2022->version==0) { - *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; - } else { - myData2022->toU2022State.cs[3]=(int8_t)tempState; - } - break; - } - } - break; - case ISO_2022_KR: - if(offset==0x30){ - /* nothing to be done, just accept this one escape sequence */ - } else { - *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; - } - break; -#endif // !UCONFIG_ONLY_HTML_CONVERSION - - default: - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - break; - } - } - if(U_SUCCESS(*err)) { - _this->toULength = 0; - } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { - if(_this->toULength>1) { - /* - * Ticket 5691: consistent illegal sequences: - * - We include at least the first byte (ESC) in the illegal sequence. - * - If any of the non-initial bytes could be the start of a character, - * we stop the illegal sequence before the first one of those. - * In escape sequences, all following bytes are "printable", that is, - * unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS), - * they are valid single/lead bytes. - * For simplicity, we always only report the initial ESC byte as the - * illegal sequence and back out all other bytes we looked at. - */ - /* Back out some bytes. */ - int8_t backOutDistance=_this->toULength-1; - int8_t bytesFromThisBuffer=_this->toULength-initialToULength; - if(backOutDistance<=bytesFromThisBuffer) { - /* same as initialToULength<=1 */ - *source-=backOutDistance; - } else { - /* Back out bytes from the previous buffer: Need to replay them. 
*/ - _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance); - /* same as -(initialToULength-1) */ - /* preToULength is negative! */ - uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength); - *source-=bytesFromThisBuffer; - } - _this->toULength=1; - } - } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) { - _this->toUCallbackReason = UCNV_UNASSIGNED; - } -} - -#if !UCONFIG_ONLY_HTML_CONVERSION -/*Checks the characters of the buffer against valid 2022 escape sequences -*if the match we return a pointer to the initial start of the sequence otherwise -*we return sourceLimit -*/ -/*for 2022 looks ahead in the stream - *to determine the longest possible convertible - *data stream - */ -static inline const char* -getEndOfBuffer_2022(const char** source, - const char* sourceLimit, - UBool /*flush*/){ - - const char* mySource = *source; - -#ifdef U_ENABLE_GENERIC_ISO_2022 - if (*source >= sourceLimit) - return sourceLimit; - - do{ - - if (*mySource == ESC_2022){ - int8_t i; - int32_t key = 0; - int32_t offset; - UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022; - - /* Kludge: I could not - * figure out the reason for validating an escape sequence - * twice - once here and once in changeState_2022(). - * is it possible to have an ESC character in a ISO2022 - * byte stream which is valid in a code page? Is it legal? - */ - for (i=0; - (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022); - i++) { - value = getKey_2022(*(mySource+i), &key, &offset); - } - if (value > 0 || *mySource==ESC_2022) - return mySource; - - if ((value == VALID_NON_TERMINAL_2022)&&(!flush) ) - return sourceLimit; - } - }while (++mySource < sourceLimit); - - return sourceLimit; -#else - while(mySource < sourceLimit && *mySource != ESC_2022) { - ++mySource; - } - return mySource; -#endif -} -#endif - -/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c - * any future change in _MBCSFromUChar32() function should be reflected here. 
- * @return number of bytes in *value; negative number if fallback; 0 if no mapping - */ -static inline int32_t -MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData, - UChar32 c, - uint32_t* value, - UBool useFallback, - int outputType) -{ - const int32_t *cx; - const uint16_t *table; - uint32_t stage2Entry; - uint32_t myValue; - int32_t length; - const uint8_t *p; - /* - * TODO(markus): Use and require new, faster MBCS conversion table structures. - * Use internal version of ucnv_open() that verifies that the new structures are available, - * else U_INTERNAL_PROGRAM_ERROR. - */ - /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ - if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { - table=sharedData->mbcs.fromUnicodeTable; - stage2Entry=MBCS_STAGE_2_FROM_U(table, c); - /* get the bytes and the length for the output */ - if(outputType==MBCS_OUTPUT_2){ - myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); - if(myValue<=0xff) { - length=1; - } else { - length=2; - } - } else /* outputType==MBCS_OUTPUT_3 */ { - p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); - myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; - if(myValue<=0xff) { - length=1; - } else if(myValue<=0xffff) { - length=2; - } else { - length=3; - } - } - /* is this code point assigned, or do we use fallbacks? */ - if((stage2Entry&(1<<(16+(c&0xf))))!=0) { - /* assigned */ - *value=myValue; - return length; - } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) { - /* - * We allow a 0 byte output if the "assigned" bit is set for this entry. - * There is no way with this data structure for fallback output - * to be a zero byte. 
- */ - *value=myValue; - return -length; - } - } - - cx=sharedData->mbcs.extIndexes; - if(cx!=NULL) { - return ucnv_extSimpleMatchFromU(cx, c, value, useFallback); - } - - /* unassigned */ - return 0; -} - -/* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c - * any future change in _MBCSSingleFromUChar32() function should be reflected here. - * @param retval pointer to output byte - * @return 1 roundtrip byte 0 no mapping -1 fallback byte - */ -static inline int32_t -MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData, - UChar32 c, - uint32_t* retval, - UBool useFallback) -{ - const uint16_t *table; - int32_t value; - /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ - if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { - return 0; - } - /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ - table=sharedData->mbcs.fromUnicodeTable; - /* get the byte for the output */ - value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c); - /* is this code point assigned, or do we use fallbacks? */ - *retval=(uint32_t)(value&0xff); - if(value>=0xf00) { - return 1; /* roundtrip */ - } else if(useFallback ? value>=0x800 : value>=0xc00) { - return -1; /* fallback taken */ - } else { - return 0; /* no mapping */ - } -} - -/* - * Check that the result is a 2-byte value with each byte in the range A1..FE - * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte - * to move it to the ISO 2022 range 21..7E. - * Return 0 if out of range. - */ -static inline uint32_t -_2022FromGR94DBCS(uint32_t value) { - if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) && - (uint8_t)(value - 0xa1) <= (0xfe - 0xa1) - ) { - return value - 0x8080; /* shift down to 21..7e byte range */ - } else { - return 0; /* not valid for ISO 2022 */ - } -} - -#if 0 /* 5691: Call sites now check for validity. 
They can just += 0x8080 after that. */ -/* - * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the - * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point - * unchanged. - */ -static inline uint32_t -_2022ToGR94DBCS(uint32_t value) { - uint32_t returnValue = value + 0x8080; - if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) && - (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) { - return returnValue; - } else { - return value; - } -} -#endif - -#ifdef U_ENABLE_GENERIC_ISO_2022 - -/********************************************************************************** -* ISO-2022 Converter -* -* -*/ - -static void U_CALLCONV -T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, - UErrorCode* err){ - const char* mySourceLimit, *realSourceLimit; - const char* sourceStart; - const UChar* myTargetStart; - UConverter* saveThis; - UConverterDataISO2022* myData; - int8_t length; - - saveThis = args->converter; - myData=((UConverterDataISO2022*)(saveThis->extraInfo)); - - realSourceLimit = args->sourceLimit; - while (args->source < realSourceLimit) { - if(myData->key == 0) { /* are we in the middle of an escape sequence? 
*/ - /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ - mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush); - - if(args->source < mySourceLimit) { - if(myData->currentConverter==NULL) { - myData->currentConverter = ucnv_open("ASCII",err); - if(U_FAILURE(*err)){ - return; - } - - myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP; - saveThis->mode = UCNV_SO; - } - - /* convert to before the ESC or until the end of the buffer */ - myData->isFirstBuffer=FALSE; - sourceStart = args->source; - myTargetStart = args->target; - args->converter = myData->currentConverter; - ucnv_toUnicode(args->converter, - &args->target, - args->targetLimit, - &args->source, - mySourceLimit, - args->offsets, - (UBool)(args->flush && mySourceLimit == realSourceLimit), - err); - args->converter = saveThis; - - if (*err == U_BUFFER_OVERFLOW_ERROR) { - /* move the overflow buffer */ - length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength; - myData->currentConverter->UCharErrorBufferLength = 0; - if(length > 0) { - uprv_memcpy(saveThis->UCharErrorBuffer, - myData->currentConverter->UCharErrorBuffer, - length*U_SIZEOF_UCHAR); - } - return; - } - - /* - * At least one of: - * -Error while converting - * -Done with entire buffer - * -Need to write offsets or update the current offset - * (leave that up to the code in ucnv.c) - * - * or else we just stopped at an ESC byte and continue with changeState_2022() - */ - if (U_FAILURE(*err) || - (args->source == realSourceLimit) || - (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) || - (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0)) - ) { - /* copy partial or error input for truncated detection and error handling */ - if(U_FAILURE(*err)) { - length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength; - if(length > 0) { - 
uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length); - } - } else { - length = saveThis->toULength = myData->currentConverter->toULength; - if(length > 0) { - uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length); - if(args->source < mySourceLimit) { - *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */ - } - } - } - return; - } - } - } - - sourceStart = args->source; - changeState_2022(args->converter, - &(args->source), - realSourceLimit, - ISO_2022, - err); - if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) { - /* let the ucnv.c code update its current offset */ - return; - } - } -} - -#endif - -/* - * To Unicode Callback helper function - */ -static void -toUnicodeCallback(UConverter *cnv, - const uint32_t sourceChar, const uint32_t targetUniChar, - UErrorCode* err){ - if(sourceChar>0xff){ - cnv->toUBytes[0] = (uint8_t)(sourceChar>>8); - cnv->toUBytes[1] = (uint8_t)sourceChar; - cnv->toULength = 2; - } - else{ - cnv->toUBytes[0] =(char) sourceChar; - cnv->toULength = 1; - } - - if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){ - *err = U_INVALID_CHAR_FOUND; - } - else{ - *err = U_ILLEGAL_CHAR_FOUND; - } -} - -/**************************************ISO-2022-JP*************************************************/ - -/************************************** IMPORTANT ************************************************** -* The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and -* MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32(). -* The converter iterates over each Unicode codepoint -* to obtain the equivalent codepoints from the codepages supported. Since the source buffer is -* processed one char at a time it would make sense to reduce the extra processing a canned converter -* would do as far as possible. 
-* -* If the implementation of these macros or structure of sharedData struct change in the future, make -* sure that ISO-2022 is also changed. -*************************************************************************************************** -*/ - -/*************************************************************************************************** -* Rules for ISO-2022-jp encoding -* (i) Escape sequences must be fully contained within a line they should not -* span new lines or CRs -* (ii) If the last character on a line is represented by two bytes then an ASCII or -* JIS-Roman character escape sequence should follow before the line terminates -* (iii) If the first character on the line is represented by two bytes then a two -* byte character escape sequence should precede it -* (iv) If no escape sequence is encountered then the characters are ASCII -* (v) Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2, -* and invoked with SS2 (ESC N). -* (vi) If there is any G0 designation in text, there must be a switch to -* ASCII or to JIS X 0201-Roman before a space character (but not -* necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control -* characters such as tab or CRLF. -* (vi) Supported encodings: -* ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7 -* -* source : RFC-1554 -* -* JISX201, JISX208,JISX212 : new .cnv data files created -* KSC5601 : alias to ibm-949 mapping table -* GB2312 : alias to ibm-1386 mapping table -* ISO-8859-1 : Algorithmic implemented as LATIN1 case -* ISO-8859-7 : alisas to ibm-9409 mapping table -*/ - -/* preference order of JP charsets */ -static const StateEnum jpCharsetPref[]={ - ASCII, - JISX201, - ISO8859_1, - JISX208, - ISO8859_7, - JISX212, - GB2312, - KSC5601, - HWKANA_7BIT -}; - -/* - * The escape sequences must be in order of the enum constants like JISX201 = 3, - * not in order of jpCharsetPref[]! 
- */ -static const char escSeqChars[][6] ={ - "\x1B\x28\x42", /* (B ASCII */ - "\x1B\x2E\x41", /* .A ISO-8859-1 */ - "\x1B\x2E\x46", /* .F ISO-8859-7 */ - "\x1B\x28\x4A", /* (J JISX-201 */ - "\x1B\x24\x42", /* $B JISX-208 */ - "\x1B\x24\x28\x44", /* $(D JISX-212 */ - "\x1B\x24\x41", /* $A GB2312 */ - "\x1B\x24\x28\x43", /* $(C KSC5601 */ - "\x1B\x28\x49" /* (I HWKANA_7BIT */ - -}; -static const int8_t escSeqCharsLen[] ={ - 3, /* length of (B ASCII */ - 3, /* length of .A ISO-8859-1 */ - 3, /* length of .F ISO-8859-7 */ - 3, /* length of (J JISX-201 */ - 3, /* length of $B JISX-208 */ - 4, /* length of $(D JISX-212 */ - 3, /* length of $A GB2312 */ - 4, /* length of $(C KSC5601 */ - 3 /* length of (I HWKANA_7BIT */ -}; - -/* -* The iteration over various code pages works this way: -* i) Get the currentState from myConverterData->currentState -* ii) Check if the character is mapped to a valid character in the currentState -* Yes -> a) set the initIterState to currentState -* b) remain in this state until an invalid character is found -* No -> a) go to the next code page and find the character -* iii) Before changing the state increment the current state check if the current state -* is equal to the intitIteration state -* Yes -> A character that cannot be represented in any of the supported encodings -* break and return a U_INVALID_CHARACTER error -* No -> Continue and find the character in next code page -* -* -* TODO: Implement a priority technique where the users are allowed to set the priority of code pages -*/ - -/* Map 00..7F to Unicode according to JIS X 0201. */ -static inline uint32_t -jisx201ToU(uint32_t value) { - if(value < 0x5c) { - return value; - } else if(value == 0x5c) { - return 0xa5; - } else if(value == 0x7e) { - return 0x203e; - } else /* value <= 0x7f */ { - return value; - } -} - -/* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. 
*/ -static inline uint32_t -jisx201FromU(uint32_t value) { - if(value<=0x7f) { - if(value!=0x5c && value!=0x7e) { - return value; - } - } else if(value==0xa5) { - return 0x5c; - } else if(value==0x203e) { - return 0x7e; - } - return 0xfffe; -} - -/* - * Take a valid Shift-JIS byte pair, check that it is in the range corresponding - * to JIS X 0208, and convert it to a pair of 21..7E bytes. - * Return 0 if the byte pair is out of range. - */ -static inline uint32_t -_2022FromSJIS(uint32_t value) { - uint8_t trail; - - if(value > 0xEFFC) { - return 0; /* beyond JIS X 0208 */ - } - - trail = (uint8_t)value; - - value &= 0xff00; /* lead byte */ - if(value <= 0x9f00) { - value -= 0x7000; - } else /* 0xe000 <= value <= 0xef00 */ { - value -= 0xb000; - } - value <<= 1; - - if(trail <= 0x9e) { - value -= 0x100; - if(trail <= 0x7e) { - value |= trail - 0x1f; - } else { - value |= trail - 0x20; - } - } else /* trail <= 0xfc */ { - value |= trail - 0x7e; - } - return value; -} - -/* - * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS. - * If either byte is outside 21..7E make sure that the result is not valid - * for Shift-JIS so that the converter catches it. - * Some invalid byte values already turn into equally invalid Shift-JIS - * byte values and need not be tested explicitly. - */ -static inline void -_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) { - if(c1&1) { - ++c1; - if(c2 <= 0x5f) { - c2 += 0x1f; - } else if(c2 <= 0x7e) { - c2 += 0x20; - } else { - c2 = 0; /* invalid */ - } - } else { - if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) { - c2 += 0x7e; - } else { - c2 = 0; /* invalid */ - } - } - c1 >>= 1; - if(c1 <= 0x2f) { - c1 += 0x70; - } else if(c1 <= 0x3f) { - c1 += 0xb0; - } else { - c1 = 0; /* invalid */ - } - bytes[0] = (char)c1; - bytes[1] = (char)c2; -} - -/* - * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS) - * Katakana. 
- * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks - * because Shift-JIS roundtrips half-width Katakana to single bytes. - * These were the only fallbacks in ICU's jisx-208.ucm file. - */ -static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = { - 0x2123, /* U+FF61 */ - 0x2156, - 0x2157, - 0x2122, - 0x2126, - 0x2572, - 0x2521, - 0x2523, - 0x2525, - 0x2527, - 0x2529, - 0x2563, - 0x2565, - 0x2567, - 0x2543, - 0x213C, /* U+FF70 */ - 0x2522, - 0x2524, - 0x2526, - 0x2528, - 0x252A, - 0x252B, - 0x252D, - 0x252F, - 0x2531, - 0x2533, - 0x2535, - 0x2537, - 0x2539, - 0x253B, - 0x253D, - 0x253F, /* U+FF80 */ - 0x2541, - 0x2544, - 0x2546, - 0x2548, - 0x254A, - 0x254B, - 0x254C, - 0x254D, - 0x254E, - 0x254F, - 0x2552, - 0x2555, - 0x2558, - 0x255B, - 0x255E, - 0x255F, /* U+FF90 */ - 0x2560, - 0x2561, - 0x2562, - 0x2564, - 0x2566, - 0x2568, - 0x2569, - 0x256A, - 0x256B, - 0x256C, - 0x256D, - 0x256F, - 0x2573, - 0x212B, - 0x212C /* U+FF9F */ -}; - -static void U_CALLCONV -UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) { - UConverter *cnv = args->converter; - UConverterDataISO2022 *converterData; - ISO2022State *pFromU2022State; - uint8_t *target = (uint8_t *) args->target; - const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; - const UChar* source = args->source; - const UChar* sourceLimit = args->sourceLimit; - int32_t* offsets = args->offsets; - UChar32 sourceChar; - char buffer[8]; - int32_t len, outLen; - int8_t choices[10]; - int32_t choiceCount; - uint32_t targetValue = 0; - UBool useFallback; - - int32_t i; - int8_t cs, g; - - /* set up the state */ - converterData = (UConverterDataISO2022*)cnv->extraInfo; - pFromU2022State = &converterData->fromU2022State; - - choiceCount = 0; - - /* check if the last codepoint of previous buffer was a lead surrogate*/ - if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { - goto getTrail; - } - - while(source < 
sourceLimit) { - if(target < targetLimit) { - - sourceChar = *(source++); - /*check if the char is a First surrogate*/ - if(U16_IS_SURROGATE(sourceChar)) { - if(U16_IS_SURROGATE_LEAD(sourceChar)) { -getTrail: - /*look ahead to find the trail surrogate*/ - if(source < sourceLimit) { - /* test the following code unit */ - UChar trail=(UChar) *source; - if(U16_IS_TRAIL(trail)) { - source++; - sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); - cnv->fromUChar32=0x00; - /* convert this supplementary code point */ - /* exit this condition tree */ - } else { - /* this is an unmatched lead code unit (1st surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - cnv->fromUChar32=sourceChar; - break; - } - } else { - /* no more input */ - cnv->fromUChar32=sourceChar; - break; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - cnv->fromUChar32=sourceChar; - break; - } - } - - /* do not convert SO/SI/ESC */ - if(IS_2022_CONTROL(sourceChar)) { - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - cnv->fromUChar32=sourceChar; - break; - } - - /* do the conversion */ - - if(choiceCount == 0) { - uint16_t csm; - - /* - * The csm variable keeps track of which charsets are allowed - * and not used yet while building the choices[]. - */ - csm = jpCharsetMasks[converterData->version]; - choiceCount = 0; - - /* JIS7/8: try single-byte half-width Katakana before JISX208 */ - if(converterData->version == 3 || converterData->version == 4) { - choices[choiceCount++] = (int8_t)HWKANA_7BIT; - } - /* Do not try single-byte half-width Katakana for other versions. 
*/ - csm &= ~CSM(HWKANA_7BIT); - - /* try the current G0 charset */ - choices[choiceCount++] = cs = pFromU2022State->cs[0]; - csm &= ~CSM(cs); - - /* try the current G2 charset */ - if((cs = pFromU2022State->cs[2]) != 0) { - choices[choiceCount++] = cs; - csm &= ~CSM(cs); - } - - /* try all the other possible charsets */ - for(i = 0; i < UPRV_LENGTHOF(jpCharsetPref); ++i) { - cs = (int8_t)jpCharsetPref[i]; - if(CSM(cs) & csm) { - choices[choiceCount++] = cs; - csm &= ~CSM(cs); - } - } - } - - cs = g = 0; - /* - * len==0: no mapping found yet - * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks - * len>0: found a roundtrip result, done - */ - len = 0; - /* - * We will turn off useFallback after finding a fallback, - * but we still get fallbacks from PUA code points as usual. - * Therefore, we will also need to check that we don't overwrite - * an early fallback with a later one. - */ - useFallback = cnv->useFallback; - - for(i = 0; i < choiceCount && len <= 0; ++i) { - uint32_t value; - int32_t len2; - int8_t cs0 = choices[i]; - switch(cs0) { - case ASCII: - if(sourceChar <= 0x7f) { - targetValue = (uint32_t)sourceChar; - len = 1; - cs = cs0; - g = 0; - } - break; - case ISO8859_1: - if(GR96_START <= sourceChar && sourceChar <= GR96_END) { - targetValue = (uint32_t)sourceChar - 0x80; - len = 1; - cs = cs0; - g = 2; - } - break; - case HWKANA_7BIT: - if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { - if(converterData->version==3) { - /* JIS7: use G1 (SO) */ - /* Shift U+FF61..U+FF9F to bytes 21..5F. */ - targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21)); - len = 1; - pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */ - g = 1; - } else if(converterData->version==4) { - /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */ - /* Shift U+FF61..U+FF9F to bytes A1..DF. 
*/ - targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1)); - len = 1; - - cs = pFromU2022State->cs[0]; - if(IS_JP_DBCS(cs)) { - /* switch from a DBCS charset to JISX201 */ - cs = (int8_t)JISX201; - } - /* else stay in the current G0 charset */ - g = 0; - } - /* else do not use HWKANA_7BIT with other versions */ - } - break; - case JISX201: - /* G0 SBCS */ - value = jisx201FromU(sourceChar); - if(value <= 0x7f) { - targetValue = value; - len = 1; - cs = cs0; - g = 0; - useFallback = FALSE; - } - break; - case JISX208: - /* G0 DBCS from Shift-JIS table */ - len2 = MBCS_FROM_UCHAR32_ISO2022( - converterData->myConverterArray[cs0], - sourceChar, &value, - useFallback, MBCS_OUTPUT_2); - if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ - value = _2022FromSJIS(value); - if(value != 0) { - targetValue = value; - len = len2; - cs = cs0; - g = 0; - useFallback = FALSE; - } - } else if(len == 0 && useFallback && - (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { - targetValue = hwkana_fb[sourceChar - HWKANA_START]; - len = -2; - cs = cs0; - g = 0; - useFallback = FALSE; - } - break; - case ISO8859_7: - /* G0 SBCS forced to 7-bit output */ - len2 = MBCS_SINGLE_FROM_UCHAR32( - converterData->myConverterArray[cs0], - sourceChar, &value, - useFallback); - if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) { - targetValue = value - 0x80; - len = len2; - cs = cs0; - g = 2; - useFallback = FALSE; - } - break; - default: - /* G0 DBCS */ - len2 = MBCS_FROM_UCHAR32_ISO2022( - converterData->myConverterArray[cs0], - sourceChar, &value, - useFallback, MBCS_OUTPUT_2); - if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ - if(cs0 == KSC5601) { - /* - * Check for valid bytes for the encoding scheme. - * This is necessary because the sub-converter (windows-949) - * has a broader encoding scheme than is valid for 2022. 
- */ - value = _2022FromGR94DBCS(value); - if(value == 0) { - break; - } - } - targetValue = value; - len = len2; - cs = cs0; - g = 0; - useFallback = FALSE; - } - break; - } - } - - if(len != 0) { - if(len < 0) { - len = -len; /* fallback */ - } - outLen = 0; /* count output bytes */ - - /* write SI if necessary (only for JIS7) */ - if(pFromU2022State->g == 1 && g == 0) { - buffer[outLen++] = UCNV_SI; - pFromU2022State->g = 0; - } - - /* write the designation sequence if necessary */ - if(cs != pFromU2022State->cs[g]) { - int32_t escLen = escSeqCharsLen[cs]; - uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen); - outLen += escLen; - pFromU2022State->cs[g] = cs; - - /* invalidate the choices[] */ - choiceCount = 0; - } - - /* write the shift sequence if necessary */ - if(g != pFromU2022State->g) { - switch(g) { - /* case 0 handled before writing escapes */ - case 1: - buffer[outLen++] = UCNV_SO; - pFromU2022State->g = 1; - break; - default: /* case 2 */ - buffer[outLen++] = 0x1b; - buffer[outLen++] = 0x4e; - break; - /* no case 3: no SS3 in ISO-2022-JP-x */ - } - } - - /* write the output bytes */ - if(len == 1) { - buffer[outLen++] = (char)targetValue; - } else /* len == 2 */ { - buffer[outLen++] = (char)(targetValue >> 8); - buffer[outLen++] = (char)targetValue; - } - } else { - /* - * if we cannot find the character after checking all codepages - * then this is an error - */ - *err = U_INVALID_CHAR_FOUND; - cnv->fromUChar32=sourceChar; - break; - } - - if(sourceChar == CR || sourceChar == LF) { - /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */ - pFromU2022State->cs[2] = 0; - choiceCount = 0; - } - - /* output outLen>0 bytes in buffer[] */ - if(outLen == 1) { - *target++ = buffer[0]; - if(offsets) { - *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */ - } - } else if(outLen == 2 && (target + 2) <= targetLimit) { - *target++ = buffer[0]; - *target++ = buffer[1]; - if(offsets) { - 
int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar)); - *offsets++ = sourceIndex; - *offsets++ = sourceIndex; - } - } else { - fromUWriteUInt8( - cnv, - buffer, outLen, - &target, (const char *)targetLimit, - &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)), - err); - if(U_FAILURE(*err)) { - break; - } - } - } /* end if(myTargetIndexg!=0 || pFromU2022State->cs[0]!=ASCII) && - args->flush && source>=sourceLimit && cnv->fromUChar32==0 - ) { - int32_t sourceIndex; - - outLen = 0; - - if(pFromU2022State->g != 0) { - buffer[outLen++] = UCNV_SI; - pFromU2022State->g = 0; - } - - if(pFromU2022State->cs[0] != ASCII) { - int32_t escLen = escSeqCharsLen[ASCII]; - uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen); - outLen += escLen; - pFromU2022State->cs[0] = (int8_t)ASCII; - } - - /* get the source index of the last input character */ - /* - * TODO this would be simpler and more reliable if we used a pair - * of sourceIndex/prevSourceIndex like in ucnvmbcs.c - * so that we could simply use the prevSourceIndex here; - * this code gives an incorrect result for the rare case of an unmatched - * trail surrogate that is alone in the last buffer of the text stream - */ - sourceIndex=(int32_t)(source-args->source); - if(sourceIndex>0) { - --sourceIndex; - if( U16_IS_TRAIL(args->source[sourceIndex]) && - (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) - ) { - --sourceIndex; - } - } else { - sourceIndex=-1; - } - - fromUWriteUInt8( - cnv, - buffer, outLen, - &target, (const char *)targetLimit, - &offsets, sourceIndex, - err); - } - - /*save the state and return */ - args->source = source; - args->target = (char*)target; -} - -/*************** to unicode *******************/ - -static void U_CALLCONV -UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, - UErrorCode* err){ - char tempBuf[2]; - const char *mySource = (char *) args->source; - UChar *myTarget = args->target; - const char 
*mySourceLimit = args->sourceLimit; - uint32_t targetUniChar = 0x0000; - uint32_t mySourceChar = 0x0000; - uint32_t tmpSourceChar = 0x0000; - UConverterDataISO2022* myData; - ISO2022State *pToU2022State; - StateEnum cs; - - myData=(UConverterDataISO2022*)(args->converter->extraInfo); - pToU2022State = &myData->toU2022State; - - if(myData->key != 0) { - /* continue with a partial escape sequence */ - goto escape; - } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) { - /* continue with a partial double-byte character */ - mySourceChar = args->converter->toUBytes[0]; - args->converter->toULength = 0; - cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; - targetUniChar = missingCharMarker; - goto getTrailByte; - } - - while(mySource < mySourceLimit){ - - targetUniChar =missingCharMarker; - - if(myTarget < args->targetLimit){ - - mySourceChar= (unsigned char) *mySource++; - - switch(mySourceChar) { - case UCNV_SI: - if(myData->version==3) { - pToU2022State->g=0; - continue; - } else { - /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ - myData->isEmptySegment = FALSE; /* reset this, we have a different error */ - break; - } - - case UCNV_SO: - if(myData->version==3) { - /* JIS7: switch to G1 half-width Katakana */ - pToU2022State->cs[1] = (int8_t)HWKANA_7BIT; - pToU2022State->g=1; - continue; - } else { - /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ - myData->isEmptySegment = FALSE; /* reset this, we have a different error */ - break; - } - - case ESC_2022: - mySource--; -escape: - { - const char * mySourceBefore = mySource; - int8_t toULengthBefore = args->converter->toULength; - - changeState_2022(args->converter,&(mySource), - mySourceLimit, ISO_2022_JP,err); - - /* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */ - if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) { - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - 
args->converter->toUCallbackReason = UCNV_IRREGULAR; - args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore)); - } - } - - /* invalid or illegal escape sequence */ - if(U_FAILURE(*err)){ - args->target = myTarget; - args->source = mySource; - myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */ - return; - } - /* If we successfully completed an escape sequence, we begin a new segment, empty so far */ - if(myData->key==0) { - myData->isEmptySegment = TRUE; - } - continue; - - /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */ - - case CR: - case LF: - /* automatically reset to single-byte mode */ - if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) { - pToU2022State->cs[0] = (int8_t)ASCII; - } - pToU2022State->cs[2] = 0; - pToU2022State->g = 0; - U_FALLTHROUGH; - default: - /* convert one or two bytes */ - myData->isEmptySegment = FALSE; - cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; - if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 && - !IS_JP_DBCS(cs) - ) { - /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */ - targetUniChar = mySourceChar + (HWKANA_START - 0xa1); - - /* return from a single-shift state to the previous one */ - if(pToU2022State->g >= 2) { - pToU2022State->g=pToU2022State->prevG; - } - } else switch(cs) { - case ASCII: - if(mySourceChar <= 0x7f) { - targetUniChar = mySourceChar; - } - break; - case ISO8859_1: - if(mySourceChar <= 0x7f) { - targetUniChar = mySourceChar + 0x80; - } - /* return from a single-shift state to the previous one */ - pToU2022State->g=pToU2022State->prevG; - break; - case ISO8859_7: - if(mySourceChar <= 0x7f) { - /* convert mySourceChar+0x80 to use a normal 8-bit table */ - targetUniChar = - _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( - myData->myConverterArray[cs], - mySourceChar + 0x80); - } - /* return from a single-shift state to the previous one */ - 
pToU2022State->g=pToU2022State->prevG; - break; - case JISX201: - if(mySourceChar <= 0x7f) { - targetUniChar = jisx201ToU(mySourceChar); - } - break; - case HWKANA_7BIT: - if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { - /* 7-bit halfwidth Katakana */ - targetUniChar = mySourceChar + (HWKANA_START - 0x21); - } - break; - default: - /* G0 DBCS */ - if(mySource < mySourceLimit) { - int leadIsOk, trailIsOk; - uint8_t trailByte; -getTrailByte: - trailByte = (uint8_t)*mySource; - /* - * Ticket 5691: consistent illegal sequences: - * - We include at least the first byte in the illegal sequence. - * - If any of the non-initial bytes could be the start of a character, - * we stop the illegal sequence before the first one of those. - * - * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is - * an ESC/SO/SI, we report only the first byte as the illegal sequence. - * Otherwise we convert or report the pair of bytes. - */ - leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); - trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); - if (leadIsOk && trailIsOk) { - ++mySource; - tmpSourceChar = (mySourceChar << 8) | trailByte; - if(cs == JISX208) { - _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf); - mySourceChar = tmpSourceChar; - } else { - /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. 
*/ - mySourceChar = tmpSourceChar; - if (cs == KSC5601) { - tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */ - } - tempBuf[0] = (char)(tmpSourceChar >> 8); - tempBuf[1] = (char)(tmpSourceChar); - } - targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE); - } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { - /* report a pair of illegal bytes if the second byte is not a DBCS starter */ - ++mySource; - /* add another bit so that the code below writes 2 bytes in case of error */ - mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; - } - } else { - args->converter->toUBytes[0] = (uint8_t)mySourceChar; - args->converter->toULength = 1; - goto endloop; - } - } /* End of inner switch */ - break; - } /* End of outer switch */ - if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){ - if(args->offsets){ - args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); - } - *(myTarget++)=(UChar)targetUniChar; - } - else if(targetUniChar > missingCharMarker){ - /* disassemble the surrogate pair and write to output*/ - targetUniChar-=0x0010000; - *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10)); - if(args->offsets){ - args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); - } - ++myTarget; - if(myTarget< args->targetLimit){ - *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); - if(args->offsets){ - args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 
1 : 2)); - } - ++myTarget; - }else{ - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= - (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); - } - - } - else{ - /* Call the callback function*/ - toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err); - break; - } - } - else{ /* goes with "if(myTarget < args->targetLimit)" way up near top of function */ - *err =U_BUFFER_OVERFLOW_ERROR; - break; - } - } -endloop: - args->target = myTarget; - args->source = mySource; -} - - -#if !UCONFIG_ONLY_HTML_CONVERSION -/*************************************************************** -* Rules for ISO-2022-KR encoding -* i) The KSC5601 designator sequence should appear only once in a file, -* at the begining of a line before any KSC5601 characters. This usually -* means that it appears by itself on the first line of the file -* ii) There are only 2 shifting sequences SO to shift into double byte mode -* and SI to shift into single byte mode -*/ -static void U_CALLCONV -UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){ - - UConverter* saveConv = args->converter; - UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo; - args->converter=myConverterData->currentConverter; - - myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32; - ucnv_MBCSFromUnicodeWithOffsets(args,err); - saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32; - - if(*err == U_BUFFER_OVERFLOW_ERROR) { - if(myConverterData->currentConverter->charErrorBufferLength > 0) { - uprv_memcpy( - saveConv->charErrorBuffer, - myConverterData->currentConverter->charErrorBuffer, - myConverterData->currentConverter->charErrorBufferLength); - } - saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength; - myConverterData->currentConverter->charErrorBufferLength = 0; - } - args->converter=saveConv; -} - -static void U_CALLCONV 
-UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){ - - const UChar *source = args->source; - const UChar *sourceLimit = args->sourceLimit; - unsigned char *target = (unsigned char *) args->target; - unsigned char *targetLimit = (unsigned char *) args->targetLimit; - int32_t* offsets = args->offsets; - uint32_t targetByteUnit = 0x0000; - UChar32 sourceChar = 0x0000; - UBool isTargetByteDBCS; - UBool oldIsTargetByteDBCS; - UConverterDataISO2022 *converterData; - UConverterSharedData* sharedData; - UBool useFallback; - int32_t length =0; - - converterData=(UConverterDataISO2022*)args->converter->extraInfo; - /* if the version is 1 then the user is requesting - * conversion with ibm-25546 pass the arguments to - * MBCS converter and return - */ - if(converterData->version==1){ - UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err); - return; - } - - /* initialize data */ - sharedData = converterData->currentConverter->sharedData; - useFallback = args->converter->useFallback; - isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus; - oldIsTargetByteDBCS = isTargetByteDBCS; - - isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus; - if((sourceChar = args->converter->fromUChar32)!=0 && target targetLimit){ - sourceChar = *source++; - - /* do not convert SO/SI/ESC */ - if(IS_2022_CONTROL(sourceChar)) { - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - args->converter->fromUChar32=sourceChar; - break; - } - - length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2); - if(length < 0) { - length = -length; /* fallback */ - } - /* only DBCS or SBCS characters are expected*/ - /* DB characters with high bit set to 1 are expected */ - if( length > 2 || length==0 || - (length == 1 && targetByteUnit > 0x7f) || - (length == 2 && - ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) || - (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1))) - ) { - 
targetByteUnit=missingCharMarker; - } - if (targetByteUnit != missingCharMarker){ - - oldIsTargetByteDBCS = isTargetByteDBCS; - isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF); - /* append the shift sequence */ - if (oldIsTargetByteDBCS != isTargetByteDBCS ){ - - if (isTargetByteDBCS) - *target++ = UCNV_SO; - else - *target++ = UCNV_SI; - if(offsets) - *(offsets++) = (int32_t)(source - args->source-1); - } - /* write the targetUniChar to target */ - if(targetByteUnit <= 0x00FF){ - if( target < targetLimit){ - *(target++) = (unsigned char) targetByteUnit; - if(offsets){ - *(offsets++) = (int32_t)(source - args->source-1); - } - - }else{ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit); - *err = U_BUFFER_OVERFLOW_ERROR; - } - }else{ - if(target < targetLimit){ - *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80); - if(offsets){ - *(offsets++) = (int32_t)(source - args->source-1); - } - if(target < targetLimit){ - *(target++) =(unsigned char) (targetByteUnit -0x80); - if(offsets){ - *(offsets++) = (int32_t)(source - args->source-1); - } - }else{ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80); - *err = U_BUFFER_OVERFLOW_ERROR; - } - }else{ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80); - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80); - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - - } - else{ - /* oops.. 
the code point is unassingned - * set the error and reason - */ - - /*check if the char is a First surrogate*/ - if(U16_IS_SURROGATE(sourceChar)) { - if(U16_IS_SURROGATE_LEAD(sourceChar)) { -getTrail: - /*look ahead to find the trail surrogate*/ - if(source < sourceLimit) { - /* test the following code unit */ - UChar trail=(UChar) *source; - if(U16_IS_TRAIL(trail)) { - source++; - sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); - *err = U_INVALID_CHAR_FOUND; - /* convert this surrogate code point */ - /* exit this condition tree */ - } else { - /* this is an unmatched lead code unit (1st surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* no more input */ - *err = U_ZERO_ERROR; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* callback(unassigned) for a BMP code point */ - *err = U_INVALID_CHAR_FOUND; - } - - args->converter->fromUChar32=sourceChar; - break; - } - } /* end if(myTargetIndexflush && source>=sourceLimit && args->converter->fromUChar32==0 - ) { - int32_t sourceIndex; - - /* we are switching to ASCII */ - isTargetByteDBCS=FALSE; - - /* get the source index of the last input character */ - /* - * TODO this would be simpler and more reliable if we used a pair - * of sourceIndex/prevSourceIndex like in ucnvmbcs.c - * so that we could simply use the prevSourceIndex here; - * this code gives an incorrect result for the rare case of an unmatched - * trail surrogate that is alone in the last buffer of the text stream - */ - sourceIndex=(int32_t)(source-args->source); - if(sourceIndex>0) { - --sourceIndex; - if( U16_IS_TRAIL(args->source[sourceIndex]) && - (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) - ) { - --sourceIndex; - } - } else { - sourceIndex=-1; - } - - fromUWriteUInt8( - args->converter, - SHIFT_IN_STR, 1, - &target, (const char *)targetLimit, - &offsets, sourceIndex, - err); - } - - /*save 
the state and return */ - args->source = source; - args->target = (char*)target; - args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS; -} - -/************************ To Unicode ***************************************/ - -static void U_CALLCONV -UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args, - UErrorCode* err){ - char const* sourceStart; - UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo); - - UConverterToUnicodeArgs subArgs; - int32_t minArgsSize; - - /* set up the subconverter arguments */ - if(args->sizesize; - } else { - minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs); - } - - uprv_memcpy(&subArgs, args, minArgsSize); - subArgs.size = (uint16_t)minArgsSize; - subArgs.converter = myData->currentConverter; - - /* remember the original start of the input for offsets */ - sourceStart = args->source; - - if(myData->key != 0) { - /* continue with a partial escape sequence */ - goto escape; - } - - while(U_SUCCESS(*err) && args->source < args->sourceLimit) { - /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ - subArgs.source = args->source; - subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush); - if(subArgs.source != subArgs.sourceLimit) { - /* - * get the current partial byte sequence - * - * it needs to be moved between the public and the subconverter - * so that the conversion framework, which only sees the public - * converter, can handle truncated and illegal input etc. - */ - if(args->converter->toULength > 0) { - uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength); - } - subArgs.converter->toULength = args->converter->toULength; - - /* - * Convert up to the end of the input, or to before the next escape character. - * Does not handle conversion extensions because the preToU[] state etc. - * is not copied. 
- */ - ucnv_MBCSToUnicodeWithOffsets(&subArgs, err); - - if(args->offsets != NULL && sourceStart != args->source) { - /* update offsets to base them on the actual start of the input */ - int32_t *offsets = args->offsets; - UChar *target = args->target; - int32_t delta = (int32_t)(args->source - sourceStart); - while(target < subArgs.target) { - if(*offsets >= 0) { - *offsets += delta; - } - ++offsets; - ++target; - } - } - args->source = subArgs.source; - args->target = subArgs.target; - args->offsets = subArgs.offsets; - - /* copy input/error/overflow buffers */ - if(subArgs.converter->toULength > 0) { - uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength); - } - args->converter->toULength = subArgs.converter->toULength; - - if(*err == U_BUFFER_OVERFLOW_ERROR) { - if(subArgs.converter->UCharErrorBufferLength > 0) { - uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer, - subArgs.converter->UCharErrorBufferLength); - } - args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength; - subArgs.converter->UCharErrorBufferLength = 0; - } - } - - if (U_FAILURE(*err) || (args->source == args->sourceLimit)) { - return; - } - -escape: - changeState_2022(args->converter, - &(args->source), - args->sourceLimit, - ISO_2022_KR, - err); - } -} - -static void U_CALLCONV -UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, - UErrorCode* err){ - char tempBuf[2]; - const char *mySource = ( char *) args->source; - UChar *myTarget = args->target; - const char *mySourceLimit = args->sourceLimit; - UChar32 targetUniChar = 0x0000; - UChar mySourceChar = 0x0000; - UConverterDataISO2022* myData; - UConverterSharedData* sharedData ; - UBool useFallback; - - myData=(UConverterDataISO2022*)(args->converter->extraInfo); - if(myData->version==1){ - UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err); - return; - } - - /* initialize state */ - sharedData = 
myData->currentConverter->sharedData; - useFallback = args->converter->useFallback; - - if(myData->key != 0) { - /* continue with a partial escape sequence */ - goto escape; - } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) { - /* continue with a partial double-byte character */ - mySourceChar = args->converter->toUBytes[0]; - args->converter->toULength = 0; - goto getTrailByte; - } - - while(mySource< mySourceLimit){ - - if(myTarget < args->targetLimit){ - - mySourceChar= (unsigned char) *mySource++; - - if(mySourceChar==UCNV_SI){ - myData->toU2022State.g = 0; - if (myData->isEmptySegment) { - myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - args->converter->toUCallbackReason = UCNV_IRREGULAR; - args->converter->toUBytes[0] = (uint8_t)mySourceChar; - args->converter->toULength = 1; - args->target = myTarget; - args->source = mySource; - return; - } - /*consume the source */ - continue; - }else if(mySourceChar==UCNV_SO){ - myData->toU2022State.g = 1; - myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */ - /*consume the source */ - continue; - }else if(mySourceChar==ESC_2022){ - mySource--; -escape: - myData->isEmptySegment = FALSE; /* Any invalid ESC sequences will be detected separately, so just reset this */ - changeState_2022(args->converter,&(mySource), - mySourceLimit, ISO_2022_KR, err); - if(U_FAILURE(*err)){ - args->target = myTarget; - args->source = mySource; - return; - } - continue; - } - - myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */ - if(myData->toU2022State.g == 1) { - if(mySource < mySourceLimit) { - int leadIsOk, trailIsOk; - uint8_t trailByte; -getTrailByte: - targetUniChar = missingCharMarker; - trailByte = (uint8_t)*mySource; - /* - * Ticket 5691: consistent illegal sequences: - * - We include at least the first byte in the 
illegal sequence. - * - If any of the non-initial bytes could be the start of a character, - * we stop the illegal sequence before the first one of those. - * - * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is - * an ESC/SO/SI, we report only the first byte as the illegal sequence. - * Otherwise we convert or report the pair of bytes. - */ - leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); - trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); - if (leadIsOk && trailIsOk) { - ++mySource; - tempBuf[0] = (char)(mySourceChar + 0x80); - tempBuf[1] = (char)(trailByte + 0x80); - targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback); - mySourceChar = (mySourceChar << 8) | trailByte; - } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { - /* report a pair of illegal bytes if the second byte is not a DBCS starter */ - ++mySource; - /* add another bit so that the code below writes 2 bytes in case of error */ - mySourceChar = static_cast(0x10000 | (mySourceChar << 8) | trailByte); - } - } else { - args->converter->toUBytes[0] = (uint8_t)mySourceChar; - args->converter->toULength = 1; - break; - } - } - else if(mySourceChar <= 0x7f) { - targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback); - } else { - targetUniChar = 0xffff; - } - if(targetUniChar < 0xfffe){ - if(args->offsets) { - args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 
1 : 2)); - } - *(myTarget++)=(UChar)targetUniChar; - } - else { - /* Call the callback function*/ - toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err); - break; - } - } - else{ - *err =U_BUFFER_OVERFLOW_ERROR; - break; - } - } - args->target = myTarget; - args->source = mySource; -} - -/*************************** END ISO2022-KR *********************************/ - -/*************************** ISO-2022-CN ********************************* -* -* Rules for ISO-2022-CN Encoding: -* i) The designator sequence must appear once on a line before any instance -* of character set it designates. -* ii) If two lines contain characters from the same character set, both lines -* must include the designator sequence. -* iii) Once the designator sequence is known, a shifting sequence has to be found -* to invoke the shifting -* iv) All lines start in ASCII and end in ASCII. -* v) Four shifting sequences are employed for this purpose: -* -* Sequcence ASCII Eq Charsets -* ---------- ------- --------- -* SI US-ASCII -* SO CNS-11643-1992 Plane 1, GB2312, ISO-IR-165 -* SS2 N CNS-11643-1992 Plane 2 -* SS3 O CNS-11643-1992 Planes 3-7 -* -* vi) -* SOdesignator : ESC "$" ")" finalchar_for_SO -* SS2designator : ESC "$" "*" finalchar_for_SS2 -* SS3designator : ESC "$" "+" finalchar_for_SS3 -* -* ESC $ ) A Indicates the bytes following SO are Chinese -* characters as defined in GB 2312-80, until -* another SOdesignation appears -* -* -* ESC $ ) E Indicates the bytes following SO are as defined -* in ISO-IR-165 (for details, see section 2.1), -* until another SOdesignation appears -* -* ESC $ ) G Indicates the bytes following SO are as defined -* in CNS 11643-plane-1, until another -* SOdesignation appears -* -* ESC $ * H Indicates the two bytes immediately following -* SS2 is a Chinese character as defined in CNS -* 11643-plane-2, until another SS2designation -* appears -* (Meaning N must preceed every 2 byte -* sequence.) 
-* -* ESC $ + I Indicates the immediate two bytes following SS3 -* is a Chinese character as defined in CNS -* 11643-plane-3, until another SS3designation -* appears -* (Meaning O must preceed every 2 byte -* sequence.) -* -* ESC $ + J Indicates the immediate two bytes following SS3 -* is a Chinese character as defined in CNS -* 11643-plane-4, until another SS3designation -* appears -* (In English: O must preceed every 2 byte -* sequence.) -* -* ESC $ + K Indicates the immediate two bytes following SS3 -* is a Chinese character as defined in CNS -* 11643-plane-5, until another SS3designation -* appears -* -* ESC $ + L Indicates the immediate two bytes following SS3 -* is a Chinese character as defined in CNS -* 11643-plane-6, until another SS3designation -* appears -* -* ESC $ + M Indicates the immediate two bytes following SS3 -* is a Chinese character as defined in CNS -* 11643-plane-7, until another SS3designation -* appears -* -* As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and -* has its own designation information before any Chinese characters -* appear -* -*/ - -/* The following are defined this way to make the strings truly readonly */ -static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41"; -static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45"; -static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; -static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; -static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; -static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; -static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; -static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; -static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; - -/********************** ISO2022-CN Data **************************/ -static const char* const escSeqCharsCN[10] ={ - SHIFT_IN_STR, /* 0 ASCII */ - GB_2312_80_STR, /* 1 GB2312_1 */ - ISO_IR_165_STR, /* 2 
ISO_IR_165 */ - CNS_11643_1992_Plane_1_STR, - CNS_11643_1992_Plane_2_STR, - CNS_11643_1992_Plane_3_STR, - CNS_11643_1992_Plane_4_STR, - CNS_11643_1992_Plane_5_STR, - CNS_11643_1992_Plane_6_STR, - CNS_11643_1992_Plane_7_STR -}; - -static void U_CALLCONV -UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){ - UConverter *cnv = args->converter; - UConverterDataISO2022 *converterData; - ISO2022State *pFromU2022State; - uint8_t *target = (uint8_t *) args->target; - const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; - const UChar* source = args->source; - const UChar* sourceLimit = args->sourceLimit; - int32_t* offsets = args->offsets; - UChar32 sourceChar; - char buffer[8]; - int32_t len; - int8_t choices[3]; - int32_t choiceCount; - uint32_t targetValue = 0; - UBool useFallback; - - /* set up the state */ - converterData = (UConverterDataISO2022*)cnv->extraInfo; - pFromU2022State = &converterData->fromU2022State; - - choiceCount = 0; - - /* check if the last codepoint of previous buffer was a lead surrogate*/ - if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { - goto getTrail; - } - - while( source < sourceLimit){ - if(target < targetLimit){ - - sourceChar = *(source++); - /*check if the char is a First surrogate*/ - if(U16_IS_SURROGATE(sourceChar)) { - if(U16_IS_SURROGATE_LEAD(sourceChar)) { -getTrail: - /*look ahead to find the trail surrogate*/ - if(source < sourceLimit) { - /* test the following code unit */ - UChar trail=(UChar) *source; - if(U16_IS_TRAIL(trail)) { - source++; - sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); - cnv->fromUChar32=0x00; - /* convert this supplementary code point */ - /* exit this condition tree */ - } else { - /* this is an unmatched lead code unit (1st surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - cnv->fromUChar32=sourceChar; - break; - } - } else { - /* no more input */ - cnv->fromUChar32=sourceChar; - break; - } - } else { - 
/* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - cnv->fromUChar32=sourceChar; - break; - } - } - - /* do the conversion */ - if(sourceChar <= 0x007f ){ - /* do not convert SO/SI/ESC */ - if(IS_2022_CONTROL(sourceChar)) { - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - cnv->fromUChar32=sourceChar; - break; - } - - /* US-ASCII */ - if(pFromU2022State->g == 0) { - buffer[0] = (char)sourceChar; - len = 1; - } else { - buffer[0] = UCNV_SI; - buffer[1] = (char)sourceChar; - len = 2; - pFromU2022State->g = 0; - choiceCount = 0; - } - if(sourceChar == CR || sourceChar == LF) { - /* reset the state at the end of a line */ - uprv_memset(pFromU2022State, 0, sizeof(ISO2022State)); - choiceCount = 0; - } - } - else{ - /* convert U+0080..U+10ffff */ - int32_t i; - int8_t cs, g; - - if(choiceCount == 0) { - /* try the current SO/G1 converter first */ - choices[0] = pFromU2022State->cs[1]; - - /* default to GB2312_1 if none is designated yet */ - if(choices[0] == 0) { - choices[0] = GB2312_1; - } - - if(converterData->version == 0) { - /* ISO-2022-CN */ - - /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */ - if(choices[0] == GB2312_1) { - choices[1] = (int8_t)CNS_11643_1; - } else { - choices[1] = (int8_t)GB2312_1; - } - - choiceCount = 2; - } else if (converterData->version == 1) { - /* ISO-2022-CN-EXT */ - - /* try one of the other converters */ - switch(choices[0]) { - case GB2312_1: - choices[1] = (int8_t)CNS_11643_1; - choices[2] = (int8_t)ISO_IR_165; - break; - case ISO_IR_165: - choices[1] = (int8_t)GB2312_1; - choices[2] = (int8_t)CNS_11643_1; - break; - default: /* CNS_11643_x */ - choices[1] = (int8_t)GB2312_1; - choices[2] = (int8_t)ISO_IR_165; - break; - } - - choiceCount = 3; - } else { - choices[0] = (int8_t)CNS_11643_1; - choices[1] = (int8_t)GB2312_1; - } - } - - cs = g = 0; - /* - * len==0: no mapping found yet - * len<0: found a fallback result: continue 
looking for a roundtrip but no further fallbacks - * len>0: found a roundtrip result, done - */ - len = 0; - /* - * We will turn off useFallback after finding a fallback, - * but we still get fallbacks from PUA code points as usual. - * Therefore, we will also need to check that we don't overwrite - * an early fallback with a later one. - */ - useFallback = cnv->useFallback; - - for(i = 0; i < choiceCount && len <= 0; ++i) { - int8_t cs0 = choices[i]; - if(cs0 > 0) { - uint32_t value; - int32_t len2; - if(cs0 >= CNS_11643_0) { - len2 = MBCS_FROM_UCHAR32_ISO2022( - converterData->myConverterArray[CNS_11643], - sourceChar, - &value, - useFallback, - MBCS_OUTPUT_3); - if(len2 == 3 || (len2 == -3 && len == 0)) { - targetValue = value; - cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80); - if(len2 >= 0) { - len = 2; - } else { - len = -2; - useFallback = FALSE; - } - if(cs == CNS_11643_1) { - g = 1; - } else if(cs == CNS_11643_2) { - g = 2; - } else /* plane 3..7 */ if(converterData->version == 1) { - g = 3; - } else { - /* ISO-2022-CN (without -EXT) does not support plane 3..7 */ - len = 0; - } - } - } else { - /* GB2312_1 or ISO-IR-165 */ - U_ASSERT(cs0myConverterArray[cs0], - sourceChar, - &value, - useFallback, - MBCS_OUTPUT_2); - if(len2 == 2 || (len2 == -2 && len == 0)) { - targetValue = value; - len = len2; - cs = cs0; - g = 1; - useFallback = FALSE; - } - } - } - } - - if(len != 0) { - len = 0; /* count output bytes; it must have been abs(len) == 2 */ - - /* write the designation sequence if necessary */ - if(cs != pFromU2022State->cs[g]) { - if(cs < CNS_11643) { - uprv_memcpy(buffer, escSeqCharsCN[cs], 4); - } else { - U_ASSERT(cs >= CNS_11643_1); - uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4); - } - len = 4; - pFromU2022State->cs[g] = cs; - if(g == 1) { - /* changing the SO/G1 charset invalidates the choices[] */ - choiceCount = 0; - } - } - - /* write the shift sequence if necessary */ - if(g != pFromU2022State->g) { - switch(g) { - 
case 1: - buffer[len++] = UCNV_SO; - - /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */ - pFromU2022State->g = 1; - break; - case 2: - buffer[len++] = 0x1b; - buffer[len++] = 0x4e; - break; - default: /* case 3 */ - buffer[len++] = 0x1b; - buffer[len++] = 0x4f; - break; - } - } - - /* write the two output bytes */ - buffer[len++] = (char)(targetValue >> 8); - buffer[len++] = (char)targetValue; - } else { - /* if we cannot find the character after checking all codepages - * then this is an error - */ - *err = U_INVALID_CHAR_FOUND; - cnv->fromUChar32=sourceChar; - break; - } - } - - /* output len>0 bytes in buffer[] */ - if(len == 1) { - *target++ = buffer[0]; - if(offsets) { - *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */ - } - } else if(len == 2 && (target + 2) <= targetLimit) { - *target++ = buffer[0]; - *target++ = buffer[1]; - if(offsets) { - int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar)); - *offsets++ = sourceIndex; - *offsets++ = sourceIndex; - } - } else { - fromUWriteUInt8( - cnv, - buffer, len, - &target, (const char *)targetLimit, - &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)), - err); - if(U_FAILURE(*err)) { - break; - } - } - } /* end if(myTargetIndexg!=0 && - args->flush && source>=sourceLimit && cnv->fromUChar32==0 - ) { - int32_t sourceIndex; - - /* we are switching to ASCII */ - pFromU2022State->g=0; - - /* get the source index of the last input character */ - /* - * TODO this would be simpler and more reliable if we used a pair - * of sourceIndex/prevSourceIndex like in ucnvmbcs.c - * so that we could simply use the prevSourceIndex here; - * this code gives an incorrect result for the rare case of an unmatched - * trail surrogate that is alone in the last buffer of the text stream - */ - sourceIndex=(int32_t)(source-args->source); - if(sourceIndex>0) { - --sourceIndex; - if( U16_IS_TRAIL(args->source[sourceIndex]) && - 
(sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) - ) { - --sourceIndex; - } - } else { - sourceIndex=-1; - } - - fromUWriteUInt8( - cnv, - SHIFT_IN_STR, 1, - &target, (const char *)targetLimit, - &offsets, sourceIndex, - err); - } - - /*save the state and return */ - args->source = source; - args->target = (char*)target; -} - - -static void U_CALLCONV -UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, - UErrorCode* err){ - char tempBuf[3]; - const char *mySource = (char *) args->source; - UChar *myTarget = args->target; - const char *mySourceLimit = args->sourceLimit; - uint32_t targetUniChar = 0x0000; - uint32_t mySourceChar = 0x0000; - UConverterDataISO2022* myData; - ISO2022State *pToU2022State; - - myData=(UConverterDataISO2022*)(args->converter->extraInfo); - pToU2022State = &myData->toU2022State; - - if(myData->key != 0) { - /* continue with a partial escape sequence */ - goto escape; - } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) { - /* continue with a partial double-byte character */ - mySourceChar = args->converter->toUBytes[0]; - args->converter->toULength = 0; - targetUniChar = missingCharMarker; - goto getTrailByte; - } - - while(mySource < mySourceLimit){ - - targetUniChar =missingCharMarker; - - if(myTarget < args->targetLimit){ - - mySourceChar= (unsigned char) *mySource++; - - switch(mySourceChar){ - case UCNV_SI: - pToU2022State->g=0; - if (myData->isEmptySegment) { - myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - args->converter->toUCallbackReason = UCNV_IRREGULAR; - args->converter->toUBytes[0] = static_cast(mySourceChar); - args->converter->toULength = 1; - args->target = myTarget; - args->source = mySource; - return; - } - continue; - - case UCNV_SO: - if(pToU2022State->cs[1] != 0) { - pToU2022State->g=1; - myData->isEmptySegment = TRUE; /* Begin a new segment, 
empty so far */ - continue; - } else { - /* illegal to have SO before a matching designator */ - myData->isEmptySegment = FALSE; /* Handling a different error, reset this to avoid future spurious errs */ - break; - } - - case ESC_2022: - mySource--; -escape: - { - const char * mySourceBefore = mySource; - int8_t toULengthBefore = args->converter->toULength; - - changeState_2022(args->converter,&(mySource), - mySourceLimit, ISO_2022_CN,err); - - /* After SO there must be at least one character before a designator (designator error handled separately) */ - if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) { - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - args->converter->toUCallbackReason = UCNV_IRREGULAR; - args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore)); - } - } - - /* invalid or illegal escape sequence */ - if(U_FAILURE(*err)){ - args->target = myTarget; - args->source = mySource; - myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */ - return; - } - continue; - - /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */ - - case CR: - case LF: - uprv_memset(pToU2022State, 0, sizeof(ISO2022State)); - U_FALLTHROUGH; - default: - /* convert one or two bytes */ - myData->isEmptySegment = FALSE; - if(pToU2022State->g != 0) { - if(mySource < mySourceLimit) { - UConverterSharedData *cnv; - StateEnum tempState; - int32_t tempBufLen; - int leadIsOk, trailIsOk; - uint8_t trailByte; -getTrailByte: - trailByte = (uint8_t)*mySource; - /* - * Ticket 5691: consistent illegal sequences: - * - We include at least the first byte in the illegal sequence. - * - If any of the non-initial bytes could be the start of a character, - * we stop the illegal sequence before the first one of those. - * - * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is - * an ESC/SO/SI, we report only the first byte as the illegal sequence. - * Otherwise we convert or report the pair of bytes. 
- */ - leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); - trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); - if (leadIsOk && trailIsOk) { - ++mySource; - tempState = (StateEnum)pToU2022State->cs[pToU2022State->g]; - if(tempState >= CNS_11643_0) { - cnv = myData->myConverterArray[CNS_11643]; - tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0)); - tempBuf[1] = (char) (mySourceChar); - tempBuf[2] = (char) trailByte; - tempBufLen = 3; - - }else{ - U_ASSERT(tempStatemyConverterArray[tempState]; - tempBuf[0] = (char) (mySourceChar); - tempBuf[1] = (char) trailByte; - tempBufLen = 2; - } - targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE); - mySourceChar = (mySourceChar << 8) | trailByte; - } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { - /* report a pair of illegal bytes if the second byte is not a DBCS starter */ - ++mySource; - /* add another bit so that the code below writes 2 bytes in case of error */ - mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; - } - if(pToU2022State->g>=2) { - /* return from a single-shift state to the previous one */ - pToU2022State->g=pToU2022State->prevG; - } - } else { - args->converter->toUBytes[0] = (uint8_t)mySourceChar; - args->converter->toULength = 1; - goto endloop; - } - } - else{ - if(mySourceChar <= 0x7f) { - targetUniChar = (UChar) mySourceChar; - } - } - break; - } - if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){ - if(args->offsets){ - args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); - } - *(myTarget++)=(UChar)targetUniChar; - } - else if(targetUniChar > missingCharMarker){ - /* disassemble the surrogate pair and write to output*/ - targetUniChar-=0x0010000; - *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10)); - if(args->offsets){ - args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 
1 : 2)); - } - ++myTarget; - if(myTarget< args->targetLimit){ - *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); - if(args->offsets){ - args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); - } - ++myTarget; - }else{ - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= - (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); - } - - } - else{ - /* Call the callback function*/ - toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err); - break; - } - } - else{ - *err =U_BUFFER_OVERFLOW_ERROR; - break; - } - } -endloop: - args->target = myTarget; - args->source = mySource; -} -#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ - -static void U_CALLCONV -_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { - UConverter *cnv = args->converter; - UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo; - ISO2022State *pFromU2022State=&myConverterData->fromU2022State; - char *p, *subchar; - char buffer[8]; - int32_t length; - - subchar=(char *)cnv->subChars; - length=cnv->subCharLen; /* assume length==1 for most variants */ - - p = buffer; - switch(myConverterData->locale[0]){ - case 'j': - { - int8_t cs; - - if(pFromU2022State->g == 1) { - /* JIS7: switch from G1 to G0 */ - pFromU2022State->g = 0; - *p++ = UCNV_SI; - } - - cs = pFromU2022State->cs[0]; - if(cs != ASCII && cs != JISX201) { - /* not in ASCII or JIS X 0201: switch to ASCII */ - pFromU2022State->cs[0] = (int8_t)ASCII; - *p++ = '\x1b'; - *p++ = '\x28'; - *p++ = '\x42'; - } - - *p++ = subchar[0]; - break; - } - case 'c': - if(pFromU2022State->g != 0) { - /* not in ASCII mode: switch to ASCII */ - pFromU2022State->g = 0; - *p++ = UCNV_SI; - } - *p++ = subchar[0]; - break; - case 'k': - if(myConverterData->version == 0) { - if(length == 1) { - if(args->converter->fromUnicodeStatus) { - /* in DBCS mode: switch to SBCS */ - args->converter->fromUnicodeStatus = 0; - 
*p++ = UCNV_SI; - } - *p++ = subchar[0]; - } else /* length == 2*/ { - if(!args->converter->fromUnicodeStatus) { - /* in SBCS mode: switch to DBCS */ - args->converter->fromUnicodeStatus = 1; - *p++ = UCNV_SO; - } - *p++ = subchar[0]; - *p++ = subchar[1]; - } - break; - } else { - /* save the subconverter's substitution string */ - uint8_t *currentSubChars = myConverterData->currentConverter->subChars; - int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen; - - /* set our substitution string into the subconverter */ - myConverterData->currentConverter->subChars = (uint8_t *)subchar; - myConverterData->currentConverter->subCharLen = (int8_t)length; - - /* let the subconverter write the subchar, set/retrieve fromUChar32 state */ - args->converter = myConverterData->currentConverter; - myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32; - ucnv_cbFromUWriteSub(args, 0, err); - cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32; - args->converter = cnv; - - /* restore the subconverter's substitution string */ - myConverterData->currentConverter->subChars = currentSubChars; - myConverterData->currentConverter->subCharLen = currentSubCharLen; - - if(*err == U_BUFFER_OVERFLOW_ERROR) { - if(myConverterData->currentConverter->charErrorBufferLength > 0) { - uprv_memcpy( - cnv->charErrorBuffer, - myConverterData->currentConverter->charErrorBuffer, - myConverterData->currentConverter->charErrorBufferLength); - } - cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength; - myConverterData->currentConverter->charErrorBufferLength = 0; - } - return; - } - default: - /* not expected */ - break; - } - ucnv_cbFromUWriteBytes(args, - buffer, (int32_t)(p - buffer), - offsetIndex, err); -} - -/* - * Structure for cloning an ISO 2022 converter into a single memory block. 
- * ucnv_safeClone() of the converter will align the entire cloneStruct, - * and then ucnv_safeClone() of the sub-converter may additionally align - * currentConverter inside the cloneStruct, for which we need the deadSpace - * after currentConverter. - * This is because UAlignedMemory may be larger than the actually - * necessary alignment size for the platform. - * The other cloneStruct fields will not be moved around, - * and are aligned properly with cloneStruct's alignment. - */ -struct cloneStruct -{ - UConverter cnv; - UConverter currentConverter; - UAlignedMemory deadSpace; - UConverterDataISO2022 mydata; -}; - - -U_CDECL_BEGIN - -static UConverter * U_CALLCONV -_ISO_2022_SafeClone( - const UConverter *cnv, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status) -{ - struct cloneStruct * localClone; - UConverterDataISO2022 *cnvData; - int32_t i, size; - - if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ - *pBufferSize = (int32_t)sizeof(struct cloneStruct); - return NULL; - } - - cnvData = (UConverterDataISO2022 *)cnv->extraInfo; - localClone = (struct cloneStruct *)stackBuffer; - - /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ - - uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022)); - localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */ - localClone->cnv.isExtraLocal = TRUE; - - /* share the subconverters */ - - if(cnvData->currentConverter != NULL) { - size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ - localClone->mydata.currentConverter = - ucnv_safeClone(cnvData->currentConverter, - &localClone->currentConverter, - &size, status); - if(U_FAILURE(*status)) { - return NULL; - } - } - - for(i=0; imyConverterArray[i] != NULL) { - ucnv_incrementRefCount(cnvData->myConverterArray[i]); - } - } - - return &localClone->cnv; -} - -U_CDECL_END - -static void U_CALLCONV -_ISO_2022_GetUnicodeSet(const 
UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) -{ - int32_t i; - UConverterDataISO2022* cnvData; - - if (U_FAILURE(*pErrorCode)) { - return; - } -#ifdef U_ENABLE_GENERIC_ISO_2022 - if (cnv->sharedData == &_ISO2022Data) { - /* We use UTF-8 in this case */ - sa->addRange(sa->set, 0, 0xd7FF); - sa->addRange(sa->set, 0xE000, 0x10FFFF); - return; - } -#endif - - cnvData = (UConverterDataISO2022*)cnv->extraInfo; - - /* open a set and initialize it with code points that are algorithmically round-tripped */ - switch(cnvData->locale[0]){ - case 'j': - /* include JIS X 0201 which is hardcoded */ - sa->add(sa->set, 0xa5); - sa->add(sa->set, 0x203e); - if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { - /* include Latin-1 for some variants of JP */ - sa->addRange(sa->set, 0, 0xff); - } else { - /* include ASCII for JP */ - sa->addRange(sa->set, 0, 0x7f); - } - if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) { - /* - * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0 - * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8) - * use half-width Katakana. - * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode) - * half-width Katakana via the ESC ( I sequence. - * However, we only emit (fromUnicode) half-width Katakana according to the - * definition of each variant. - * - * When including fallbacks, - * we need to include half-width Katakana Unicode code points for all JP variants because - * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana). 
- */ - /* include half-width Katakana for JP */ - sa->addRange(sa->set, HWKANA_START, HWKANA_END); - } - break; -#if !UCONFIG_ONLY_HTML_CONVERSION - case 'c': - case 'z': - /* include ASCII for CN */ - sa->addRange(sa->set, 0, 0x7f); - break; - case 'k': - /* there is only one converter for KR, and it is not in the myConverterArray[] */ - cnvData->currentConverter->sharedData->impl->getUnicodeSet( - cnvData->currentConverter, sa, which, pErrorCode); - /* the loop over myConverterArray[] will simply not find another converter */ - break; -#endif - default: - break; - } - -#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */ - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && - cnvData->version==0 && i==CNS_11643 - ) { - /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */ - ucnv_MBCSGetUnicodeSetForBytes( - cnvData->myConverterArray[i], - sa, UCNV_ROUNDTRIP_SET, - 0, 0x81, 0x82, - pErrorCode); - } -#endif - - for (i=0; imyConverterArray[i]!=NULL) { - if(cnvData->locale[0]=='j' && i==JISX208) { - /* - * Only add code points that map to Shift-JIS codes - * corresponding to JIS X 0208. - */ - filter=UCNV_SET_FILTER_SJIS; -#if !UCONFIG_ONLY_HTML_CONVERSION - } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && - cnvData->version==0 && i==CNS_11643) { - /* - * Version-specific for CN: - * CN version 0 does not map CNS planes 3..7 although - * they are all available in the CNS conversion table; - * CN version 1 (-EXT) does map them all. - * The two versions create different Unicode sets. - */ - filter=UCNV_SET_FILTER_2022_CN; - } else if(i==KSC5601) { - /* - * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables) - * are broader than GR94. 
- */ - filter=UCNV_SET_FILTER_GR94DBCS; -#endif - } else { - filter=UCNV_SET_FILTER_NONE; - } - ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode); - } - } - - /* - * ISO 2022 converters must not convert SO/SI/ESC despite what - * sub-converters do by themselves. - * Remove these characters from the set. - */ - sa->remove(sa->set, 0x0e); - sa->remove(sa->set, 0x0f); - sa->remove(sa->set, 0x1b); - - /* ISO 2022 converters do not convert C1 controls either */ - sa->removeRange(sa->set, 0x80, 0x9f); -} - -static const UConverterImpl _ISO2022Impl={ - UCNV_ISO_2022, - - NULL, - NULL, - - _ISO2022Open, - _ISO2022Close, - _ISO2022Reset, - -#ifdef U_ENABLE_GENERIC_ISO_2022 - T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, - T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, - ucnv_fromUnicode_UTF8, - ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, -#else - NULL, - NULL, - NULL, - NULL, -#endif - NULL, - - NULL, - _ISO2022getName, - _ISO_2022_WriteSub, - _ISO_2022_SafeClone, - _ISO_2022_GetUnicodeSet, - - NULL, - NULL -}; -static const UConverterStaticData _ISO2022StaticData={ - sizeof(UConverterStaticData), - "ISO_2022", - 2022, - UCNV_IBM, - UCNV_ISO_2022, - 1, - 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */ - { 0x1a, 0, 0, 0 }, - 1, - FALSE, - FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; -const UConverterSharedData _ISO2022Data= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022StaticData, &_ISO2022Impl); - -/*************JP****************/ -static const UConverterImpl _ISO2022JPImpl={ - UCNV_ISO_2022, - - NULL, - NULL, - - _ISO2022Open, - _ISO2022Close, - _ISO2022Reset, - - UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, - UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, - UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, - UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, - NULL, - - NULL, - _ISO2022getName, - _ISO_2022_WriteSub, - _ISO_2022_SafeClone, - 
_ISO_2022_GetUnicodeSet, - - NULL, - NULL -}; -static const UConverterStaticData _ISO2022JPStaticData={ - sizeof(UConverterStaticData), - "ISO_2022_JP", - 0, - UCNV_IBM, - UCNV_ISO_2022, - 1, - 6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */ - { 0x1a, 0, 0, 0 }, - 1, - FALSE, - FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -namespace { - -const UConverterSharedData _ISO2022JPData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022JPImpl); - -} // namespace - -#if !UCONFIG_ONLY_HTML_CONVERSION -/************* KR ***************/ -static const UConverterImpl _ISO2022KRImpl={ - UCNV_ISO_2022, - - NULL, - NULL, - - _ISO2022Open, - _ISO2022Close, - _ISO2022Reset, - - UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, - UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, - UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, - UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, - NULL, - - NULL, - _ISO2022getName, - _ISO_2022_WriteSub, - _ISO_2022_SafeClone, - _ISO_2022_GetUnicodeSet, - - NULL, - NULL -}; -static const UConverterStaticData _ISO2022KRStaticData={ - sizeof(UConverterStaticData), - "ISO_2022_KR", - 0, - UCNV_IBM, - UCNV_ISO_2022, - 1, - 8, /* max 8 bytes per UChar */ - { 0x1a, 0, 0, 0 }, - 1, - FALSE, - FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -namespace { - -const UConverterSharedData _ISO2022KRData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022KRStaticData, &_ISO2022KRImpl); - -} // namespace - -/*************** CN ***************/ -static const UConverterImpl _ISO2022CNImpl={ - - UCNV_ISO_2022, - - NULL, - NULL, - - _ISO2022Open, - _ISO2022Close, - _ISO2022Reset, - - UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, - UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, - UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, - UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, - NULL, - - NULL, - _ISO2022getName, - _ISO_2022_WriteSub, - _ISO_2022_SafeClone, - 
_ISO_2022_GetUnicodeSet, - - NULL, - NULL -}; -static const UConverterStaticData _ISO2022CNStaticData={ - sizeof(UConverterStaticData), - "ISO_2022_CN", - 0, - UCNV_IBM, - UCNV_ISO_2022, - 1, - 8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */ - { 0x1a, 0, 0, 0 }, - 1, - FALSE, - FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -namespace { - -const UConverterSharedData _ISO2022CNData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl); - -} // namespace -#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ - -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/deps/node/deps/icu-small/source/common/ucnv_bld.cpp b/deps/node/deps/icu-small/source/common/ucnv_bld.cpp deleted file mode 100644 index 1a1625d6..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_bld.cpp +++ /dev/null @@ -1,1689 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ******************************************************************** - * COPYRIGHT: - * Copyright (c) 1996-2016, International Business Machines Corporation and - * others. All Rights Reserved. - ******************************************************************** - * - * ucnv_bld.cpp: - * - * Defines functions that are used in the creation/initialization/deletion - * of converters and related structures. - * uses uconv_io.h routines to access disk information - * is used by ucnv.h to implement public API create/delete/flushCache routines - * Modification History: - * - * Date Name Description - * - * 06/20/2000 helena OS/400 port changes; mostly typecast. - * 06/29/2000 helena Major rewrite of the callback interface. 
-*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/putil.h" -#include "unicode/udata.h" -#include "unicode/ucnv.h" -#include "unicode/uloc.h" -#include "mutex.h" -#include "putilimp.h" -#include "uassert.h" -#include "utracimp.h" -#include "ucnv_io.h" -#include "ucnv_bld.h" -#include "ucnvmbcs.h" -#include "ucnv_ext.h" -#include "ucnv_cnv.h" -#include "ucnv_imp.h" -#include "uhash.h" -#include "umutex.h" -#include "cstring.h" -#include "cmemory.h" -#include "ucln_cmn.h" -#include "ustr_cnv.h" - - -#if 0 -#include -extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l); -#define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__) -#else -# define UCNV_DEBUG_LOG(x,y,z) -#endif - -static const UConverterSharedData * const -converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ - NULL, NULL, - -#if UCONFIG_NO_LEGACY_CONVERSION - NULL, -#else - &_MBCSData, -#endif - - &_Latin1Data, - &_UTF8Data, &_UTF16BEData, &_UTF16LEData, -#if UCONFIG_ONLY_HTML_CONVERSION - NULL, NULL, -#else - &_UTF32BEData, &_UTF32LEData, -#endif - NULL, - -#if UCONFIG_NO_LEGACY_CONVERSION - NULL, -#else - &_ISO2022Data, -#endif - -#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION - NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, - NULL, -#else - &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6, - &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19, - &_HZData, -#endif - -#if UCONFIG_ONLY_HTML_CONVERSION - NULL, -#else - &_SCSUData, -#endif - - -#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION - NULL, -#else - &_ISCIIData, -#endif - - &_ASCIIData, -#if UCONFIG_ONLY_HTML_CONVERSION - NULL, NULL, &_UTF16Data, NULL, NULL, NULL, -#else - &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, -#endif - -#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION - NULL, -#else - &_CompoundTextData -#endif -}; 
- -/* Please keep this in binary sorted order for getAlgorithmicTypeFromName. - Also the name should be in lower case and all spaces, dashes and underscores - removed -*/ -static struct { - const char *name; - const UConverterType type; -} const cnvNameType[] = { -#if !UCONFIG_ONLY_HTML_CONVERSION - { "bocu1", UCNV_BOCU1 }, - { "cesu8", UCNV_CESU8 }, -#endif -#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - { "hz",UCNV_HZ }, -#endif -#if !UCONFIG_ONLY_HTML_CONVERSION - { "imapmailboxname", UCNV_IMAP_MAILBOX }, -#endif -#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - { "iscii", UCNV_ISCII }, -#endif -#if !UCONFIG_NO_LEGACY_CONVERSION - { "iso2022", UCNV_ISO_2022 }, -#endif - { "iso88591", UCNV_LATIN_1 }, -#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - { "lmbcs1", UCNV_LMBCS_1 }, - { "lmbcs11",UCNV_LMBCS_11 }, - { "lmbcs16",UCNV_LMBCS_16 }, - { "lmbcs17",UCNV_LMBCS_17 }, - { "lmbcs18",UCNV_LMBCS_18 }, - { "lmbcs19",UCNV_LMBCS_19 }, - { "lmbcs2", UCNV_LMBCS_2 }, - { "lmbcs3", UCNV_LMBCS_3 }, - { "lmbcs4", UCNV_LMBCS_4 }, - { "lmbcs5", UCNV_LMBCS_5 }, - { "lmbcs6", UCNV_LMBCS_6 }, - { "lmbcs8", UCNV_LMBCS_8 }, -#endif -#if !UCONFIG_ONLY_HTML_CONVERSION - { "scsu", UCNV_SCSU }, -#endif - { "usascii", UCNV_US_ASCII }, - { "utf16", UCNV_UTF16 }, - { "utf16be", UCNV_UTF16_BigEndian }, - { "utf16le", UCNV_UTF16_LittleEndian }, -#if U_IS_BIG_ENDIAN - { "utf16oppositeendian", UCNV_UTF16_LittleEndian }, - { "utf16platformendian", UCNV_UTF16_BigEndian }, -#else - { "utf16oppositeendian", UCNV_UTF16_BigEndian}, - { "utf16platformendian", UCNV_UTF16_LittleEndian }, -#endif -#if !UCONFIG_ONLY_HTML_CONVERSION - { "utf32", UCNV_UTF32 }, - { "utf32be", UCNV_UTF32_BigEndian }, - { "utf32le", UCNV_UTF32_LittleEndian }, -#if U_IS_BIG_ENDIAN - { "utf32oppositeendian", UCNV_UTF32_LittleEndian }, - { "utf32platformendian", UCNV_UTF32_BigEndian }, -#else - { "utf32oppositeendian", UCNV_UTF32_BigEndian }, - { 
"utf32platformendian", UCNV_UTF32_LittleEndian }, -#endif -#endif -#if !UCONFIG_ONLY_HTML_CONVERSION - { "utf7", UCNV_UTF7 }, -#endif - { "utf8", UCNV_UTF8 }, -#if !UCONFIG_ONLY_HTML_CONVERSION - { "x11compoundtext", UCNV_COMPOUND_TEXT} -#endif -}; - - -/*initializes some global variables */ -static UHashtable *SHARED_DATA_HASHTABLE = NULL; -static UMutex cnvCacheMutex = U_MUTEX_INITIALIZER; /* Mutex for synchronizing cnv cache access. */ - /* Note: the global mutex is used for */ - /* reference count updates. */ - -static const char **gAvailableConverters = NULL; -static uint16_t gAvailableConverterCount = 0; -static icu::UInitOnce gAvailableConvertersInitOnce = U_INITONCE_INITIALIZER; - -#if !U_CHARSET_IS_UTF8 - -/* This contains the resolved converter name. So no further alias lookup is needed again. */ -static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */ -static const char *gDefaultConverterName = NULL; - -/* -If the default converter is an algorithmic converter, this is the cached value. -We don't cache a full UConverter and clone it because ucnv_clone doesn't have -less overhead than an algorithmic open. We don't cache non-algorithmic converters -because ucnv_flushCache must be able to unload the default converter and its table. -*/ -static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL; - -/* Does gDefaultConverterName have a converter option and require extra parsing? */ -static UBool gDefaultConverterContainsOption; - -#endif /* !U_CHARSET_IS_UTF8 */ - -static const char DATA_TYPE[] = "cnv"; - -/* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup(). - * If it is ever to be called from elsewhere, synchronization - * will need to be considered. 
- */ -static void -ucnv_flushAvailableConverterCache() { - gAvailableConverterCount = 0; - if (gAvailableConverters) { - uprv_free((char **)gAvailableConverters); - gAvailableConverters = NULL; - } - gAvailableConvertersInitOnce.reset(); -} - -/* ucnv_cleanup - delete all storage held by the converter cache, except any */ -/* in use by open converters. */ -/* Not thread safe. */ -/* Not supported API. */ -static UBool U_CALLCONV ucnv_cleanup(void) { - ucnv_flushCache(); - if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { - uhash_close(SHARED_DATA_HASHTABLE); - SHARED_DATA_HASHTABLE = NULL; - } - - /* Isn't called from flushCache because other threads may have preexisting references to the table. */ - ucnv_flushAvailableConverterCache(); - -#if !U_CHARSET_IS_UTF8 - gDefaultConverterName = NULL; - gDefaultConverterNameBuffer[0] = 0; - gDefaultConverterContainsOption = FALSE; - gDefaultAlgorithmicSharedData = NULL; -#endif - - return (SHARED_DATA_HASHTABLE == NULL); -} - -U_CAPI void U_EXPORT2 -ucnv_enableCleanup() { - ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); -} - -static UBool U_CALLCONV -isCnvAcceptable(void * /*context*/, - const char * /*type*/, const char * /*name*/, - const UDataInfo *pInfo) { - return (UBool)( - pInfo->size>=20 && - pInfo->isBigEndian==U_IS_BIG_ENDIAN && - pInfo->charsetFamily==U_CHARSET_FAMILY && - pInfo->sizeofUChar==U_SIZEOF_UCHAR && - pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ - pInfo->dataFormat[1]==0x6e && - pInfo->dataFormat[2]==0x76 && - pInfo->dataFormat[3]==0x74 && - pInfo->formatVersion[0]==6); /* Everything will be version 6 */ -} - -/** - * Un flatten shared data from a UDATA.. 
- */ -static UConverterSharedData* -ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) -{ - /* UDataInfo info; -- necessary only if some converters have different formatVersion */ - const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); - const UConverterStaticData *source = (const UConverterStaticData *) raw; - UConverterSharedData *data; - UConverterType type = (UConverterType)source->conversionType; - - if(U_FAILURE(*status)) - return NULL; - - if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || - converterData[type] == NULL || - !converterData[type]->isReferenceCounted || - converterData[type]->referenceCounter != 1 || - source->structSize != sizeof(UConverterStaticData)) - { - *status = U_INVALID_TABLE_FORMAT; - return NULL; - } - - data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); - if(data == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - /* copy initial values from the static structure for this type */ - uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); - - data->staticData = source; - - data->sharedDataCached = FALSE; - - /* fill in fields from the loaded data */ - data->dataMemory = (void*)pData; /* for future use */ - - if(data->impl->load != NULL) { - data->impl->load(data, pArgs, raw + source->structSize, status); - if(U_FAILURE(*status)) { - uprv_free(data); - return NULL; - } - } - return data; -} - -/*Takes an alias name gets an actual converter file name - *goes to disk and opens it. 
- *allocates the memory and returns a new UConverter object - */ -static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) -{ - UDataMemory *data; - UConverterSharedData *sharedData; - - UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); - - if (U_FAILURE (*err)) { - UTRACE_EXIT_STATUS(*err); - return NULL; - } - - UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); - - data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); - if(U_FAILURE(*err)) - { - UTRACE_EXIT_STATUS(*err); - return NULL; - } - - sharedData = ucnv_data_unFlattenClone(pArgs, data, err); - if(U_FAILURE(*err)) - { - udata_close(data); - UTRACE_EXIT_STATUS(*err); - return NULL; - } - - /* - * TODO Store pkg in a field in the shared data so that delta-only converters - * can load base converters from the same package. - * If the pkg name is longer than the field, then either do not load the converter - * in the first place, or just set the pkg field to "". - */ - - UTRACE_EXIT_PTR_STATUS(sharedData, *err); - return sharedData; -} - -/*returns a converter type from a string - */ -static const UConverterSharedData * -getAlgorithmicTypeFromName(const char *realName) -{ - uint32_t mid, start, limit; - uint32_t lastMid; - int result; - char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; - - /* Lower case and remove ignoreable characters. */ - ucnv_io_stripForCompare(strippedName, realName); - - /* do a binary search for the alias */ - start = 0; - limit = UPRV_LENGTHOF(cnvNameType); - mid = limit; - lastMid = UINT32_MAX; - - for (;;) { - mid = (uint32_t)((start + limit) / 2); - if (lastMid == mid) { /* Have we moved? */ - break; /* We haven't moved, and it wasn't found. 
*/ - } - lastMid = mid; - result = uprv_strcmp(strippedName, cnvNameType[mid].name); - - if (result < 0) { - limit = mid; - } else if (result > 0) { - start = mid; - } else { - return converterData[cnvNameType[mid].type]; - } - } - - return NULL; -} - -/* -* Based on the number of known converters, this determines how many times larger -* the shared data hash table should be. When on small platforms, or just a couple -* of converters are used, this number should be 2. When memory is plentiful, or -* when ucnv_countAvailable is ever used with a lot of available converters, -* this should be 4. -* Larger numbers reduce the number of hash collisions, but use more memory. -*/ -#define UCNV_CACHE_LOAD_FACTOR 2 - -/* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ -/* Will always be called with the cnvCacheMutex alrady being held */ -/* by the calling function. */ -/* Stores the shared data in the SHARED_DATA_HASHTABLE - * @param data The shared data - */ -static void -ucnv_shareConverterData(UConverterSharedData * data) -{ - UErrorCode err = U_ZERO_ERROR; - /*Lazy evaluates the Hashtable itself */ - /*void *sanity = NULL;*/ - - if (SHARED_DATA_HASHTABLE == NULL) - { - SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL, - ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR, - &err); - ucnv_enableCleanup(); - - if (U_FAILURE(err)) - return; - } - - /* ### check to see if the element is not already there! 
*/ - - /* - sanity = ucnv_getSharedConverterData (data->staticData->name); - if(sanity != NULL) - { - UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity); - } - UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity); - */ - - /* Mark it shared */ - data->sharedDataCached = TRUE; - - uhash_put(SHARED_DATA_HASHTABLE, - (void*) data->staticData->name, /* Okay to cast away const as long as - keyDeleter == NULL */ - data, - &err); - UCNV_DEBUG_LOG("put", data->staticData->name,data); - -} - -/* Look up a converter name in the shared data cache. */ -/* cnvCacheMutex must be held by the caller to protect the hash table. */ -/* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) - * @param name The name of the shared data - * @return the shared data from the SHARED_DATA_HASHTABLE - */ -static UConverterSharedData * -ucnv_getSharedConverterData(const char *name) -{ - /*special case when no Table has yet been created we return NULL */ - if (SHARED_DATA_HASHTABLE == NULL) - { - return NULL; - } - else - { - UConverterSharedData *rc; - - rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); - UCNV_DEBUG_LOG("get",name,rc); - return rc; - } -} - -/*frees the string of memory blocks associates with a sharedConverter - *if and only if the referenceCounter == 0 - */ -/* Deletes (frees) the Shared data it's passed. 
first it checks the referenceCounter to - * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and - * returns TRUE, - * otherwise returns FALSE - * @param sharedConverterData The shared data - * @return if not it frees all the memory stemming from sharedConverterData and - * returns TRUE, otherwise returns FALSE - */ -static UBool -ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) -{ - UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); - UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); - - if (deadSharedData->referenceCounter > 0) { - UTRACE_EXIT_VALUE((int32_t)FALSE); - return FALSE; - } - - if (deadSharedData->impl->unload != NULL) { - deadSharedData->impl->unload(deadSharedData); - } - - if(deadSharedData->dataMemory != NULL) - { - UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; - udata_close(data); - } - - uprv_free(deadSharedData); - - UTRACE_EXIT_VALUE((int32_t)TRUE); - return TRUE; -} - -/** - * Load a non-algorithmic converter. - * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). 
- */ -UConverterSharedData * -ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { - UConverterSharedData *mySharedConverterData; - - if(err == NULL || U_FAILURE(*err)) { - return NULL; - } - - if(pArgs->pkg != NULL && *pArgs->pkg != 0) { - /* application-provided converters are not currently cached */ - return createConverterFromFile(pArgs, err); - } - - mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); - if (mySharedConverterData == NULL) - { - /*Not cached, we need to stream it in from file */ - mySharedConverterData = createConverterFromFile(pArgs, err); - if (U_FAILURE (*err) || (mySharedConverterData == NULL)) - { - return NULL; - } - else if (!pArgs->onlyTestIsLoadable) - { - /* share it with other library clients */ - ucnv_shareConverterData(mySharedConverterData); - } - } - else - { - /* The data for this converter was already in the cache. */ - /* Update the reference counter on the shared data: one more client */ - mySharedConverterData->referenceCounter++; - } - - return mySharedConverterData; -} - -/** - * Unload a non-algorithmic converter. - * It must be sharedData->isReferenceCounted - * and this function must be called inside umtx_lock(&cnvCacheMutex). 
- */ -U_CAPI void -ucnv_unload(UConverterSharedData *sharedData) { - if(sharedData != NULL) { - if (sharedData->referenceCounter > 0) { - sharedData->referenceCounter--; - } - - if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { - ucnv_deleteSharedConverterData(sharedData); - } - } -} - -U_CFUNC void -ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) -{ - if(sharedData != NULL && sharedData->isReferenceCounted) { - umtx_lock(&cnvCacheMutex); - ucnv_unload(sharedData); - umtx_unlock(&cnvCacheMutex); - } -} - -U_CFUNC void -ucnv_incrementRefCount(UConverterSharedData *sharedData) -{ - if(sharedData != NULL && sharedData->isReferenceCounted) { - umtx_lock(&cnvCacheMutex); - sharedData->referenceCounter++; - umtx_unlock(&cnvCacheMutex); - } -} - -/* - * *pPieces must be initialized. - * The name without options will be copied to pPieces->cnvName. - * The locale and options will be copied to pPieces only if present in inName, - * otherwise the existing values in pPieces remain. - * *pArgs will be set to the pPieces values. - */ -static void -parseConverterOptions(const char *inName, - UConverterNamePieces *pPieces, - UConverterLoadArgs *pArgs, - UErrorCode *err) -{ - char *cnvName = pPieces->cnvName; - char c; - int32_t len = 0; - - pArgs->name=inName; - pArgs->locale=pPieces->locale; - pArgs->options=pPieces->options; - - /* copy the converter name itself to cnvName */ - while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { - if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { - *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ - pPieces->cnvName[0]=0; - return; - } - *cnvName++=c; - inName++; - } - *cnvName=0; - pArgs->name=pPieces->cnvName; - - /* parse options. No more name copying should occur. 
*/ - while((c=*inName)!=0) { - if(c==UCNV_OPTION_SEP_CHAR) { - ++inName; - } - - /* inName is behind an option separator */ - if(uprv_strncmp(inName, "locale=", 7)==0) { - /* do not modify locale itself in case we have multiple locale options */ - char *dest=pPieces->locale; - - /* copy the locale option value */ - inName+=7; - len=0; - while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { - ++inName; - - if(++len>=ULOC_FULLNAME_CAPACITY) { - *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ - pPieces->locale[0]=0; - return; - } - - *dest++=c; - } - *dest=0; - } else if(uprv_strncmp(inName, "version=", 8)==0) { - /* copy the version option value into bits 3..0 of pPieces->options */ - inName+=8; - c=*inName; - if(c==0) { - pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); - return; - } else if((uint8_t)(c-'0')<10) { - pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); - ++inName; - } - } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { - inName+=8; - pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); - /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ - } else { - /* ignore any other options until we define some */ - while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { - } - if(c==0) { - return; - } - } - } -} - -/*Logic determines if the converter is Algorithmic AND/OR cached - *depending on that: - * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) - * -Get it from a Hashtable (Data=X, Cached=TRUE) - * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) - * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) - */ -U_CFUNC UConverterSharedData * -ucnv_loadSharedData(const char *converterName, - UConverterNamePieces *pPieces, - UConverterLoadArgs *pArgs, - UErrorCode * err) { - UConverterNamePieces stackPieces; - UConverterLoadArgs stackArgs; - UConverterSharedData *mySharedConverterData = NULL; - 
UErrorCode internalErrorCode = U_ZERO_ERROR; - UBool mayContainOption = TRUE; - UBool checkForAlgorithmic = TRUE; - - if (U_FAILURE (*err)) { - return NULL; - } - - if(pPieces == NULL) { - if(pArgs != NULL) { - /* - * Bad: We may set pArgs pointers to stackPieces fields - * which will be invalid after this function returns. - */ - *err = U_INTERNAL_PROGRAM_ERROR; - return NULL; - } - pPieces = &stackPieces; - } - if(pArgs == NULL) { - uprv_memset(&stackArgs, 0, sizeof(stackArgs)); - stackArgs.size = (int32_t)sizeof(stackArgs); - pArgs = &stackArgs; - } - - pPieces->cnvName[0] = 0; - pPieces->locale[0] = 0; - pPieces->options = 0; - - pArgs->name = converterName; - pArgs->locale = pPieces->locale; - pArgs->options = pPieces->options; - - /* In case "name" is NULL we want to open the default converter. */ - if (converterName == NULL) { -#if U_CHARSET_IS_UTF8 - pArgs->name = "UTF-8"; - return (UConverterSharedData *)converterData[UCNV_UTF8]; -#else - /* Call ucnv_getDefaultName first to query the name from the OS. */ - pArgs->name = ucnv_getDefaultName(); - if (pArgs->name == NULL) { - *err = U_MISSING_RESOURCE_ERROR; - return NULL; - } - mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; - checkForAlgorithmic = FALSE; - mayContainOption = gDefaultConverterContainsOption; - /* the default converter name is already canonical */ -#endif - } - else if(UCNV_FAST_IS_UTF8(converterName)) { - /* fastpath for UTF-8 */ - pArgs->name = "UTF-8"; - return (UConverterSharedData *)converterData[UCNV_UTF8]; - } - else { - /* separate the converter name from the options */ - parseConverterOptions(converterName, pPieces, pArgs, err); - if (U_FAILURE(*err)) { - /* Very bad name used. 
*/ - return NULL; - } - - /* get the canonical converter name */ - pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); - if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { - /* - * set the input name in case the converter was added - * without updating the alias table, or when there is no alias table - */ - pArgs->name = pPieces->cnvName; - } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) { - *err = U_AMBIGUOUS_ALIAS_WARNING; - } - } - - /* separate the converter name from the options */ - if(mayContainOption && pArgs->name != pPieces->cnvName) { - parseConverterOptions(pArgs->name, pPieces, pArgs, err); - } - - /* get the shared data for an algorithmic converter, if it is one */ - if (checkForAlgorithmic) { - mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); - } - if (mySharedConverterData == NULL) - { - /* it is a data-based converter, get its shared data. */ - /* Hold the cnvCacheMutex through the whole process of checking the */ - /* converter data cache, and adding new entries to the cache */ - /* to prevent other threads from modifying the cache during the */ - /* process. 
*/ - pArgs->nestedLoads=1; - pArgs->pkg=NULL; - - umtx_lock(&cnvCacheMutex); - mySharedConverterData = ucnv_load(pArgs, err); - umtx_unlock(&cnvCacheMutex); - if (U_FAILURE (*err) || (mySharedConverterData == NULL)) - { - return NULL; - } - } - - return mySharedConverterData; -} - -U_CAPI UConverter * -ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) -{ - UConverterNamePieces stackPieces; - UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; - UConverterSharedData *mySharedConverterData; - - UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); - - if(U_SUCCESS(*err)) { - UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); - - mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); - - myUConverter = ucnv_createConverterFromSharedData( - myUConverter, mySharedConverterData, - &stackArgs, - err); - - if(U_SUCCESS(*err)) { - UTRACE_EXIT_PTR_STATUS(myUConverter, *err); - return myUConverter; - } - } - - /* exit with error */ - UTRACE_EXIT_STATUS(*err); - return NULL; -} - -U_CFUNC UBool -ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { - UConverter myUConverter; - UConverterNamePieces stackPieces; - UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; - UConverterSharedData *mySharedConverterData; - - UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); - - if(U_SUCCESS(*err)) { - UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); - - stackArgs.onlyTestIsLoadable=TRUE; - mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); - ucnv_createConverterFromSharedData( - &myUConverter, mySharedConverterData, - &stackArgs, - err); - ucnv_unloadSharedDataIfReady(mySharedConverterData); - } - - UTRACE_EXIT_STATUS(*err); - return U_SUCCESS(*err); -} - -UConverter * -ucnv_createAlgorithmicConverter(UConverter *myUConverter, - UConverterType type, - const char *locale, uint32_t options, - UErrorCode *err) { - UConverter *cnv; - 
const UConverterSharedData *sharedData; - UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; - - UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); - UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); - - if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); - return NULL; - } - - sharedData = converterData[type]; - if(sharedData == NULL || sharedData->isReferenceCounted) { - /* not a valid type, or not an algorithmic converter */ - *err = U_ILLEGAL_ARGUMENT_ERROR; - UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); - return NULL; - } - - stackArgs.name = ""; - stackArgs.options = options; - stackArgs.locale=locale; - cnv = ucnv_createConverterFromSharedData( - myUConverter, (UConverterSharedData *)sharedData, - &stackArgs, err); - - UTRACE_EXIT_PTR_STATUS(cnv, *err); - return cnv; -} - -U_CFUNC UConverter* -ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) -{ - UConverter *myUConverter; - UConverterSharedData *mySharedConverterData; - UConverterNamePieces stackPieces; - UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; - - UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); - - if(U_FAILURE(*err)) { - UTRACE_EXIT_STATUS(*err); - return NULL; - } - - UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); - - /* first, get the options out of the converterName string */ - stackPieces.cnvName[0] = 0; - stackPieces.locale[0] = 0; - stackPieces.options = 0; - parseConverterOptions(converterName, &stackPieces, &stackArgs, err); - if (U_FAILURE(*err)) { - /* Very bad name used. 
*/ - UTRACE_EXIT_STATUS(*err); - return NULL; - } - stackArgs.nestedLoads=1; - stackArgs.pkg=packageName; - - /* open the data, unflatten the shared structure */ - mySharedConverterData = createConverterFromFile(&stackArgs, err); - - if (U_FAILURE(*err)) { - UTRACE_EXIT_STATUS(*err); - return NULL; - } - - /* create the actual converter */ - myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); - - if (U_FAILURE(*err)) { - ucnv_close(myUConverter); - UTRACE_EXIT_STATUS(*err); - return NULL; - } - - UTRACE_EXIT_PTR_STATUS(myUConverter, *err); - return myUConverter; -} - - -U_CFUNC UConverter* -ucnv_createConverterFromSharedData(UConverter *myUConverter, - UConverterSharedData *mySharedConverterData, - UConverterLoadArgs *pArgs, - UErrorCode *err) -{ - UBool isCopyLocal; - - if(U_FAILURE(*err)) { - ucnv_unloadSharedDataIfReady(mySharedConverterData); - return myUConverter; - } - if(myUConverter == NULL) - { - myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); - if(myUConverter == NULL) - { - *err = U_MEMORY_ALLOCATION_ERROR; - ucnv_unloadSharedDataIfReady(mySharedConverterData); - return NULL; - } - isCopyLocal = FALSE; - } else { - isCopyLocal = TRUE; - } - - /* initialize the converter */ - uprv_memset(myUConverter, 0, sizeof(UConverter)); - myUConverter->isCopyLocal = isCopyLocal; - /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ - myUConverter->sharedData = mySharedConverterData; - myUConverter->options = pArgs->options; - if(!pArgs->onlyTestIsLoadable) { - myUConverter->preFromUFirstCP = U_SENTINEL; - myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; - myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; - myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; - myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar; - myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; - 
myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; - myUConverter->subChars = (uint8_t *)myUConverter->subUChars; - uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); - myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ - } - - if(mySharedConverterData->impl->open != NULL) { - mySharedConverterData->impl->open(myUConverter, pArgs, err); - if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { - /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ - ucnv_close(myUConverter); - return NULL; - } - } - - return myUConverter; -} - -/*Frees all shared immutable objects that aren't referred to (reference count = 0) - */ -U_CAPI int32_t U_EXPORT2 -ucnv_flushCache () -{ - UConverterSharedData *mySharedData = NULL; - int32_t pos; - int32_t tableDeletedNum = 0; - const UHashElement *e; - /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ - int32_t i, remaining; - - UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); - - /* Close the default converter without creating a new one so that everything will be flushed. */ - u_flushDefaultConverter(); - - /*if shared data hasn't even been lazy evaluated yet - * return 0 - */ - if (SHARED_DATA_HASHTABLE == NULL) { - UTRACE_EXIT_VALUE((int32_t)0); - return 0; - } - - /*creates an enumeration to iterate through every element in the - * table - * - * Synchronization: holding cnvCacheMutex will prevent any other thread from - * accessing or modifying the hash table during the iteration. - * The reference count of an entry may be decremented by - * ucnv_close while the iteration is in process, but this is - * benign. It can't be incremented (in ucnv_createConverter()) - * because the sequence of looking up in the cache + incrementing - * is protected by cnvCacheMutex. 
- */ - umtx_lock(&cnvCacheMutex); - /* - * double loop: A delta/extension-only converter has a pointer to its base table's - * shared data; the first iteration of the outer loop may see the delta converter - * before the base converter, and unloading the delta converter may get the base - * converter's reference counter down to 0. - */ - i = 0; - do { - remaining = 0; - pos = UHASH_FIRST; - while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) - { - mySharedData = (UConverterSharedData *) e->value.pointer; - /*deletes only if reference counter == 0 */ - if (mySharedData->referenceCounter == 0) - { - tableDeletedNum++; - - UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); - - uhash_removeElement(SHARED_DATA_HASHTABLE, e); - mySharedData->sharedDataCached = FALSE; - ucnv_deleteSharedConverterData (mySharedData); - } else { - ++remaining; - } - } - } while(++i == 1 && remaining > 0); - umtx_unlock(&cnvCacheMutex); - - UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); - - UTRACE_EXIT_VALUE(tableDeletedNum); - return tableDeletedNum; -} - -/* available converters list --------------------------------------------------- */ - -static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) { - U_ASSERT(gAvailableConverterCount == 0); - U_ASSERT(gAvailableConverters == NULL); - - ucnv_enableCleanup(); - UEnumeration *allConvEnum = ucnv_openAllNames(&errCode); - int32_t allConverterCount = uenum_count(allConvEnum, &errCode); - if (U_FAILURE(errCode)) { - return; - } - - /* We can't have more than "*converterTable" converters to open */ - gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); - if (!gAvailableConverters) { - errCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - - /* Open the default converter to make sure that it has first dibs in the hash table. 
*/ - UErrorCode localStatus = U_ZERO_ERROR; - UConverter tempConverter; - ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); - - gAvailableConverterCount = 0; - - for (int32_t idx = 0; idx < allConverterCount; idx++) { - localStatus = U_ZERO_ERROR; - const char *converterName = uenum_next(allConvEnum, NULL, &localStatus); - if (ucnv_canCreateConverter(converterName, &localStatus)) { - gAvailableConverters[gAvailableConverterCount++] = converterName; - } - } - - uenum_close(allConvEnum); -} - - -static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { - umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode); - return U_SUCCESS(*pErrorCode); -} - -U_CFUNC uint16_t -ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { - if (haveAvailableConverterList(pErrorCode)) { - return gAvailableConverterCount; - } - return 0; -} - -U_CFUNC const char * -ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { - if (haveAvailableConverterList(pErrorCode)) { - if (n < gAvailableConverterCount) { - return gAvailableConverters[n]; - } - *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; - } - return NULL; -} - -/* default converter name --------------------------------------------------- */ - -#if !U_CHARSET_IS_UTF8 -/* -Copy the canonical converter name. -ucnv_getDefaultName must be thread safe, which can call this function. - -ucnv_setDefaultName calls this function and it doesn't have to be -thread safe because there is no reliable/safe way to reset the -converter in use in all threads. If you did reset the converter, you -would not be sure that retrieving a default converter for one string -would be the same type of default converter for a successive string. -Since the name is a returned via ucnv_getDefaultName without copying, -you shouldn't be modifying or deleting the string from a separate thread. 
-*/ -static inline void -internalSetName(const char *name, UErrorCode *status) { - UConverterNamePieces stackPieces; - UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; - int32_t length=(int32_t)(uprv_strlen(name)); - UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); - const UConverterSharedData *algorithmicSharedData; - - stackArgs.name = name; - if(containsOption) { - stackPieces.cnvName[0] = 0; - stackPieces.locale[0] = 0; - stackPieces.options = 0; - parseConverterOptions(name, &stackPieces, &stackArgs, status); - if(U_FAILURE(*status)) { - return; - } - } - algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); - - umtx_lock(&cnvCacheMutex); - - gDefaultAlgorithmicSharedData = algorithmicSharedData; - gDefaultConverterContainsOption = containsOption; - uprv_memcpy(gDefaultConverterNameBuffer, name, length); - gDefaultConverterNameBuffer[length]=0; - - /* gDefaultConverterName MUST be the last global var set by this function. */ - /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ - // But there is nothing here preventing that from being reordered, either by the compiler - // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough. - // -- Andy - gDefaultConverterName = gDefaultConverterNameBuffer; - - ucnv_enableCleanup(); - - umtx_unlock(&cnvCacheMutex); -} -#endif - -/* - * In order to be really thread-safe, the get function would have to take - * a buffer parameter and copy the current string inside a mutex block. - * This implementation only tries to be really thread-safe while - * setting the name. - * It assumes that setting a pointer is atomic. 
- */ - -U_CAPI const char* U_EXPORT2 -ucnv_getDefaultName() { -#if U_CHARSET_IS_UTF8 - return "UTF-8"; -#else - /* local variable to be thread-safe */ - const char *name; - - /* - Concurrent calls to ucnv_getDefaultName must be thread safe, - but ucnv_setDefaultName is not thread safe. - */ - { - icu::Mutex lock(&cnvCacheMutex); - name = gDefaultConverterName; - } - if(name==NULL) { - UErrorCode errorCode = U_ZERO_ERROR; - UConverter *cnv = NULL; - - name = uprv_getDefaultCodepage(); - - /* if the name is there, test it out and get the canonical name with options */ - if(name != NULL) { - cnv = ucnv_open(name, &errorCode); - if(U_SUCCESS(errorCode) && cnv != NULL) { - name = ucnv_getName(cnv, &errorCode); - } - } - - if(name == NULL || name[0] == 0 - || U_FAILURE(errorCode) || cnv == NULL - || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) - { - /* Panic time, let's use a fallback. */ -#if (U_CHARSET_FAMILY == U_ASCII_FAMILY) - name = "US-ASCII"; - /* there is no 'algorithmic' converter for EBCDIC */ -#elif U_PLATFORM == U_PF_OS390 - name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; -#else - name = "ibm-37_P100-1995"; -#endif - } - - internalSetName(name, &errorCode); - - /* The close may make the current name go away. */ - ucnv_close(cnv); - } - - return name; -#endif -} - -#if U_CHARSET_IS_UTF8 -U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} -#else -/* -This function is not thread safe, and it can't be thread safe. -See internalSetName or the API reference for details. 
-*/ -U_CAPI void U_EXPORT2 -ucnv_setDefaultName(const char *converterName) { - if(converterName==NULL) { - /* reset to the default codepage */ - gDefaultConverterName=NULL; - } else { - UErrorCode errorCode = U_ZERO_ERROR; - UConverter *cnv = NULL; - const char *name = NULL; - - /* if the name is there, test it out and get the canonical name with options */ - cnv = ucnv_open(converterName, &errorCode); - if(U_SUCCESS(errorCode) && cnv != NULL) { - name = ucnv_getName(cnv, &errorCode); - } - - if(U_SUCCESS(errorCode) && name!=NULL) { - internalSetName(name, &errorCode); - } - /* else this converter is bad to use. Don't change it to a bad value. */ - - /* The close may make the current name go away. */ - ucnv_close(cnv); - - /* reset the converter cache */ - u_flushDefaultConverter(); - } -} -#endif - -/* data swapping ------------------------------------------------------------ */ - -/* most of this might belong more properly into ucnvmbcs.c, but that is so large */ - -#if !UCONFIG_NO_LEGACY_CONVERSION - -U_CAPI int32_t U_EXPORT2 -ucnv_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t headerSize; - - const uint8_t *inBytes; - uint8_t *outBytes; - - uint32_t offset, count, staticDataSize; - int32_t size; - - const UConverterStaticData *inStaticData; - UConverterStaticData *outStaticData; - - const _MBCSHeader *inMBCSHeader; - _MBCSHeader *outMBCSHeader; - _MBCSHeader mbcsHeader; - uint32_t mbcsHeaderLength; - UBool noFromU=FALSE; - - uint8_t outputType; - - int32_t maxFastUChar, mbcsIndexLength; - - const int32_t *inExtIndexes; - int32_t extOffset; - - /* udata_swapDataHeader checks the arguments */ - headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* check data format and format version */ - pInfo=(const UDataInfo *)((const char *)inData+4); - if(!( - 
pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ - pInfo->dataFormat[1]==0x6e && - pInfo->dataFormat[2]==0x76 && - pInfo->dataFormat[3]==0x74 && - pInfo->formatVersion[0]==6 && - pInfo->formatVersion[1]>=2 - )) { - udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0], pInfo->formatVersion[1]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - inBytes=(const uint8_t *)inData+headerSize; - outBytes=(uint8_t *)outData+headerSize; - - /* read the initial UConverterStaticData structure after the UDataInfo header */ - inStaticData=(const UConverterStaticData *)inBytes; - outStaticData=(UConverterStaticData *)outBytes; - - if(length<0) { - staticDataSize=ds->readUInt32(inStaticData->structSize); - } else { - length-=headerSize; - if( length<(int32_t)sizeof(UConverterStaticData) || - (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) - ) { - udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - - if(length>=0) { - /* swap the static data */ - if(inStaticData!=outStaticData) { - uprv_memcpy(outStaticData, inStaticData, staticDataSize); - } - - ds->swapArray32(ds, &inStaticData->structSize, 4, - &outStaticData->structSize, pErrorCode); - ds->swapArray32(ds, &inStaticData->codepage, 4, - &outStaticData->codepage, pErrorCode); - - ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name), - outStaticData->name, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "ucnv_swap(): error swapping converter name\n"); - return 0; - } - } - - inBytes+=staticDataSize; - outBytes+=staticDataSize; - if(length>=0) { - length-=(int32_t)staticDataSize; - } - - /* check for supported 
conversionType values */ - if(inStaticData->conversionType==UCNV_MBCS) { - /* swap MBCS data */ - inMBCSHeader=(const _MBCSHeader *)inBytes; - outMBCSHeader=(_MBCSHeader *)outBytes; - - if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) { - udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { - mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; - } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && - ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))& - MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 - ) { - mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK; - noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0); - } else { - udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", - inMBCSHeader->version[0], inMBCSHeader->version[1]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); - mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); - mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); - mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); - mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); - mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); - mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); - mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); - /* mbcsHeader.options have been read above */ - - extOffset=(int32_t)(mbcsHeader.flags>>8); - outputType=(uint8_t)mbcsHeader.flags; - if(noFromU && outputType==MBCS_OUTPUT_1) { - udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n"); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - /* make sure that the output type is known */ 
- switch(outputType) { - case MBCS_OUTPUT_1: - case MBCS_OUTPUT_2: - case MBCS_OUTPUT_3: - case MBCS_OUTPUT_4: - case MBCS_OUTPUT_3_EUC: - case MBCS_OUTPUT_4_EUC: - case MBCS_OUTPUT_2_SISO: - case MBCS_OUTPUT_EXT_ONLY: - /* OK */ - break; - default: - udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", - outputType); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - /* calculate the length of the MBCS data */ - - /* - * utf8Friendly MBCS files (mbcsHeader.version 4.3) - * contain an additional mbcsIndex table: - * uint16_t[(maxFastUChar+1)>>6]; - * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff). - */ - maxFastUChar=0; - mbcsIndexLength=0; - if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 && - mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0 - ) { - maxFastUChar=(maxFastUChar<<8)|0xff; - mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */ - } - - if(extOffset==0) { - size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength); - if(!noFromU) { - size+=(int32_t)mbcsHeader.fromUBytesLength; - } - - /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ - inExtIndexes=NULL; - } else { - /* there is extension data after the base data, see ucnv_ext.h */ - if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { - udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - inExtIndexes=(const int32_t *)(inBytes+extOffset); - size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); - } - - if(length>=0) { - if(lengthswapArray32(ds, &inMBCSHeader->countStates, count-4, - &outMBCSHeader->countStates, pErrorCode); - - if(outputType==MBCS_OUTPUT_EXT_ONLY) { - /* - * extension-only file, - * contains a base name instead of normal base table data - */ - - /* swap the base name, between the header and the 
extension data */ - const char *inBaseName=(const char *)inBytes+count; - char *outBaseName=(char *)outBytes+count; - ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName), - outBaseName, pErrorCode); - } else { - /* normal file with base table data */ - - /* swap the state table, 1kB per state */ - offset=count; - count=mbcsHeader.countStates*1024; - ds->swapArray32(ds, inBytes+offset, (int32_t)count, - outBytes+offset, pErrorCode); - - /* swap the toUFallbacks[] */ - offset+=count; - count=mbcsHeader.countToUFallbacks*8; - ds->swapArray32(ds, inBytes+offset, (int32_t)count, - outBytes+offset, pErrorCode); - - /* swap the unicodeCodeUnits[] */ - offset=mbcsHeader.offsetToUCodeUnits; - count=mbcsHeader.offsetFromUTable-offset; - ds->swapArray16(ds, inBytes+offset, (int32_t)count, - outBytes+offset, pErrorCode); - - /* offset to the stage 1 table, independent of the outputType */ - offset=mbcsHeader.offsetFromUTable; - - if(outputType==MBCS_OUTPUT_1) { - /* SBCS: swap the fromU tables, all 16 bits wide */ - count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; - ds->swapArray16(ds, inBytes+offset, (int32_t)count, - outBytes+offset, pErrorCode); - } else { - /* otherwise: swap the stage tables separately */ - - /* stage 1 table: uint16_t[0x440 or 0x40] */ - if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { - count=0x440*2; /* for all of Unicode */ - } else { - count=0x40*2; /* only BMP */ - } - ds->swapArray16(ds, inBytes+offset, (int32_t)count, - outBytes+offset, pErrorCode); - - /* stage 2 table: uint32_t[] */ - offset+=count; - count=mbcsHeader.offsetFromUBytes-offset; - ds->swapArray32(ds, inBytes+offset, (int32_t)count, - outBytes+offset, pErrorCode); - - /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ - offset=mbcsHeader.offsetFromUBytes; - count= noFromU ? 
0 : mbcsHeader.fromUBytesLength; - switch(outputType) { - case MBCS_OUTPUT_2: - case MBCS_OUTPUT_3_EUC: - case MBCS_OUTPUT_2_SISO: - ds->swapArray16(ds, inBytes+offset, (int32_t)count, - outBytes+offset, pErrorCode); - break; - case MBCS_OUTPUT_4: - ds->swapArray32(ds, inBytes+offset, (int32_t)count, - outBytes+offset, pErrorCode); - break; - default: - /* just uint8_t[], nothing to swap */ - break; - } - - if(mbcsIndexLength!=0) { - offset+=count; - count=mbcsIndexLength; - ds->swapArray16(ds, inBytes+offset, (int32_t)count, - outBytes+offset, pErrorCode); - } - } - } - - if(extOffset!=0) { - /* swap the extension data */ - inBytes+=extOffset; - outBytes+=extOffset; - - /* swap toUTable[] */ - offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); - length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); - ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); - - /* swap toUUChars[] */ - offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); - length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); - ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); - - /* swap fromUTableUChars[] */ - offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); - length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); - ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); - - /* swap fromUTableValues[] */ - offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); - /* same length as for fromUTableUChars[] */ - ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); - - /* no need to swap fromUBytes[] */ - - /* swap fromUStage12[] */ - offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); - length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); - ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); - - /* swap fromUStage3[] */ - 
offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); - length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); - ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); - - /* swap fromUStage3b[] */ - offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); - length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); - ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); - - /* swap indexes[] */ - length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); - ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); - } - } - } else { - udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n", - inStaticData->conversionType); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - return headerSize+(int32_t)staticDataSize+size; -} - -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnv_bld.h b/deps/node/deps/icu-small/source/common/ucnv_bld.h deleted file mode 100644 index caa263f5..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_bld.h +++ /dev/null @@ -1,296 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2015 International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* -* ucnv_bld.h: -* Contains internal data structure definitions -* Created by Bertrand A. Damiba -* -* Change history: -* -* 06/29/2000 helena Major rewrite of the callback APIs. 
-*/ - -#ifndef UCNV_BLD_H -#define UCNV_BLD_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/ucnv_err.h" -#include "unicode/utf16.h" -#include "ucnv_cnv.h" -#include "ucnvmbcs.h" -#include "ucnv_ext.h" -#include "udataswp.h" - -/* size of the overflow buffers in UConverter, enough for escaping callbacks */ -#define UCNV_ERROR_BUFFER_LENGTH 32 - -/* at most 4 bytes per substitution character (part of .cnv file format! see UConverterStaticData) */ -#define UCNV_MAX_SUBCHAR_LEN 4 - -/* at most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6) */ -#define UCNV_MAX_CHAR_LEN 8 - -/* converter options bits */ -#define UCNV_OPTION_VERSION 0xf -#define UCNV_OPTION_SWAP_LFNL 0x10 - -#define UCNV_GET_VERSION(cnv) ((cnv)->options&UCNV_OPTION_VERSION) - -U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv - itself is compiled under C++, the linkage of the funcptrs will - work. - */ - -union UConverterTable { - UConverterMBCSTable mbcs; -}; - -typedef union UConverterTable UConverterTable; - -struct UConverterImpl; -typedef struct UConverterImpl UConverterImpl; - -/** values for the unicodeMask */ -#define UCNV_HAS_SUPPLEMENTARY 1 -#define UCNV_HAS_SURROGATES 2 - -typedef struct UConverterStaticData { /* +offset: size */ - uint32_t structSize; /* +0: 4 Size of this structure */ - - char name - [UCNV_MAX_CONVERTER_NAME_LENGTH]; /* +4: 60 internal name of the converter- invariant chars */ - - int32_t codepage; /* +64: 4 codepage # (now IBM-$codepage) */ - - int8_t platform; /* +68: 1 platform of the converter (only IBM now) */ - int8_t conversionType; /* +69: 1 conversion type */ - - int8_t minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */ - int8_t maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */ - - uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* +72: 4 [note: 4 and 8 byte boundary] */ - int8_t subCharLen; /* +76: 1 */ - - uint8_t 
hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */ - uint8_t hasFromUnicodeFallback; /* +78: 1 */ - uint8_t unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */ - uint8_t subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */ - uint8_t reserved[19]; /* +81: 19 to round out the structure */ - /* total size: 100 */ -} UConverterStaticData; - -/* - * Defines the UConverterSharedData struct, - * the immutable, shared part of UConverter. - */ -struct UConverterSharedData { - uint32_t structSize; /* Size of this structure */ - uint32_t referenceCounter; /* used to count number of clients, unused for static/immutable SharedData */ - - const void *dataMemory; /* from udata_openChoice() - for cleanup */ - - const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */ - - UBool sharedDataCached; /* TRUE: shared data is in cache, don't destroy on ucnv_close() if 0 ref. FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */ - /** If FALSE, then referenceCounter is not used. Must not change after initialization. */ - UBool isReferenceCounted; - - const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */ - - /*initial values of some members of the mutable part of object */ - uint32_t toUnicodeStatus; - - /* - * Shared data structures currently come in two flavors: - * - readonly for built-in algorithmic converters - * - allocated for MBCS, with a pointer to an allocated UConverterTable - * which always has a UConverterMBCSTable - * - * To eliminate one allocation, I am making the UConverterMBCSTable - * a member of the shared data. - * - * markus 2003-nov-07 - */ - UConverterMBCSTable mbcs; -}; - -/** UConverterSharedData initializer for static, non-reference-counted converters. 
*/ -#define UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(pStaticData, pImpl) \ - { \ - sizeof(UConverterSharedData), ~((uint32_t)0), \ - NULL, pStaticData, FALSE, FALSE, pImpl, \ - 0, UCNV_MBCS_TABLE_INITIALIZER \ - } - -/* Defines a UConverter, the lightweight mutable part the user sees */ - -struct UConverter { - /* - * Error function pointer called when conversion issues - * occur during a ucnv_fromUnicode call - */ - void (U_EXPORT2 *fromUCharErrorBehaviour) (const void *context, - UConverterFromUnicodeArgs *args, - const UChar *codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode *); - /* - * Error function pointer called when conversion issues - * occur during a ucnv_toUnicode call - */ - void (U_EXPORT2 *fromCharErrorBehaviour) (const void *context, - UConverterToUnicodeArgs *args, - const char *codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode *); - - /* - * Pointer to additional data that depends on the converter type. - * Used by ISO 2022, SCSU, GB 18030 converters, possibly more. - */ - void *extraInfo; - - const void *fromUContext; - const void *toUContext; - - /* - * Pointer to charset bytes for substitution string if subCharLen>0, - * or pointer to Unicode string (UChar *) if subCharLen<0. - * subCharLen==0 is equivalent to using a skip callback. - * If the pointer is !=subUChars then it is allocated with - * UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes. - * The subUChars field is declared as UChar[] not uint8_t[] to - * guarantee alignment for UChars. - */ - uint8_t *subChars; - - UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */ - - uint32_t options; /* options flags from UConverterOpen, may contain additional bits */ - - UBool sharedDataIsCached; /* TRUE: shared data is in cache, don't destroy on ucnv_close() if 0 ref. 
FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */ - UBool isCopyLocal; /* TRUE if UConverter is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */ - UBool isExtraLocal; /* TRUE if extraInfo is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */ - - UBool useFallback; - int8_t toULength; /* number of bytes in toUBytes */ - uint8_t toUBytes[UCNV_MAX_CHAR_LEN-1];/* more "toU status"; keeps the bytes of the current character */ - uint32_t toUnicodeStatus; /* Used to internalize stream status information */ - int32_t mode; - uint32_t fromUnicodeStatus; - - /* - * More fromUnicode() status. Serves 3 purposes: - * - keeps a lead surrogate between buffers (similar to toUBytes[]) - * - keeps a lead surrogate at the end of the stream, - * which the framework handles as truncated input - * - if the fromUnicode() implementation returns to the framework - * (ucnv.c ucnv_fromUnicode()), then the framework calls the callback - * for this code point - */ - UChar32 fromUChar32; - - /* - * value for ucnv_getMaxCharSize() - * - * usually simply copied from the static data, but ucnvmbcs.c modifies - * the value depending on the converter type and options - */ - int8_t maxBytesPerUChar; - - int8_t subCharLen; /* length of the codepage specific character sequence */ - int8_t invalidCharLength; - int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */ - - int8_t invalidUCharLength; - int8_t UCharErrorBufferLength; /* number of valid UChars in charErrorBuffer */ - - uint8_t subChar1; /* single-byte substitution character if different from subChar */ - UBool useSubChar1; - char invalidCharBuffer[UCNV_MAX_CHAR_LEN]; /* bytes from last error/callback situation */ - uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */ - UChar subUChars[UCNV_MAX_SUBCHAR_LEN/U_SIZEOF_UCHAR]; /* see subChars documentation */ - - UChar 
invalidUCharBuffer[U16_MAX_LENGTH]; /* UChars from last error/callback situation */ - UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */ - - /* fields for conversion extension */ - - /* store previous UChars/chars to continue partial matches */ - UChar32 preFromUFirstCP; /* >=0: partial match */ - UChar preFromU[UCNV_EXT_MAX_UCHARS]; - char preToU[UCNV_EXT_MAX_BYTES]; - int8_t preFromULength, preToULength; /* negative: replay */ - int8_t preToUFirstLength; /* length of first character */ - - /* new fields for ICU 4.0 */ - UConverterCallbackReason toUCallbackReason; /* (*fromCharErrorBehaviour) reason, set when error is detected */ -}; - -U_CDECL_END /* end of UConverter */ - -#define CONVERTER_FILE_EXTENSION ".cnv" - - -/** - * Return the number of all converter names. - * @param pErrorCode The error code - * @return the number of all converter names - */ -U_CFUNC uint16_t -ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode); - -/** - * Return the (n)th converter name in mixed case, or NULL - * if there is none (typically, if the data cannot be loaded). - * 0<=indexisReferenceCounted - * and this function must be called inside umtx_lock(&cnvCacheMutex). - */ -U_CAPI void -ucnv_unload(UConverterSharedData *sharedData); - -/** - * Swap ICU .cnv conversion tables. See udataswp.h. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -ucnv_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -U_CAPI void U_EXPORT2 -ucnv_enableCleanup(); - -#endif - -#endif /* _UCNV_BLD */ diff --git a/deps/node/deps/icu-small/source/common/ucnv_cb.cpp b/deps/node/deps/icu-small/source/common/ucnv_cb.cpp deleted file mode 100644 index 1bb00120..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_cb.cpp +++ /dev/null @@ -1,261 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2006, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** - * ucnv_cb.c: - * External APIs for the ICU's codeset conversion library - * Helena Shih - * - * Modification History: - * - * Date Name Description - * 7/28/2000 srl Implementation - */ - -/** - * @name Character Conversion C API - * - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv_cb.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cmemory.h" - -/* need to update the offsets when the target moves. */ -/* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly -if you don't use ucnv_cbXXX functions. Make sure you don't use the same callback within -the same call stack if the complexity arises. */ -U_CAPI void U_EXPORT2 -ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args, - const char* source, - int32_t length, - int32_t offsetIndex, - UErrorCode * err) -{ - if(U_FAILURE(*err)) { - return; - } - - ucnv_fromUWriteBytes( - args->converter, - source, length, - &args->target, args->targetLimit, - &args->offsets, offsetIndex, - err); -} - -U_CAPI void U_EXPORT2 -ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args, - const UChar** source, - const UChar* sourceLimit, - int32_t offsetIndex, - UErrorCode * err) -{ - /* - This is a fun one. Recursion can occur - we're basically going to - just retry shoving data through the same converter. Note, if you got - here through some kind of invalid sequence, you maybe should emit a - reset sequence of some kind and/or call ucnv_reset(). Since this - IS an actual conversion, take care that you've changed the callback - or the data, or you'll get an infinite loop. 
- - Please set the err value to something reasonable before calling - into this. - */ - - char *oldTarget; - - if(U_FAILURE(*err)) - { - return; - } - - oldTarget = args->target; - - ucnv_fromUnicode(args->converter, - &args->target, - args->targetLimit, - source, - sourceLimit, - NULL, /* no offsets */ - FALSE, /* no flush */ - err); - - if(args->offsets) - { - while (args->target != oldTarget) /* if it moved at all.. */ - { - *(args->offsets)++ = offsetIndex; - oldTarget++; - } - } - - /* - Note, if you did something like used a Stop subcallback, things would get interesting. - In fact, here's where we want to return the partially consumed in-source! - */ - if(*err == U_BUFFER_OVERFLOW_ERROR) - /* && (*source < sourceLimit && args->target >= args->targetLimit) - -- S. Hrcek */ - { - /* Overflowed the target. Now, we'll write into the charErrorBuffer. - It's a fixed size. If we overflow it... Hmm */ - char *newTarget; - const char *newTargetLimit; - UErrorCode err2 = U_ZERO_ERROR; - - int8_t errBuffLen; - - errBuffLen = args->converter->charErrorBufferLength; - - /* start the new target at the first free slot in the errbuff.. */ - newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen); - - newTargetLimit = (char *)(args->converter->charErrorBuffer + - sizeof(args->converter->charErrorBuffer)); - - if(newTarget >= newTargetLimit) - { - *err = U_INTERNAL_PROGRAM_ERROR; - return; - } - - /* We're going to tell the converter that the errbuff len is empty. - This prevents the existing errbuff from being 'flushed' out onto - itself. If the errbuff is needed by the converter this time, - we're hosed - we're out of space! */ - - args->converter->charErrorBufferLength = 0; - - ucnv_fromUnicode(args->converter, - &newTarget, - newTargetLimit, - source, - sourceLimit, - NULL, - FALSE, - &err2); - - /* We can go ahead and overwrite the length here. We know just how - to recalculate it. 
*/ - - args->converter->charErrorBufferLength = (int8_t)( - newTarget - (char*)args->converter->charErrorBuffer); - - if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR)) - { - /* now we're REALLY in trouble. - Internal program error - callback shouldn't have written this much - data! - */ - *err = U_INTERNAL_PROGRAM_ERROR; - return; - } - /*else {*/ - /* sub errs could be invalid/truncated/illegal chars or w/e. - These might want to be passed on up.. But the problem is, we already - need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these - other errs.. */ - - /* - if(U_FAILURE(err2)) - ?? - */ - /*}*/ - } -} - -U_CAPI void U_EXPORT2 -ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args, - int32_t offsetIndex, - UErrorCode * err) -{ - UConverter *converter; - int32_t length; - - if(U_FAILURE(*err)) { - return; - } - converter = args->converter; - length = converter->subCharLen; - - if(length == 0) { - return; - } - - if(length < 0) { - /* - * Write/convert the substitution string. Its real length is -length. - * Unlike the escape callback, we need not change the converter's - * callback function because ucnv_setSubstString() verified that - * the string can be converted, so we will not get a conversion error - * and will not recurse. - * At worst we should get a U_BUFFER_OVERFLOW_ERROR. - */ - const UChar *source = (const UChar *)converter->subChars; - ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err); - return; - } - - if(converter->sharedData->impl->writeSub!=NULL) { - converter->sharedData->impl->writeSub(args, offsetIndex, err); - } - else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) { - /* - TODO: Is this untestable because the MBCS converter has a writeSub function to call - and the other converters don't use subChar1? 
- */ - ucnv_cbFromUWriteBytes(args, - (const char *)&converter->subChar1, 1, - offsetIndex, err); - } - else { - ucnv_cbFromUWriteBytes(args, - (const char *)converter->subChars, length, - offsetIndex, err); - } -} - -U_CAPI void U_EXPORT2 -ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args, - const UChar* source, - int32_t length, - int32_t offsetIndex, - UErrorCode * err) -{ - if(U_FAILURE(*err)) { - return; - } - - ucnv_toUWriteUChars( - args->converter, - source, length, - &args->target, args->targetLimit, - &args->offsets, offsetIndex, - err); -} - -U_CAPI void U_EXPORT2 -ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args, - int32_t offsetIndex, - UErrorCode * err) -{ - static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD; - - /* could optimize this case, just one uchar */ - if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) { - ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err); - } else { - ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err); - } -} - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnv_cnv.cpp b/deps/node/deps/icu-small/source/common/ucnv_cnv.cpp deleted file mode 100644 index ea71acf9..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_cnv.cpp +++ /dev/null @@ -1,182 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2004, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* uconv_cnv.c: -* Implements all the low level conversion functions -* T_UnicodeConverter_{to,from}Unicode_$ConversionType -* -* Change history: -* -* 06/29/2000 helena Major rewrite of the callback APIs. 
-*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv_err.h" -#include "unicode/ucnv.h" -#include "unicode/uset.h" -#include "ucnv_cnv.h" -#include "ucnv_bld.h" -#include "cmemory.h" - -U_CFUNC void -ucnv_getCompleteUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - (void)cnv; - (void)which; - (void)pErrorCode; - sa->addRange(sa->set, 0, 0x10ffff); -} - -U_CFUNC void -ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - (void)cnv; - (void)which; - (void)pErrorCode; - sa->addRange(sa->set, 0, 0xd7ff); - sa->addRange(sa->set, 0xe000, 0x10ffff); -} - -U_CFUNC void -ucnv_fromUWriteBytes(UConverter *cnv, - const char *bytes, int32_t length, - char **target, const char *targetLimit, - int32_t **offsets, - int32_t sourceIndex, - UErrorCode *pErrorCode) { - char *t=*target; - int32_t *o; - - /* write bytes */ - if(offsets==NULL || (o=*offsets)==NULL) { - while(length>0 && t0 && t0) { - if(cnv!=NULL) { - t=(char *)cnv->charErrorBuffer; - cnv->charErrorBufferLength=(int8_t)length; - do { - *t++=(uint8_t)*bytes++; - } while(--length>0); - } - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } -} - -U_CFUNC void -ucnv_toUWriteUChars(UConverter *cnv, - const UChar *uchars, int32_t length, - UChar **target, const UChar *targetLimit, - int32_t **offsets, - int32_t sourceIndex, - UErrorCode *pErrorCode) { - UChar *t=*target; - int32_t *o; - - /* write UChars */ - if(offsets==NULL || (o=*offsets)==NULL) { - while(length>0 && t0 && t0) { - if(cnv!=NULL) { - t=cnv->UCharErrorBuffer; - cnv->UCharErrorBufferLength=(int8_t)length; - do { - *t++=*uchars++; - } while(--length>0); - } - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } -} - -U_CFUNC void -ucnv_toUWriteCodePoint(UConverter *cnv, - UChar32 c, - UChar **target, const UChar *targetLimit, - int32_t **offsets, - int32_t sourceIndex, - UErrorCode *pErrorCode) { - 
UChar *t; - int32_t *o; - - t=*target; - - if(t=0) { - if(cnv!=NULL) { - int8_t i=0; - U16_APPEND_UNSAFE(cnv->UCharErrorBuffer, i, c); - cnv->UCharErrorBufferLength=i; - } - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } -} - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnv_cnv.h b/deps/node/deps/icu-small/source/common/ucnv_cnv.h deleted file mode 100644 index a996e295..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_cnv.h +++ /dev/null @@ -1,323 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* ucnv_cnv.h: -* Definitions for converter implementations. -* -* Modification History: -* -* Date Name Description -* 05/09/00 helena Added implementation to handle fallback mappings. -* 06/29/2000 helena Major rewrite of the callback APIs. 
-*/ - -#ifndef UCNV_CNV_H -#define UCNV_CNV_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/ucnv_err.h" -#include "unicode/uset.h" -#include "uset_imp.h" - -U_CDECL_BEGIN - -/* this is used in fromUnicode DBCS tables as an "unassigned" marker */ -#define missingCharMarker 0xFFFF - -/* - * #define missingUCharMarker 0xfffe - * - * commented out because there are actually two values used in toUnicode tables: - * U+fffe "unassigned" - * U+ffff "illegal" - */ - -/** Forward declaration, see ucnv_bld.h */ -struct UConverterSharedData; -typedef struct UConverterSharedData UConverterSharedData; - -/* function types for UConverterImpl ---------------------------------------- */ - -/* struct with arguments for UConverterLoad and ucnv_load() */ -typedef struct { - int32_t size; /* sizeof(UConverterLoadArgs) */ - int32_t nestedLoads; /* count nested ucnv_load() calls */ - UBool onlyTestIsLoadable; /* input: don't actually load */ - UBool reserved0; /* reserved - for good alignment of the pointers */ - int16_t reserved; /* reserved - for good alignment of the pointers */ - uint32_t options; - const char *pkg, *name, *locale; -} UConverterLoadArgs; - -#define UCNV_LOAD_ARGS_INITIALIZER \ - { (int32_t)sizeof(UConverterLoadArgs), 0, FALSE, FALSE, 0, 0, NULL, NULL, NULL } - -typedef void (*UConverterLoad) (UConverterSharedData *sharedData, - UConverterLoadArgs *pArgs, - const uint8_t *raw, UErrorCode *pErrorCode); -typedef void (*UConverterUnload) (UConverterSharedData *sharedData); - -typedef void (*UConverterOpen) (UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *pErrorCode); -typedef void (*UConverterClose) (UConverter *cnv); - -typedef enum UConverterResetChoice { - UCNV_RESET_BOTH, - UCNV_RESET_TO_UNICODE, - UCNV_RESET_FROM_UNICODE -} UConverterResetChoice; - -typedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice); - -/* - * Converter implementation function(s) for ucnv_toUnicode(). 
- * If the toUnicodeWithOffsets function pointer is NULL, - * then the toUnicode function will be used and the offsets will be set to -1. - * - * Must maintain state across buffers. Use toUBytes[toULength] for partial input - * sequences; it will be checked in ucnv.c at the end of the input stream - * to detect truncated input. - * Some converters may need additional detection and may then set U_TRUNCATED_CHAR_FOUND. - * - * The toUnicodeWithOffsets must write exactly as many offset values as target - * units. Write offset values of -1 for when the source index corresponding to - * the output unit is not known (e.g., the character started in an earlier buffer). - * The pArgs->offsets pointer need not be moved forward. - * - * At function return, either one of the following conditions must be true: - * - U_BUFFER_OVERFLOW_ERROR and the target is full: target==targetLimit - * - another error code with toUBytes[toULength] set to the offending input - * - no error, and the source is consumed: source==sourceLimit - * - * The ucnv.c code will handle the end of the input (reset) - * (reset, and truncation detection) and callbacks. - */ -typedef void (*UConverterToUnicode) (UConverterToUnicodeArgs *, UErrorCode *); - -/* - * Same rules as for UConverterToUnicode. - * A lead surrogate is kept in fromUChar32 across buffers, and if an error - * occurs, then the offending input code point must be put into fromUChar32 - * as well. - */ -typedef void (*UConverterFromUnicode) (UConverterFromUnicodeArgs *, UErrorCode *); - -/* - * Converter implementation function for ucnv_convertEx(), for direct conversion - * between two charsets without pivoting through UTF-16. - * The rules are the same as for UConverterToUnicode and UConverterFromUnicode. - * In addition, - * - The toUnicode side must behave and keep state exactly like the - * UConverterToUnicode implementation for the same source charset. 
- * - A U_USING_DEFAULT_WARNING can be set to request to temporarily fall back - * to pivoting. When this function is called, the conversion framework makes - * sure that this warning is not set on input. - * - Continuing a partial match and flushing the toUnicode replay buffer - * are handled by pivoting, using the toUnicode and fromUnicode functions. - */ -typedef void (*UConverterConvert) (UConverterFromUnicodeArgs *pFromUArgs, - UConverterToUnicodeArgs *pToUArgs, - UErrorCode *pErrorCode); - -/* - * Converter implementation function for ucnv_getNextUChar(). - * If the function pointer is NULL, then the toUnicode function will be used. - * - * Will be called at a character boundary (toULength==0). - * May return with - * - U_INDEX_OUTOFBOUNDS_ERROR if there was no output for the input - * (the return value will be ignored) - * - U_TRUNCATED_CHAR_FOUND or another error code (never U_BUFFER_OVERFLOW_ERROR!) - * with toUBytes[toULength] set to the offending input - * (the return value will be ignored) - * - return UCNV_GET_NEXT_UCHAR_USE_TO_U, without moving the source pointer, - * to indicate that the ucnv.c code shall call the toUnicode function instead - * - return a real code point result - * - * Unless UCNV_GET_NEXT_UCHAR_USE_TO_U is returned, the source bytes must be consumed. - * - * The ucnv.c code will handle the end of the input (reset) - * (except for truncation detection!) and callbacks. - */ -typedef UChar32 (*UConverterGetNextUChar) (UConverterToUnicodeArgs *, UErrorCode *); - -typedef void (*UConverterGetStarters)(const UConverter* converter, - UBool starters[256], - UErrorCode *pErrorCode); - -/* If this function pointer is null or if the function returns null - * the name field in static data struct should be returned by - * ucnv_getName() API function - */ -typedef const char * (*UConverterGetName) (const UConverter *cnv); - -/** - * Write the codepage substitution character. 
- * If this function is not set, then ucnv_cbFromUWriteSub() writes - * the substitution character from UConverter. - * For stateful converters, it is typically necessary to handle this - * specificially for the converter in order to properly maintain the state. - */ -typedef void (*UConverterWriteSub) (UConverterFromUnicodeArgs *pArgs, int32_t offsetIndex, UErrorCode *pErrorCode); - -/** - * For converter-specific safeClone processing - * If this function is not set, then ucnv_safeClone assumes that the converter has no private data that changes - * after the converter is done opening. - * If this function is set, then it is called just after a memcpy() of - * converter data to the new, empty converter, and is expected to set up - * the initial state of the converter. It is not expected to increment the - * reference counts of the standard data types such as the shared data. - */ -typedef UConverter * (*UConverterSafeClone) (const UConverter *cnv, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status); - -/** - * Filters for some ucnv_getUnicodeSet() implementation code. - */ -typedef enum UConverterSetFilter { - UCNV_SET_FILTER_NONE, - UCNV_SET_FILTER_DBCS_ONLY, - UCNV_SET_FILTER_2022_CN, - UCNV_SET_FILTER_SJIS, - UCNV_SET_FILTER_GR94DBCS, - UCNV_SET_FILTER_HZ, - UCNV_SET_FILTER_COUNT -} UConverterSetFilter; - -/** - * Fills the set of Unicode code points that can be converted by an ICU converter. - * The API function ucnv_getUnicodeSet() clears the USet before calling - * the converter's getUnicodeSet() implementation; the converter should only - * add the appropriate code points to allow recursive use. - * For example, the ISO-2022-JP converter will call each subconverter's - * getUnicodeSet() implementation to consecutively add code points to - * the same USet, which will result in a union of the sets of all subconverters. - * - * For more documentation, see ucnv_getUnicodeSet() in ucnv.h. 
- */ -typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode); - -UBool CONVERSION_U_SUCCESS (UErrorCode err); - -/** - * UConverterImpl contains all the data and functions for a converter type. - * Its function pointers work much like a C++ vtable. - * Many converter types need to define only a subset of the functions; - * when a function pointer is NULL, then a default action will be performed. - * - * Every converter type must implement toUnicode, fromUnicode, and getNextUChar, - * otherwise the converter may crash. - * Every converter type that has variable-length codepage sequences should - * also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for - * correct offset handling. - * All other functions may or may not be implemented - it depends only on - * whether the converter type needs them. - * - * When open() fails, then close() will be called, if present. - */ -struct UConverterImpl { - UConverterType type; - - UConverterLoad load; - UConverterUnload unload; - - UConverterOpen open; - UConverterClose close; - UConverterReset reset; - - UConverterToUnicode toUnicode; - UConverterToUnicode toUnicodeWithOffsets; - UConverterFromUnicode fromUnicode; - UConverterFromUnicode fromUnicodeWithOffsets; - UConverterGetNextUChar getNextUChar; - - UConverterGetStarters getStarters; - UConverterGetName getName; - UConverterWriteSub writeSub; - UConverterSafeClone safeClone; - UConverterGetUnicodeSet getUnicodeSet; - - UConverterConvert toUTF8; - UConverterConvert fromUTF8; -}; - -extern const UConverterSharedData - _MBCSData, _Latin1Data, - _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, - _ISO2022Data, - _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6, - _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19, - _HZData,_ISCIIData, _SCSUData, _ASCIIData, - _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, 
_CESU8Data, _IMAPData, _CompoundTextData; - -U_CDECL_END - -/** Always use fallbacks from codepage to Unicode */ -#define TO_U_USE_FALLBACK(useFallback) TRUE -#define UCNV_TO_U_USE_FALLBACK(cnv) TRUE - -/** Use fallbacks from Unicode to codepage when cnv->useFallback or for private-use code points */ -#define IS_PRIVATE_USE(c) ((uint32_t)((c)-0xe000)<0x1900 || (uint32_t)((c)-0xf0000)<0x20000) -#define FROM_U_USE_FALLBACK(useFallback, c) ((useFallback) || IS_PRIVATE_USE(c)) -#define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c) - -/** - * Magic number for ucnv_getNextUChar(), returned by a - * getNextUChar() implementation to indicate to use the converter's toUnicode() - * instead of the native function. - * @internal - */ -#define UCNV_GET_NEXT_UCHAR_USE_TO_U -9 - -U_CFUNC void -ucnv_getCompleteUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode); - -U_CFUNC void -ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode); - -U_CFUNC void -ucnv_fromUWriteBytes(UConverter *cnv, - const char *bytes, int32_t length, - char **target, const char *targetLimit, - int32_t **offsets, - int32_t sourceIndex, - UErrorCode *pErrorCode); -U_CFUNC void -ucnv_toUWriteUChars(UConverter *cnv, - const UChar *uchars, int32_t length, - UChar **target, const UChar *targetLimit, - int32_t **offsets, - int32_t sourceIndex, - UErrorCode *pErrorCode); - -U_CFUNC void -ucnv_toUWriteCodePoint(UConverter *cnv, - UChar32 c, - UChar **target, const UChar *targetLimit, - int32_t **offsets, - int32_t sourceIndex, - UErrorCode *pErrorCode); - -#endif - -#endif /* UCNV_CNV */ diff --git a/deps/node/deps/icu-small/source/common/ucnv_ct.cpp b/deps/node/deps/icu-small/source/common/ucnv_ct.cpp deleted file mode 100644 index b40e1b2c..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_ct.cpp +++ /dev/null @@ -1,646 +0,0 @@ -// © 2016 
and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2010-2015, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnv_ct.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010Dec09 -* created by: Michael Ow -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/uset.h" -#include "unicode/ucnv_err.h" -#include "unicode/ucnv_cb.h" -#include "unicode/utf16.h" -#include "ucnv_imp.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "ucnvmbcs.h" -#include "cstring.h" -#include "cmemory.h" - -typedef enum { - INVALID = -2, - DO_SEARCH = -1, - - COMPOUND_TEXT_SINGLE_0 = 0, - COMPOUND_TEXT_SINGLE_1 = 1, - COMPOUND_TEXT_SINGLE_2 = 2, - COMPOUND_TEXT_SINGLE_3 = 3, - - COMPOUND_TEXT_DOUBLE_1 = 4, - COMPOUND_TEXT_DOUBLE_2 = 5, - COMPOUND_TEXT_DOUBLE_3 = 6, - COMPOUND_TEXT_DOUBLE_4 = 7, - COMPOUND_TEXT_DOUBLE_5 = 8, - COMPOUND_TEXT_DOUBLE_6 = 9, - COMPOUND_TEXT_DOUBLE_7 = 10, - - COMPOUND_TEXT_TRIPLE_DOUBLE = 11, - - IBM_915 = 12, - IBM_916 = 13, - IBM_914 = 14, - IBM_874 = 15, - IBM_912 = 16, - IBM_913 = 17, - ISO_8859_14 = 18, - IBM_923 = 19, - NUM_OF_CONVERTERS = 20 -} COMPOUND_TEXT_CONVERTERS; - -#define SEARCH_LENGTH 12 - -static const uint8_t escSeqCompoundText[NUM_OF_CONVERTERS][5] = { - /* Single */ - { 0x1B, 0x2D, 0x41, 0, 0 }, - { 0x1B, 0x2D, 0x4D, 0, 0 }, - { 0x1B, 0x2D, 0x46, 0, 0 }, - { 0x1B, 0x2D, 0x47, 0, 0 }, - - /* Double */ - { 0x1B, 0x24, 0x29, 0x41, 0 }, - { 0x1B, 0x24, 0x29, 0x42, 0 }, - { 0x1B, 0x24, 0x29, 0x43, 0 }, - { 0x1B, 0x24, 0x29, 0x44, 0 }, - { 0x1B, 0x24, 0x29, 0x47, 0 }, - { 0x1B, 0x24, 0x29, 0x48, 0 }, - { 0x1B, 0x24, 0x29, 0x49, 
0 }, - - /* Triple/Double */ - { 0x1B, 0x25, 0x47, 0, 0 }, - - /*IBM-915*/ - { 0x1B, 0x2D, 0x4C, 0, 0 }, - /*IBM-916*/ - { 0x1B, 0x2D, 0x48, 0, 0 }, - /*IBM-914*/ - { 0x1B, 0x2D, 0x44, 0, 0 }, - /*IBM-874*/ - { 0x1B, 0x2D, 0x54, 0, 0 }, - /*IBM-912*/ - { 0x1B, 0x2D, 0x42, 0, 0 }, - /* IBM-913 */ - { 0x1B, 0x2D, 0x43, 0, 0 }, - /* ISO-8859_14 */ - { 0x1B, 0x2D, 0x5F, 0, 0 }, - /* IBM-923 */ - { 0x1B, 0x2D, 0x62, 0, 0 }, -}; - -#define ESC_START 0x1B - -#define isASCIIRange(codepoint) \ - ((codepoint == 0x0000) || (codepoint == 0x0009) || (codepoint == 0x000A) || \ - (codepoint >= 0x0020 && codepoint <= 0x007f) || (codepoint >= 0x00A0 && codepoint <= 0x00FF)) - -#define isIBM915(codepoint) \ - ((codepoint >= 0x0401 && codepoint <= 0x045F) || (codepoint == 0x2116)) - -#define isIBM916(codepoint) \ - ((codepoint >= 0x05D0 && codepoint <= 0x05EA) || (codepoint == 0x2017) || (codepoint == 0x203E)) - -#define isCompoundS3(codepoint) \ - ((codepoint == 0x060C) || (codepoint == 0x061B) || (codepoint == 0x061F) || (codepoint >= 0x0621 && codepoint <= 0x063A) || \ - (codepoint >= 0x0640 && codepoint <= 0x0652) || (codepoint >= 0x0660 && codepoint <= 0x066D) || (codepoint == 0x200B) || \ - (codepoint >= 0x0FE70 && codepoint <= 0x0FE72) || (codepoint == 0x0FE74) || (codepoint >= 0x0FE76 && codepoint <= 0x0FEBE)) - -#define isCompoundS2(codepoint) \ - ((codepoint == 0x02BC) || (codepoint == 0x02BD) || (codepoint >= 0x0384 && codepoint <= 0x03CE) || (codepoint == 0x2015)) - -#define isIBM914(codepoint) \ - ((codepoint == 0x0100) || (codepoint == 0x0101) || (codepoint == 0x0112) || (codepoint == 0x0113) || (codepoint == 0x0116) || (codepoint == 0x0117) || \ - (codepoint == 0x0122) || (codepoint == 0x0123) || (codepoint >= 0x0128 && codepoint <= 0x012B) || (codepoint == 0x012E) || (codepoint == 0x012F) || \ - (codepoint >= 0x0136 && codepoint <= 0x0138) || (codepoint == 0x013B) || (codepoint == 0x013C) || (codepoint == 0x0145) || (codepoint == 0x0146) || \ - (codepoint >= 0x014A && 
codepoint <= 0x014D) || (codepoint == 0x0156) || (codepoint == 0x0157) || (codepoint >= 0x0166 && codepoint <= 0x016B) || \ - (codepoint == 0x0172) || (codepoint == 0x0173)) - -#define isIBM874(codepoint) \ - ((codepoint >= 0x0E01 && codepoint <= 0x0E3A) || (codepoint >= 0x0E3F && codepoint <= 0x0E5B)) - -#define isIBM912(codepoint) \ - ((codepoint >= 0x0102 && codepoint <= 0x0107) || (codepoint >= 0x010C && codepoint <= 0x0111) || (codepoint >= 0x0118 && codepoint <= 0x011B) || \ - (codepoint == 0x0139) || (codepoint == 0x013A) || (codepoint == 0x013D) || (codepoint == 0x013E) || (codepoint >= 0x0141 && codepoint <= 0x0144) || \ - (codepoint == 0x0147) || (codepoint == 0x0147) || (codepoint == 0x0150) || (codepoint == 0x0151) || (codepoint == 0x0154) || (codepoint == 0x0155) || \ - (codepoint >= 0x0158 && codepoint <= 0x015B) || (codepoint == 0x015E) || (codepoint == 0x015F) || (codepoint >= 0x0160 && codepoint <= 0x0165) || \ - (codepoint == 0x016E) || (codepoint == 0x016F) || (codepoint == 0x0170) || (codepoint == 0x0171) || (codepoint >= 0x0179 && codepoint <= 0x017E) || \ - (codepoint == 0x02C7) || (codepoint == 0x02D8) || (codepoint == 0x02D9) || (codepoint == 0x02DB) || (codepoint == 0x02DD)) - -#define isIBM913(codepoint) \ - ((codepoint >= 0x0108 && codepoint <= 0x010B) || (codepoint == 0x011C) || \ - (codepoint == 0x011D) || (codepoint == 0x0120) || (codepoint == 0x0121) || \ - (codepoint >= 0x0124 && codepoint <= 0x0127) || (codepoint == 0x0134) || (codepoint == 0x0135) || \ - (codepoint == 0x015C) || (codepoint == 0x015D) || (codepoint == 0x016C) || (codepoint == 0x016D)) - -#define isCompoundS1(codepoint) \ - ((codepoint == 0x011E) || (codepoint == 0x011F) || (codepoint == 0x0130) || \ - (codepoint == 0x0131) || (codepoint >= 0x0218 && codepoint <= 0x021B)) - -#define isISO8859_14(codepoint) \ - ((codepoint >= 0x0174 && codepoint <= 0x0177) || (codepoint == 0x1E0A) || \ - (codepoint == 0x1E0B) || (codepoint == 0x1E1E) || (codepoint == 0x1E1F) || \ - 
(codepoint == 0x1E40) || (codepoint == 0x1E41) || (codepoint == 0x1E56) || \ - (codepoint == 0x1E57) || (codepoint == 0x1E60) || (codepoint == 0x1E61) || \ - (codepoint == 0x1E6A) || (codepoint == 0x1E6B) || (codepoint == 0x1EF2) || \ - (codepoint == 0x1EF3) || (codepoint >= 0x1E80 && codepoint <= 0x1E85)) - -#define isIBM923(codepoint) \ - ((codepoint == 0x0152) || (codepoint == 0x0153) || (codepoint == 0x0178) || (codepoint == 0x20AC)) - - -typedef struct{ - UConverterSharedData *myConverterArray[NUM_OF_CONVERTERS]; - COMPOUND_TEXT_CONVERTERS state; -} UConverterDataCompoundText; - -/*********** Compound Text Converter Protos ***********/ -U_CDECL_BEGIN -static void U_CALLCONV -_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); - -static void U_CALLCONV - _CompoundTextClose(UConverter *converter); - -static void U_CALLCONV -_CompoundTextReset(UConverter *converter, UConverterResetChoice choice); - -static const char* U_CALLCONV -_CompoundTextgetName(const UConverter* cnv); - - -static int32_t findNextEsc(const char *source, const char *sourceLimit) { - int32_t length = static_cast(sourceLimit - source); - int32_t i; - for (i = 1; i < length; i++) { - if (*(source + i) == 0x1B) { - return i; - } - } - - return length; -} - -static COMPOUND_TEXT_CONVERTERS getState(int codepoint) { - COMPOUND_TEXT_CONVERTERS state = DO_SEARCH; - - if (isASCIIRange(codepoint)) { - state = COMPOUND_TEXT_SINGLE_0; - } else if (isIBM912(codepoint)) { - state = IBM_912; - }else if (isIBM913(codepoint)) { - state = IBM_913; - } else if (isISO8859_14(codepoint)) { - state = ISO_8859_14; - } else if (isIBM923(codepoint)) { - state = IBM_923; - } else if (isIBM874(codepoint)) { - state = IBM_874; - } else if (isIBM914(codepoint)) { - state = IBM_914; - } else if (isCompoundS2(codepoint)) { - state = COMPOUND_TEXT_SINGLE_2; - } else if (isCompoundS3(codepoint)) { - state = COMPOUND_TEXT_SINGLE_3; - } else if (isIBM916(codepoint)) { - state = IBM_916; - } 
else if (isIBM915(codepoint)) { - state = IBM_915; - } else if (isCompoundS1(codepoint)) { - state = COMPOUND_TEXT_SINGLE_1; - } - - return state; -} - -static COMPOUND_TEXT_CONVERTERS findStateFromEscSeq(const char* source, const char* sourceLimit, const uint8_t* toUBytesBuffer, int32_t toUBytesBufferLength, UErrorCode *err) { - COMPOUND_TEXT_CONVERTERS state = INVALID; - UBool matchFound = FALSE; - int32_t i, n, offset = toUBytesBufferLength; - - for (i = 0; i < NUM_OF_CONVERTERS; i++) { - matchFound = TRUE; - for (n = 0; escSeqCompoundText[i][n] != 0; n++) { - if (n < toUBytesBufferLength) { - if (toUBytesBuffer[n] != escSeqCompoundText[i][n]) { - matchFound = FALSE; - break; - } - } else if ((source + (n - offset)) >= sourceLimit) { - *err = U_TRUNCATED_CHAR_FOUND; - matchFound = FALSE; - break; - } else if (*(source + (n - offset)) != escSeqCompoundText[i][n]) { - matchFound = FALSE; - break; - } - } - - if (matchFound) { - break; - } - } - - if (matchFound) { - state = (COMPOUND_TEXT_CONVERTERS)i; - } - - return state; -} - -static void U_CALLCONV -_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ - cnv->extraInfo = uprv_malloc (sizeof (UConverterDataCompoundText)); - if (cnv->extraInfo != NULL) { - UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; - - UConverterNamePieces stackPieces; - UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; - - myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_0] = NULL; - myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_1] = ucnv_loadSharedData("icu-internal-compound-s1", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_2] = ucnv_loadSharedData("icu-internal-compound-s2", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_3] = ucnv_loadSharedData("icu-internal-compound-s3", &stackPieces, &stackArgs, errorCode); - 
myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_1] = ucnv_loadSharedData("icu-internal-compound-d1", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_2] = ucnv_loadSharedData("icu-internal-compound-d2", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_3] = ucnv_loadSharedData("icu-internal-compound-d3", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_4] = ucnv_loadSharedData("icu-internal-compound-d4", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_5] = ucnv_loadSharedData("icu-internal-compound-d5", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_6] = ucnv_loadSharedData("icu-internal-compound-d6", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_7] = ucnv_loadSharedData("icu-internal-compound-d7", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[COMPOUND_TEXT_TRIPLE_DOUBLE] = ucnv_loadSharedData("icu-internal-compound-t", &stackPieces, &stackArgs, errorCode); - - myConverterData->myConverterArray[IBM_915] = ucnv_loadSharedData("ibm-915_P100-1995", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[IBM_916] = ucnv_loadSharedData("ibm-916_P100-1995", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[IBM_914] = ucnv_loadSharedData("ibm-914_P100-1995", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[IBM_874] = ucnv_loadSharedData("ibm-874_P100-1995", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[IBM_912] = ucnv_loadSharedData("ibm-912_P100-1995", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[IBM_913] = ucnv_loadSharedData("ibm-913_P100-2000", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[ISO_8859_14] = 
ucnv_loadSharedData("iso-8859_14-1998", &stackPieces, &stackArgs, errorCode); - myConverterData->myConverterArray[IBM_923] = ucnv_loadSharedData("ibm-923_P100-1998", &stackPieces, &stackArgs, errorCode); - - if (U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) { - _CompoundTextClose(cnv); - return; - } - - myConverterData->state = (COMPOUND_TEXT_CONVERTERS)0; - } else { - *errorCode = U_MEMORY_ALLOCATION_ERROR; - } -} - - -static void U_CALLCONV -_CompoundTextClose(UConverter *converter) { - UConverterDataCompoundText* myConverterData = (UConverterDataCompoundText*)(converter->extraInfo); - int32_t i; - - if (converter->extraInfo != NULL) { - /*close the array of converter pointers and free the memory*/ - for (i = 0; i < NUM_OF_CONVERTERS; i++) { - if (myConverterData->myConverterArray[i] != NULL) { - ucnv_unloadSharedDataIfReady(myConverterData->myConverterArray[i]); - } - } - - uprv_free(converter->extraInfo); - converter->extraInfo = NULL; - } -} - -static void U_CALLCONV -_CompoundTextReset(UConverter *converter, UConverterResetChoice choice) { - (void)converter; - (void)choice; -} - -static const char* U_CALLCONV -_CompoundTextgetName(const UConverter* cnv){ - (void)cnv; - return "x11-compound-text"; -} - -static void U_CALLCONV -UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs* args, UErrorCode* err){ - UConverter *cnv = args->converter; - uint8_t *target = (uint8_t *) args->target; - const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; - const UChar* source = args->source; - const UChar* sourceLimit = args->sourceLimit; - /* int32_t* offsets = args->offsets; */ - UChar32 sourceChar; - UBool useFallback = cnv->useFallback; - uint8_t tmpTargetBuffer[7]; - int32_t tmpTargetBufferLength = 0; - COMPOUND_TEXT_CONVERTERS currentState, tmpState; - uint32_t pValue; - int32_t pValueLength = 0; - int32_t i, n, j; - - UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; - - currentState = 
myConverterData->state; - - /* check if the last codepoint of previous buffer was a lead surrogate*/ - if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { - goto getTrail; - } - - while( source < sourceLimit){ - if(target < targetLimit){ - - sourceChar = *(source++); - /*check if the char is a First surrogate*/ - if(U16_IS_SURROGATE(sourceChar)) { - if(U16_IS_SURROGATE_LEAD(sourceChar)) { -getTrail: - /*look ahead to find the trail surrogate*/ - if(source < sourceLimit) { - /* test the following code unit */ - UChar trail=(UChar) *source; - if(U16_IS_TRAIL(trail)) { - source++; - sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); - cnv->fromUChar32=0x00; - /* convert this supplementary code point */ - /* exit this condition tree */ - } else { - /* this is an unmatched lead code unit (1st surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - cnv->fromUChar32=sourceChar; - break; - } - } else { - /* no more input */ - cnv->fromUChar32=sourceChar; - break; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - cnv->fromUChar32=sourceChar; - break; - } - } - - tmpTargetBufferLength = 0; - tmpState = getState(sourceChar); - - if (tmpState != DO_SEARCH && currentState != tmpState) { - /* Get escape sequence if necessary */ - currentState = tmpState; - for (i = 0; escSeqCompoundText[currentState][i] != 0; i++) { - tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][i]; - } - } - - if (tmpState == DO_SEARCH) { - /* Test all available converters */ - for (i = 1; i < SEARCH_LENGTH; i++) { - pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[i], sourceChar, &pValue, useFallback); - if (pValueLength > 0) { - tmpState = (COMPOUND_TEXT_CONVERTERS)i; - if (currentState != tmpState) { - currentState = tmpState; - for (j = 0; escSeqCompoundText[currentState][j] != 0; j++) { - tmpTargetBuffer[tmpTargetBufferLength++] = 
escSeqCompoundText[currentState][j]; - } - } - for (n = (pValueLength - 1); n >= 0; n--) { - tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8)); - } - break; - } - } - } else if (tmpState == COMPOUND_TEXT_SINGLE_0) { - tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)sourceChar; - } else { - pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[currentState], sourceChar, &pValue, useFallback); - if (pValueLength > 0) { - for (n = (pValueLength - 1); n >= 0; n--) { - tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8)); - } - } - } - - for (i = 0; i < tmpTargetBufferLength; i++) { - if (target < targetLimit) { - *target++ = tmpTargetBuffer[i]; - } else { - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - if (*err == U_BUFFER_OVERFLOW_ERROR) { - for (; i < tmpTargetBufferLength; i++) { - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = tmpTargetBuffer[i]; - } - } - } else { - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - /*save the state and return */ - myConverterData->state = currentState; - args->source = source; - args->target = (char*)target; -} - - -static void U_CALLCONV -UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs *args, - UErrorCode* err){ - const char *mySource = (char *) args->source; - UChar *myTarget = args->target; - const char *mySourceLimit = args->sourceLimit; - const char *tmpSourceLimit = mySourceLimit; - uint32_t mySourceChar = 0x0000; - COMPOUND_TEXT_CONVERTERS currentState, tmpState; - int32_t sourceOffset = 0; - UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) args->converter->extraInfo; - UConverterSharedData* savedSharedData = NULL; - - UConverterToUnicodeArgs subArgs; - int32_t minArgsSize; - - /* set up the subconverter arguments */ - if(args->sizesize; - } else { - minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs); - } - - uprv_memcpy(&subArgs, args, minArgsSize); - subArgs.size = 
(uint16_t)minArgsSize; - - currentState = tmpState = myConverterData->state; - - while(mySource < mySourceLimit){ - if(myTarget < args->targetLimit){ - if (args->converter->toULength > 0) { - mySourceChar = args->converter->toUBytes[0]; - } else { - mySourceChar = (uint8_t)*mySource; - } - - if (mySourceChar == ESC_START) { - tmpState = findStateFromEscSeq(mySource, mySourceLimit, args->converter->toUBytes, args->converter->toULength, err); - - if (*err == U_TRUNCATED_CHAR_FOUND) { - for (; mySource < mySourceLimit;) { - args->converter->toUBytes[args->converter->toULength++] = *mySource++; - } - *err = U_ZERO_ERROR; - break; - } else if (tmpState == INVALID) { - if (args->converter->toULength == 0) { - mySource++; /* skip over the 0x1b byte */ - } - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - - if (tmpState != currentState) { - currentState = tmpState; - } - - sourceOffset = static_cast(uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength); - - mySource += sourceOffset; - - args->converter->toULength = 0; - } - - if (currentState == COMPOUND_TEXT_SINGLE_0) { - while (mySource < mySourceLimit) { - if (*mySource == ESC_START) { - break; - } - if (myTarget < args->targetLimit) { - *myTarget++ = 0x00ff&(*mySource++); - } else { - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } else if (mySource < mySourceLimit){ - sourceOffset = findNextEsc(mySource, mySourceLimit); - - tmpSourceLimit = mySource + sourceOffset; - - subArgs.source = mySource; - subArgs.sourceLimit = tmpSourceLimit; - subArgs.target = myTarget; - savedSharedData = subArgs.converter->sharedData; - subArgs.converter->sharedData = myConverterData->myConverterArray[currentState]; - - ucnv_MBCSToUnicodeWithOffsets(&subArgs, err); - - subArgs.converter->sharedData = savedSharedData; - - mySource = subArgs.source; - myTarget = subArgs.target; - - if (U_FAILURE(*err)) { - if(*err == U_BUFFER_OVERFLOW_ERROR) { - if(subArgs.converter->UCharErrorBufferLength > 0) { - 
uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer, - subArgs.converter->UCharErrorBufferLength); - } - args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength; - subArgs.converter->UCharErrorBufferLength = 0; - } - break; - } - } - } else { - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - myConverterData->state = currentState; - args->target = myTarget; - args->source = mySource; -} - -static void U_CALLCONV -_CompoundText_GetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *)cnv->extraInfo; - int32_t i; - - for (i = 1; i < NUM_OF_CONVERTERS; i++) { - ucnv_MBCSGetUnicodeSetForUnicode(myConverterData->myConverterArray[i], sa, which, pErrorCode); - } - sa->add(sa->set, 0x0000); - sa->add(sa->set, 0x0009); - sa->add(sa->set, 0x000A); - sa->addRange(sa->set, 0x0020, 0x007F); - sa->addRange(sa->set, 0x00A0, 0x00FF); -} -U_CDECL_END - -static const UConverterImpl _CompoundTextImpl = { - - UCNV_COMPOUND_TEXT, - - NULL, - NULL, - - _CompoundTextOpen, - _CompoundTextClose, - _CompoundTextReset, - - UConverter_toUnicode_CompoundText_OFFSETS, - UConverter_toUnicode_CompoundText_OFFSETS, - UConverter_fromUnicode_CompoundText_OFFSETS, - UConverter_fromUnicode_CompoundText_OFFSETS, - NULL, - - NULL, - _CompoundTextgetName, - NULL, - NULL, - _CompoundText_GetUnicodeSet, - NULL, - NULL -}; - -static const UConverterStaticData _CompoundTextStaticData = { - sizeof(UConverterStaticData), - "COMPOUND_TEXT", - 0, - UCNV_IBM, - UCNV_COMPOUND_TEXT, - 1, - 6, - { 0xef, 0, 0, 0 }, - 1, - FALSE, - FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; -const UConverterSharedData _CompoundTextData = - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CompoundTextStaticData, &_CompoundTextImpl); - -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git 
a/deps/node/deps/icu-small/source/common/ucnv_err.cpp b/deps/node/deps/icu-small/source/common/ucnv_err.cpp deleted file mode 100644 index 63794d23..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_err.cpp +++ /dev/null @@ -1,486 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ***************************************************************************** - * - * Copyright (C) 1998-2016, International Business Machines - * Corporation and others. All Rights Reserved. - * - ***************************************************************************** - * - * ucnv_err.c - * Implements error behaviour functions called by T_UConverter_{from,to}Unicode - * - * -* Change history: -* -* 06/29/2000 helena Major rewrite of the callback APIs. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv_err.h" -#include "unicode/ucnv_cb.h" -#include "ucnv_cnv.h" -#include "cmemory.h" -#include "unicode/ucnv.h" -#include "ustrfmt.h" - -#define VALUE_STRING_LENGTH 48 -/*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */ -#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025 -#define UNICODE_U_CODEPOINT 0x0055 -#define UNICODE_X_CODEPOINT 0x0058 -#define UNICODE_RS_CODEPOINT 0x005C -#define UNICODE_U_LOW_CODEPOINT 0x0075 -#define UNICODE_X_LOW_CODEPOINT 0x0078 -#define UNICODE_AMP_CODEPOINT 0x0026 -#define UNICODE_HASH_CODEPOINT 0x0023 -#define UNICODE_SEMICOLON_CODEPOINT 0x003B -#define UNICODE_PLUS_CODEPOINT 0x002B -#define UNICODE_LEFT_CURLY_CODEPOINT 0x007B -#define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D -#define UNICODE_SPACE_CODEPOINT 0x0020 -#define UCNV_PRV_ESCAPE_ICU 0 -#define UCNV_PRV_ESCAPE_C 'C' -#define UCNV_PRV_ESCAPE_XML_DEC 'D' -#define UCNV_PRV_ESCAPE_XML_HEX 'X' -#define UCNV_PRV_ESCAPE_JAVA 'J' -#define UCNV_PRV_ESCAPE_UNICODE 'U' -#define UCNV_PRV_ESCAPE_CSS2 'S' -#define UCNV_PRV_STOP_ON_ILLEGAL 'i' - 
-/* - * IS_DEFAULT_IGNORABLE_CODE_POINT - * This is to check if a code point has the default ignorable unicode property. - * As such, this list needs to be updated if the ignorable code point list ever - * changes. - * To avoid dependency on other code, this list is hard coded here. - * When an ignorable code point is found and is unmappable, the default callbacks - * will ignore them. - * For a list of the default ignorable code points, use this link: - * https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i= - * - * This list should be sync with the one in CharsetCallback.java - */ -#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \ - (c == 0x00AD) || \ - (c == 0x034F) || \ - (c == 0x061C) || \ - (c == 0x115F) || \ - (c == 0x1160) || \ - (0x17B4 <= c && c <= 0x17B5) || \ - (0x180B <= c && c <= 0x180E) || \ - (0x200B <= c && c <= 0x200F) || \ - (0x202A <= c && c <= 0x202E) || \ - (0x2060 <= c && c <= 0x206F) || \ - (c == 0x3164) || \ - (0xFE00 <= c && c <= 0xFE0F) || \ - (c == 0xFEFF) || \ - (c == 0xFFA0) || \ - (0xFFF0 <= c && c <= 0xFFF8) || \ - (0x1BCA0 <= c && c <= 0x1BCA3) || \ - (0x1D173 <= c && c <= 0x1D17A) || \ - (0xE0000 <= c && c <= 0xE0FFF)) - - -/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ -U_CAPI void U_EXPORT2 -UCNV_FROM_U_CALLBACK_STOP ( - const void *context, - UConverterFromUnicodeArgs *fromUArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err) -{ - (void)context; - (void)fromUArgs; - (void)codeUnits; - (void)length; - if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) - { - /* - * Skip if the codepoint has unicode property of default ignorable. 
- */ - *err = U_ZERO_ERROR; - } - /* the caller must have set the error code accordingly */ - return; -} - - -/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ -U_CAPI void U_EXPORT2 -UCNV_TO_U_CALLBACK_STOP ( - const void *context, - UConverterToUnicodeArgs *toUArgs, - const char* codePoints, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err) -{ - /* the caller must have set the error code accordingly */ - (void)context; (void)toUArgs; (void)codePoints; (void)length; (void)reason; (void)err; - return; -} - -U_CAPI void U_EXPORT2 -UCNV_FROM_U_CALLBACK_SKIP ( - const void *context, - UConverterFromUnicodeArgs *fromUArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err) -{ - (void)fromUArgs; - (void)codeUnits; - (void)length; - if (reason <= UCNV_IRREGULAR) - { - if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) - { - /* - * Skip if the codepoint has unicode property of default ignorable. - */ - *err = U_ZERO_ERROR; - } - else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) - { - *err = U_ZERO_ERROR; - } - /* else the caller must have set the error code accordingly. */ - } - /* else ignore the reset, close and clone calls. */ -} - -U_CAPI void U_EXPORT2 -UCNV_FROM_U_CALLBACK_SUBSTITUTE ( - const void *context, - UConverterFromUnicodeArgs *fromArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err) -{ - (void)codeUnits; - (void)length; - if (reason <= UCNV_IRREGULAR) - { - if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) - { - /* - * Skip if the codepoint has unicode property of default ignorable. 
- */ - *err = U_ZERO_ERROR; - } - else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) - { - *err = U_ZERO_ERROR; - ucnv_cbFromUWriteSub(fromArgs, 0, err); - } - /* else the caller must have set the error code accordingly. */ - } - /* else ignore the reset, close and clone calls. */ -} - -/*uses uprv_itou to get a unicode escape sequence of the offensive sequence, - *uses a clean copy (resetted) of the converter, to convert that unicode - *escape sequence to the target codepage (if conversion failure happens then - *we revert to substituting with subchar) - */ -U_CAPI void U_EXPORT2 -UCNV_FROM_U_CALLBACK_ESCAPE ( - const void *context, - UConverterFromUnicodeArgs *fromArgs, - const UChar *codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err) -{ - - UChar valueString[VALUE_STRING_LENGTH]; - int32_t valueStringLength = 0; - int32_t i = 0; - - const UChar *myValueSource = NULL; - UErrorCode err2 = U_ZERO_ERROR; - UConverterFromUCallback original = NULL; - const void *originalContext; - - UConverterFromUCallback ignoredCallback = NULL; - const void *ignoredContext; - - if (reason > UCNV_IRREGULAR) - { - return; - } - else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) - { - /* - * Skip if the codepoint has unicode property of default ignorable. 
- */ - *err = U_ZERO_ERROR; - return; - } - - ucnv_setFromUCallBack (fromArgs->converter, - (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, - NULL, - &original, - &originalContext, - &err2); - - if (U_FAILURE (err2)) - { - *err = err2; - return; - } - if(context==NULL) - { - while (i < length) - { - valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ - valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); - } - } - else - { - switch(*((char*)context)) - { - case UCNV_PRV_ESCAPE_JAVA: - while (i < length) - { - valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ - valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); - } - break; - - case UCNV_PRV_ESCAPE_C: - valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ - - if(length==2){ - valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8); - - } - else{ - valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); - } - break; - - case UCNV_PRV_ESCAPE_XML_DEC: - - valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ - valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ - if(length==2){ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0); - } - else{ 
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0); - } - valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ - break; - - case UCNV_PRV_ESCAPE_XML_HEX: - - valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ - valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ - valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ - if(length==2){ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); - } - else{ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0); - } - valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ - break; - - case UCNV_PRV_ESCAPE_UNICODE: - valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */ - valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ - valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */ - if (length == 2) { - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4); - } else { - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); - } - valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */ - break; - - case UCNV_PRV_ESCAPE_CSS2: - valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); - /* Always add space character, becase the next character might be whitespace, - which would erroneously be considered the termination of 
the escape sequence. */ - valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT; - break; - - default: - while (i < length) - { - valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ - valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ - valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); - } - } - } - myValueSource = valueString; - - /* reset the error */ - *err = U_ZERO_ERROR; - - ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err); - - ucnv_setFromUCallBack (fromArgs->converter, - original, - originalContext, - &ignoredCallback, - &ignoredContext, - &err2); - if (U_FAILURE (err2)) - { - *err = err2; - return; - } - - return; -} - - - -U_CAPI void U_EXPORT2 -UCNV_TO_U_CALLBACK_SKIP ( - const void *context, - UConverterToUnicodeArgs *toArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err) -{ - (void)toArgs; - (void)codeUnits; - (void)length; - if (reason <= UCNV_IRREGULAR) - { - if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) - { - *err = U_ZERO_ERROR; - } - /* else the caller must have set the error code accordingly. */ - } - /* else ignore the reset, close and clone calls. */ -} - -U_CAPI void U_EXPORT2 -UCNV_TO_U_CALLBACK_SUBSTITUTE ( - const void *context, - UConverterToUnicodeArgs *toArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err) -{ - (void)codeUnits; - (void)length; - if (reason <= UCNV_IRREGULAR) - { - if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) - { - *err = U_ZERO_ERROR; - ucnv_cbToUWriteSub(toArgs,0,err); - } - /* else the caller must have set the error code accordingly. */ - } - /* else ignore the reset, close and clone calls. 
*/ -} - -/*uses uprv_itou to get a unicode escape sequence of the offensive sequence, - *and uses that as the substitution sequence - */ -U_CAPI void U_EXPORT2 -UCNV_TO_U_CALLBACK_ESCAPE ( - const void *context, - UConverterToUnicodeArgs *toArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err) -{ - UChar uniValueString[VALUE_STRING_LENGTH]; - int32_t valueStringLength = 0; - int32_t i = 0; - - if (reason > UCNV_IRREGULAR) - { - return; - } - - if(context==NULL) - { - while (i < length) - { - uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ - uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ - valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); - } - } - else - { - switch(*((char*)context)) - { - case UCNV_PRV_ESCAPE_XML_DEC: - while (i < length) - { - uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ - uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ - valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0); - uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ - } - break; - - case UCNV_PRV_ESCAPE_XML_HEX: - while (i < length) - { - uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ - uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ - uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ - valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0); - uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ - } - break; - case UCNV_PRV_ESCAPE_C: - 
while (i < length) - { - uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ - uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ - valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2); - } - break; - default: - while (i < length) - { - uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ - uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ - uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); - valueStringLength += 2; - } - } - } - /* reset the error */ - *err = U_ZERO_ERROR; - - ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err); -} - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnv_ext.cpp b/deps/node/deps/icu-small/source/common/ucnv_ext.cpp deleted file mode 100644 index 7dea4eef..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_ext.cpp +++ /dev/null @@ -1,1143 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2003-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ucnv_ext.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003jun13 -* created by: Markus W. 
Scherer -* -* Conversion extensions -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION - -#include "unicode/uset.h" -#include "unicode/ustring.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "ucnv_ext.h" -#include "cmemory.h" -#include "uassert.h" - -/* to Unicode --------------------------------------------------------------- */ - -/* - * @return lookup value for the byte, if found; else 0 - */ -static inline uint32_t -ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) { - uint32_t word0, word; - int32_t i, start, limit; - - /* check the input byte against the lowest and highest section bytes */ - start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]); - limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]); - if(byte1) { - return 0; /* no match of a DBCS sequence in SBCS mode */ - } else if(preLength==1) { - srcLength=0; - } else /* preLength==0 */ { - if(srcLength>1) { - srcLength=1; - } - } - flush=TRUE; - } - - /* we must not remember fallback matches when not using fallbacks */ - - /* match input units until there is a full match or the input is consumed */ - for(;;) { - /* go to the next section */ - toUSection=toUTable+idx; - - /* read first pair of the section */ - value=*toUSection++; - length=UCNV_EXT_TO_U_GET_BYTE(value); - value=UCNV_EXT_TO_U_GET_VALUE(value); - if( value!=0 && - (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || - TO_U_USE_FALLBACK(useFallback)) && - UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) - ) { - /* remember longest match so far */ - matchValue=value; - matchLength=i+j; - } - - /* match pre[] then src[] */ - if(iUCNV_EXT_MAX_BYTES) { - /* - * end of the entire input stream, stop with the longest match so far - * or: partial match must not be longer than UCNV_EXT_MAX_BYTES - * because it must fit into state buffers - */ - break; - } else { - /* continue with more input next time */ - return -length; - } - } - - /* search for the current UChar */ - 
value=ucnv_extFindToU(toUSection, length, b); - if(value==0) { - /* no match here, stop with the longest match so far */ - break; - } else { - if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { - /* partial match, continue */ - idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value); - } else { - if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || - TO_U_USE_FALLBACK(useFallback)) && - UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) - ) { - /* full match, stop with result */ - matchValue=value; - matchLength=i+j; - } else { - /* full match on fallback not taken, stop with the longest match so far */ - } - break; - } - } - } - - if(matchLength==0) { - /* no match at all */ - return 0; - } - - /* return result */ - *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue); - return matchLength; -} - -static inline void -ucnv_extWriteToU(UConverter *cnv, const int32_t *cx, - uint32_t value, - UChar **target, const UChar *targetLimit, - int32_t **offsets, int32_t srcIndex, - UErrorCode *pErrorCode) { - /* output the result */ - if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { - /* output a single code point */ - ucnv_toUWriteCodePoint( - cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value), - target, targetLimit, - offsets, srcIndex, - pErrorCode); - } else { - /* output a string - with correct data we have resultLength>0 */ - ucnv_toUWriteUChars( - cnv, - UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+ - UCNV_EXT_TO_U_GET_INDEX(value), - UCNV_EXT_TO_U_GET_LENGTH(value), - target, targetLimit, - offsets, srcIndex, - pErrorCode); - } -} - -/* - * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS), - * or 1 for DBCS-only, - * or -1 if the converter is not SI/SO stateful - * - * Note: For SI/SO stateful converters getting here, - * cnv->mode==0 is equivalent to firstLength==1. - */ -#define UCNV_SISO_STATE(cnv) \ - ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : \ - (cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 
1 : -1) - -/* - * targettoUBytes, firstLength, - *src, (int32_t)(srcLimit-*src), - &value, - cnv->useFallback, flush); - if(match>0) { - /* advance src pointer for the consumed input */ - *src+=match-firstLength; - - /* write result to target */ - ucnv_extWriteToU(cnv, cx, - value, - target, targetLimit, - offsets, srcIndex, - pErrorCode); - return TRUE; - } else if(match<0) { - /* save state for partial match */ - const char *s; - int32_t j; - - /* copy the first code point */ - s=(const char *)cnv->toUBytes; - cnv->preToUFirstLength=(int8_t)firstLength; - for(j=0; jpreToU[j]=*s++; - } - - /* now copy the newly consumed input */ - s=*src; - match=-match; - for(; jpreToU[j]=*s++; - } - *src=s; /* same as *src=srcLimit; because we reached the end of input */ - cnv->preToULength=(int8_t)match; - return TRUE; - } else /* match==0 no match */ { - return FALSE; - } -} - -U_CFUNC UChar32 -ucnv_extSimpleMatchToU(const int32_t *cx, - const char *source, int32_t length, - UBool useFallback) { - uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ - int32_t match; - - if(length<=0) { - return 0xffff; - } - - /* try to match */ - match=ucnv_extMatchToU(cx, -1, - source, length, - NULL, 0, - &value, - useFallback, TRUE); - if(match==length) { - /* write result for simple, single-character conversion */ - if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { - return UCNV_EXT_TO_U_GET_CODE_POINT(value); - } - } - - /* - * return no match because - * - match>0 && value points to string: simple conversion cannot handle multiple code points - * - match>0 && match!=length: not all input consumed, forbidden for this function - * - match==0: no match found in the first place - * - match<0: partial match, not supported for simple conversion (and flush==TRUE) - */ - return 0xfffe; -} - -/* - * continue partial match with new input - * never called for simple, single-character conversion - */ -U_CFUNC void -ucnv_extContinueMatchToU(UConverter *cnv, - UConverterToUnicodeArgs 
*pArgs, int32_t srcIndex, - UErrorCode *pErrorCode) { - uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ - int32_t match, length; - - match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv), - cnv->preToU, cnv->preToULength, - pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), - &value, - cnv->useFallback, pArgs->flush); - if(match>0) { - if(match>=cnv->preToULength) { - /* advance src pointer for the consumed input */ - pArgs->source+=match-cnv->preToULength; - cnv->preToULength=0; - } else { - /* the match did not use all of preToU[] - keep the rest for replay */ - length=cnv->preToULength-match; - uprv_memmove(cnv->preToU, cnv->preToU+match, length); - cnv->preToULength=(int8_t)-length; - } - - /* write result */ - ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes, - value, - &pArgs->target, pArgs->targetLimit, - &pArgs->offsets, srcIndex, - pErrorCode); - } else if(match<0) { - /* save state for partial match */ - const char *s; - int32_t j; - - /* just _append_ the newly consumed input to preToU[] */ - s=pArgs->source; - match=-match; - for(j=cnv->preToULength; jpreToU[j]=*s++; - } - pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ - cnv->preToULength=(int8_t)match; - } else /* match==0 */ { - /* - * no match - * - * We need to split the previous input into two parts: - * - * 1. The first codepage character is unmappable - that's how we got into - * trying the extension data in the first place. - * We need to move it from the preToU buffer - * to the error buffer, set an error code, - * and prepare the rest of the previous input for 2. - * - * 2. The rest of the previous input must be converted once we - * come back from the callback for the first character. - * At that time, we have to try again from scratch to convert - * these input characters. - * The replay will be handled by the ucnv.c conversion code. 
- */ - - /* move the first codepage character to the error field */ - uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength); - cnv->toULength=cnv->preToUFirstLength; - - /* move the rest up inside the buffer */ - length=cnv->preToULength-cnv->preToUFirstLength; - if(length>0) { - uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length); - } - - /* mark preToU for replay */ - cnv->preToULength=(int8_t)-length; - - /* set the error code for unassigned */ - *pErrorCode=U_INVALID_CHAR_FOUND; - } -} - -/* from Unicode ------------------------------------------------------------- */ - -// Use roundtrips, "good one-way" mappings, and some normal fallbacks. -static inline UBool -extFromUUseMapping(UBool useFallback, uint32_t value, UChar32 firstCP) { - return - ((value&UCNV_EXT_FROM_U_STATUS_MASK)!=0 || - FROM_U_USE_FALLBACK(useFallback, firstCP)) && - (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0; -} - -/* - * @return index of the UChar, if found; else <0 - */ -static inline int32_t -ucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) { - int32_t i, start, limit; - - /* binary search */ - start=0; - limit=length; - for(;;) { - i=limit-start; - if(i<=1) { - break; /* done */ - } - /* start=0 - * @param src UChars that can be used to complete a match - * @param srcLength length of src, >=0 - * @param pMatchValue [out] output result value for the match from the data structure - * @param useFallback "use fallback" flag, usually from cnv->useFallback - * @param flush TRUE if the end of the input stream is reached - * @return >1: matched, return value=total match length (number of input units matched) - * 1: matched, no mapping but request for - * (only for the first code point) - * 0: no match - * <0: partial match, return value=negative total match length - * (partial matches are never returned for flush==TRUE) - * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS) - * the matchLength is 2 if only firstCP matched, 
and >2 if firstCP and - * further code units matched - */ -static int32_t -ucnv_extMatchFromU(const int32_t *cx, - UChar32 firstCP, - const UChar *pre, int32_t preLength, - const UChar *src, int32_t srcLength, - uint32_t *pMatchValue, - UBool useFallback, UBool flush) { - const uint16_t *stage12, *stage3; - const uint32_t *stage3b; - - const UChar *fromUTableUChars, *fromUSectionUChars; - const uint32_t *fromUTableValues, *fromUSectionValues; - - uint32_t value, matchValue; - int32_t i, j, idx, length, matchLength; - UChar c; - - if(cx==NULL) { - return 0; /* no extension data, no match */ - } - - /* trie lookup of firstCP */ - idx=firstCP>>10; /* stage 1 index */ - if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) { - return 0; /* the first code point is outside the trie */ - } - - stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); - stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); - idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP); - - stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); - value=stage3b[idx]; - if(value==0) { - return 0; - } - - /* - * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0: - * Do not interpret values with reserved bits used, for forward compatibility, - * and do not even remember intermediate results with reserved bits used. 
- */ - - if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { - /* partial match, enter the loop below */ - idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); - - /* initialize */ - fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar); - fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t); - - matchValue=0; - i=j=matchLength=0; - - /* we must not remember fallback matches when not using fallbacks */ - - /* match input units until there is a full match or the input is consumed */ - for(;;) { - /* go to the next section */ - fromUSectionUChars=fromUTableUChars+idx; - fromUSectionValues=fromUTableValues+idx; - - /* read first pair of the section */ - length=*fromUSectionUChars++; - value=*fromUSectionValues++; - if(value!=0 && extFromUUseMapping(useFallback, value, firstCP)) { - /* remember longest match so far */ - matchValue=value; - matchLength=2+i+j; - } - - /* match pre[] then src[] */ - if(iUCNV_EXT_MAX_UCHARS) { - /* - * end of the entire input stream, stop with the longest match so far - * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS - * because it must fit into state buffers - */ - break; - } else { - /* continue with more input next time */ - return -(2+length); - } - } - - /* search for the current UChar */ - idx=ucnv_extFindFromU(fromUSectionUChars, length, c); - if(idx<0) { - /* no match here, stop with the longest match so far */ - break; - } else { - value=fromUSectionValues[idx]; - if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { - /* partial match, continue */ - idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); - } else { - if(extFromUUseMapping(useFallback, value, firstCP)) { - /* full match, stop with result */ - matchValue=value; - matchLength=2+i+j; - } else { - /* full match on fallback not taken, stop with the longest match so far */ - } - break; - } - } - } - - if(matchLength==0) { - /* no match at all */ - return 0; - } - } else /* result from firstCP trie lookup */ { - 
if(extFromUUseMapping(useFallback, value, firstCP)) { - /* full match, stop with result */ - matchValue=value; - matchLength=2; - } else { - /* fallback not taken */ - return 0; - } - } - - /* return result */ - if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) { - return 1; /* assert matchLength==2 */ - } - - *pMatchValue=matchValue; - return matchLength; -} - -/* - * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits - */ -static inline void -ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx, - uint32_t value, - char **target, const char *targetLimit, - int32_t **offsets, int32_t srcIndex, - UErrorCode *pErrorCode) { - uint8_t buffer[1+UCNV_EXT_MAX_BYTES]; - const uint8_t *result; - int32_t length, prevLength; - - length=UCNV_EXT_FROM_U_GET_LENGTH(value); - value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); - - /* output the result */ - if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { - /* - * Generate a byte array and then write it below. - * This is not the fastest possible way, but it should be ok for - * extension mappings, and it is much simpler. - * Offset and overflow handling are only done once this way. 
- */ - uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */ - switch(length) { - case 3: - *p++=(uint8_t)(value>>16); - U_FALLTHROUGH; - case 2: - *p++=(uint8_t)(value>>8); - U_FALLTHROUGH; - case 1: - *p++=(uint8_t)value; - U_FALLTHROUGH; - default: - break; /* will never occur */ - } - result=buffer+1; - } else { - result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; - } - - /* with correct data we have length>0 */ - - if((prevLength=cnv->fromUnicodeStatus)!=0) { - /* handle SI/SO stateful output */ - uint8_t shiftByte; - - if(prevLength>1 && length==1) { - /* change from double-byte mode to single-byte */ - shiftByte=(uint8_t)UCNV_SI; - cnv->fromUnicodeStatus=1; - } else if(prevLength==1 && length>1) { - /* change from single-byte mode to double-byte */ - shiftByte=(uint8_t)UCNV_SO; - cnv->fromUnicodeStatus=2; - } else { - shiftByte=0; - } - - if(shiftByte!=0) { - /* prepend the shift byte to the result bytes */ - buffer[0]=shiftByte; - if(result!=buffer+1) { - uprv_memcpy(buffer+1, result, length); - } - result=buffer; - ++length; - } - } - - ucnv_fromUWriteBytes(cnv, (const char *)result, length, - target, targetLimit, - offsets, srcIndex, - pErrorCode); -} - -/* - * targetuseFallback, flush); - - /* reject a match if the result is a single byte for DBCS-only */ - if( match>=2 && - !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 && - cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) - ) { - /* advance src pointer for the consumed input */ - *src+=match-2; /* remove 2 for the initial code point */ - - /* write result to target */ - ucnv_extWriteFromU(cnv, cx, - value, - target, targetLimit, - offsets, srcIndex, - pErrorCode); - return TRUE; - } else if(match<0) { - /* save state for partial match */ - const UChar *s; - int32_t j; - - /* copy the first code point */ - cnv->preFromUFirstCP=cp; - - /* now copy the newly consumed input */ - s=*src; - match=-match-2; /* remove 2 for the initial code point */ - for(j=0; jpreFromU[j]=*s++; 
- } - *src=s; /* same as *src=srcLimit; because we reached the end of input */ - cnv->preFromULength=(int8_t)match; - return TRUE; - } else if(match==1) { - /* matched, no mapping but request for */ - cnv->useSubChar1=TRUE; - return FALSE; - } else /* match==0 no match */ { - return FALSE; - } -} - -/* - * Used by ISO 2022 implementation. - * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping - */ -U_CFUNC int32_t -ucnv_extSimpleMatchFromU(const int32_t *cx, - UChar32 cp, uint32_t *pValue, - UBool useFallback) { - uint32_t value; - int32_t match; - - /* try to match */ - match=ucnv_extMatchFromU(cx, - cp, - NULL, 0, - NULL, 0, - &value, - useFallback, TRUE); - if(match>=2) { - /* write result for simple, single-character conversion */ - int32_t length; - int isRoundtrip; - - isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value); - length=UCNV_EXT_FROM_U_GET_LENGTH(value); - value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); - - if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { - *pValue=value; - return isRoundtrip ? length : -length; -#if 0 /* not currently used */ - } else if(length==4) { - /* de-serialize a 4-byte result */ - const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; - *pValue= - ((uint32_t)result[0]<<24)| - ((uint32_t)result[1]<<16)| - ((uint32_t)result[2]<<8)| - result[3]; - return isRoundtrip ? 
4 : -4; -#endif - } - } - - /* - * return no match because - * - match>1 && resultLength>4: result too long for simple conversion - * - match==1: no match found, preferred - * - match==0: no match found in the first place - * - match<0: partial match, not supported for simple conversion (and flush==TRUE) - */ - return 0; -} - -/* - * continue partial match with new input, requires cnv->preFromUFirstCP>=0 - * never called for simple, single-character conversion - */ -U_CFUNC void -ucnv_extContinueMatchFromU(UConverter *cnv, - UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, - UErrorCode *pErrorCode) { - uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ - int32_t match; - - match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes, - cnv->preFromUFirstCP, - cnv->preFromU, cnv->preFromULength, - pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), - &value, - cnv->useFallback, pArgs->flush); - if(match>=2) { - match-=2; /* remove 2 for the initial code point */ - - if(match>=cnv->preFromULength) { - /* advance src pointer for the consumed input */ - pArgs->source+=match-cnv->preFromULength; - cnv->preFromULength=0; - } else { - /* the match did not use all of preFromU[] - keep the rest for replay */ - int32_t length=cnv->preFromULength-match; - u_memmove(cnv->preFromU, cnv->preFromU+match, length); - cnv->preFromULength=(int8_t)-length; - } - - /* finish the partial match */ - cnv->preFromUFirstCP=U_SENTINEL; - - /* write result */ - ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes, - value, - &pArgs->target, pArgs->targetLimit, - &pArgs->offsets, srcIndex, - pErrorCode); - } else if(match<0) { - /* save state for partial match */ - const UChar *s; - int32_t j; - - /* just _append_ the newly consumed input to preFromU[] */ - s=pArgs->source; - match=-match-2; /* remove 2 for the initial code point */ - for(j=cnv->preFromULength; j=0); - cnv->preFromU[j]=*s++; - } - pArgs->source=s; /* same as *src=srcLimit; because we reached 
the end of input */ - cnv->preFromULength=(int8_t)match; - } else /* match==0 or 1 */ { - /* - * no match - * - * We need to split the previous input into two parts: - * - * 1. The first code point is unmappable - that's how we got into - * trying the extension data in the first place. - * We need to move it from the preFromU buffer - * to the error buffer, set an error code, - * and prepare the rest of the previous input for 2. - * - * 2. The rest of the previous input must be converted once we - * come back from the callback for the first code point. - * At that time, we have to try again from scratch to convert - * these input characters. - * The replay will be handled by the ucnv.c conversion code. - */ - - if(match==1) { - /* matched, no mapping but request for */ - cnv->useSubChar1=TRUE; - } - - /* move the first code point to the error field */ - cnv->fromUChar32=cnv->preFromUFirstCP; - cnv->preFromUFirstCP=U_SENTINEL; - - /* mark preFromU for replay */ - cnv->preFromULength=-cnv->preFromULength; - - /* set the error code for unassigned */ - *pErrorCode=U_INVALID_CHAR_FOUND; - } -} - -static UBool -extSetUseMapping(UConverterUnicodeSet which, int32_t minLength, uint32_t value) { - if(which==UCNV_ROUNDTRIP_SET) { - // Add only code points for which the roundtrip flag is set. - // Do not add any fallbacks, even if ucnv_fromUnicode() would use them - // (fallbacks from PUA). See the API docs for ucnv_getUnicodeSet(). - // - // By analogy, also do not add "good one-way" mappings. - // - // Do not add entries with reserved bits set. - if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))!= - UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) { - return FALSE; - } - } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ { - // Do not add entries with reserved bits set. - if((value&UCNV_EXT_FROM_U_RESERVED_MASK)!=0) { - return FALSE; - } - } - // Do not add entries or other (future?) pseudo-entries - // with an output length of 0. 
- return UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength; -} - -static void -ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, - const int32_t *cx, - const USetAdder *sa, - UConverterUnicodeSet which, - int32_t minLength, - UChar32 firstCP, - UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, - int32_t sectionIndex, - UErrorCode *pErrorCode) { - const UChar *fromUSectionUChars; - const uint32_t *fromUSectionValues; - - uint32_t value; - int32_t i, count; - - fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex; - fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex; - - /* read first pair of the section */ - count=*fromUSectionUChars++; - value=*fromUSectionValues++; - - if(extSetUseMapping(which, minLength, value)) { - if(length==U16_LENGTH(firstCP)) { - /* add the initial code point */ - sa->add(sa->set, firstCP); - } else { - /* add the string so far */ - sa->addString(sa->set, s, length); - } - } - - for(i=0; iaddString(sa->set, s, length+1); - } - } -} - -U_CFUNC void -ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, - const USetAdder *sa, - UConverterUnicodeSet which, - UConverterSetFilter filter, - UErrorCode *pErrorCode) { - const int32_t *cx; - const uint16_t *stage12, *stage3, *ps2, *ps3; - const uint32_t *stage3b; - - uint32_t value; - int32_t st1, stage1Length, st2, st3, minLength; - - UChar s[UCNV_EXT_MAX_UCHARS]; - UChar32 c; - int32_t length; - - cx=sharedData->mbcs.extIndexes; - if(cx==NULL) { - return; - } - - stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); - stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); - stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); - - stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; - - /* enumerate the from-Unicode trie table */ - c=0; /* keep track of the current code point while enumerating */ - - if(filter==UCNV_SET_FILTER_2022_CN) { - minLength=3; - } else if( 
sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || - filter!=UCNV_SET_FILTER_NONE - ) { - /* DBCS-only, ignore single-byte results */ - minLength=2; - } else { - minLength=1; - } - - /* - * the trie enumeration is almost the same as - * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1 - */ - for(st1=0; st1stage1Length) { - ps2=stage12+st2; - for(st2=0; st2<64; ++st2) { - if((st3=(int32_t)ps2[st2]<=0x8140 && value<=0xeffc)) { - continue; - } - break; - case UCNV_SET_FILTER_GR94DBCS: - if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && - (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) && - (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { - continue; - } - break; - case UCNV_SET_FILTER_HZ: - if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && - (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) && - (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { - continue; - } - break; - default: - /* - * UCNV_SET_FILTER_NONE, - * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength - */ - break; - } - sa->add(sa->set, c); - } - } while((++c&0xf)!=0); - } else { - c+=16; /* empty stage 3 block */ - } - } - } else { - c+=1024; /* empty stage 2 block */ - } - } -} - -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/deps/node/deps/icu-small/source/common/ucnv_ext.h b/deps/node/deps/icu-small/source/common/ucnv_ext.h deleted file mode 100644 index 7b753ac2..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_ext.h +++ /dev/null @@ -1,481 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2003-2013, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -****************************************************************************** -* file name: ucnv_ext.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003jun13 -* created by: Markus W. Scherer -* -* Conversion extensions -*/ - -#ifndef __UCNV_EXT_H__ -#define __UCNV_EXT_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv.h" -#include "ucnv_cnv.h" - -/* - * See icuhtml/design/conversion/conversion_extensions.html - * - * Conversion extensions serve three purposes: - * 1. They support m:n mappings. - * 2. They support extension-only conversion files that are used together - * with the regular conversion data in base files. - * 3. They support mappings with more complicated meta data, - * for example "good one-way" mappings (|4). - * - * A base file may contain an extension table (explicitly requested or - * implicitly generated for m:n mappings), but its extension table is not - * used when an extension-only file is used. - * - * It is an error if a base file contains any regular (not extension) mapping - * from the same sequence as a mapping in the extension file - * because the base mapping would hide the extension mapping. - * - * - * Data for conversion extensions: - * - * One set of data structures per conversion direction (to/from Unicode). - * The data structures are sorted by input units to allow for binary search. - * Input sequences of more than one unit are handled like contraction tables - * in collation: - * The lookup value of a unit points to another table that is to be searched - * for the next unit, recursively. - * - * For conversion from Unicode, the initial code point is looked up in - * a 3-stage trie for speed, - * with an additional table of unique results to save space. - * - * Long output strings are stored in separate arrays, with length and index - * in the lookup tables. 
- * Output results also include a flag distinguishing roundtrip from - * (reverse) fallback mappings. - * - * Input Unicode strings must not begin or end with unpaired surrogates - * to avoid problems with matches on parts of surrogate pairs. - * - * Mappings from multiple characters (code points or codepage state - * table sequences) must be searched preferring the longest match. - * For this to work and be efficient, the variable-width table must contain - * all mappings that contain prefixes of the multiple characters. - * If an extension table is built on top of a base table in another file - * and a base table entry is a prefix of a multi-character mapping, then - * this is an error. - * - * - * Implementation note: - * - * Currently, the parser and several checks in the code limit the number - * of UChars or bytes in a mapping to - * UCNV_EXT_MAX_UCHARS and UCNV_EXT_MAX_BYTES, respectively, - * which are output value limits in the data structure. - * - * For input, this is not strictly necessary - it is a hard limit only for the - * buffers in UConverter that are used to store partial matches. - * - * Input sequences could otherwise be arbitrarily long if partial matches - * need not be stored (i.e., if a sequence does not span several buffers with too - * many units before the last buffer), although then results would differ - * depending on whether partial matches exceed the limits or not, - * which depends on the pattern of buffer sizes. - * - * - * Data structure: - * - * int32_t indexes[>=32]; - * - * Array of indexes and lengths etc. The length of the array is at least 32. - * The actual length is stored in indexes[0] to be forward compatible. - * - * Each index to another array is the number of bytes from indexes[]. - * Each length of an array is the number of array base units in that array. - * - * Some of the structures may not be present, in which case their indexes - * and lengths are 0. 
- * - * Usage of indexes[i]: - * [0] length of indexes[] - * - * // to Unicode table - * [1] index of toUTable[] (array of uint32_t) - * [2] length of toUTable[] - * [3] index of toUUChars[] (array of UChar) - * [4] length of toUUChars[] - * - * // from Unicode table, not for the initial code point - * [5] index of fromUTableUChars[] (array of UChar) - * [6] index of fromUTableValues[] (array of uint32_t) - * [7] length of fromUTableUChars[] and fromUTableValues[] - * [8] index of fromUBytes[] (array of char) - * [9] length of fromUBytes[] - * - * // from Unicode trie for initial-code point lookup - * [10] index of fromUStage12[] (combined array of uint16_t for stages 1 & 2) - * [11] length of stage 1 portion of fromUStage12[] - * [12] length of fromUStage12[] - * [13] index of fromUStage3[] (array of uint16_t indexes into fromUStage3b[]) - * [14] length of fromUStage3[] - * [15] index of fromUStage3b[] (array of uint32_t like fromUTableValues[]) - * [16] length of fromUStage3b[] - * - * [17] Bit field containing numbers of bytes: - * 31..24 reserved, 0 - * 23..16 maximum input bytes - * 15.. 8 maximum output bytes - * 7.. 0 maximum bytes per UChar - * - * [18] Bit field containing numbers of UChars: - * 31..24 reserved, 0 - * 23..16 maximum input UChars - * 15.. 8 maximum output UChars - * 7.. 0 maximum UChars per byte - * - * [19] Bit field containing flags: - * (extension table unicodeMask) - * 1 UCNV_HAS_SURROGATES flag for the extension table - * 0 UCNV_HAS_SUPPLEMENTARY flag for the extension table - * - * [20]..[30] reserved, 0 - * [31] number of bytes for the entire extension structure - * [>31] reserved; there are indexes[0] indexes - * - * - * uint32_t toUTable[]; - * - * Array of byte/value pairs for lookups for toUnicode conversion. - * The array is partitioned into sections like collation contraction tables. - * Each section contains one word with the number of following words and - * a default value for when the lookup in this section yields no match. 
- * - * A section is sorted in ascending order of input bytes, - * allowing for fast linear or binary searches. - * The builder may store entries for a contiguous range of byte values - * (compare difference between the first and last one with count), - * which then allows for direct array access. - * The builder should always do this for the initial table section. - * - * Entries may have 0 values, see below. - * No two entries in a section have the same byte values. - * - * Each uint32_t contains an input byte value in bits 31..24 and the - * corresponding lookup value in bits 23..0. - * Interpret the value as follows: - * if(value==0) { - * no match, see below - * } else if(value<0x1f0000) { - * partial match - use value as index to the next toUTable section - * and match the next unit; (value indexes toUTable[value]) - * } else { - * if(bit 23 set) { - * roundtrip; - * } else { - * fallback; - * } - * unset value bit 23; - * if(value<=0x2fffff) { - * (value-0x1f0000) is a code point; (BMP: value<=0x1fffff) - * } else { - * bits 17..0 (value&0x3ffff) is an index to - * the result UChars in toUUChars[]; (0 indexes toUUChars[0]) - * length of the result=((value>>18)-12); (length=0..19) - * } - * } - * - * The first word in a section contains the number of following words in the - * input byte position (bits 31..24, number=1..0xff). - * The value of the initial word is used when the current byte is not found - * in this section. - * If the value is not 0, then it represents a result as above. - * If the value is 0, then the search has to return a shorter match with an - * earlier default value as the result, or result in "unmappable" even for the - * initial bytes. - * If the value is 0 for the initial toUTable entry, then the initial byte - * does not start any mapping input. - * - * - * UChar toUUChars[]; - * - * Contains toUnicode mapping results, stored as sequences of UChars. - * Indexes and lengths stored in the toUTable[]. 
- * - * - * UChar fromUTableUChars[]; - * uint32_t fromUTableValues[]; - * - * The fromUTable is split into two arrays, but works otherwise much like - * the toUTable. The array is partitioned into sections like collation - * contraction tables and toUTable. - * A row in the table consists of same-index entries in fromUTableUChars[] - * and fromUTableValues[]. - * - * Interpret a value as follows: - * if(value==0) { - * no match, see below - * } else if(value<=0xffffff) { (bits 31..24 are 0) - * partial match - use value as index to the next fromUTable section - * and match the next unit; (value indexes fromUTable[value]) - * } else { - * if(value==0x80000001) { - * return no mapping, but request for ; - * } - * if(bit 31 set) { - * roundtrip (|0); - * } else if(bit 30 set) { - * "good one-way" mapping (|4); -- new in ICU4C 51, _MBCSHeader.version 5.4/4.4 - * } else { - * normal fallback (|1); - * } - * // bit 29 reserved, 0 - * length=(value>>24)&0x1f; (bits 28..24) - * if(length==1..3) { - * bits 23..0 contain 1..3 bytes, padded with 00s on the left; - * } else { - * bits 23..0 (value&0xffffff) is an index to - * the result bytes in fromUBytes[]; (0 indexes fromUBytes[0]) - * } - * } - * - * The first pair in a section contains the number of following pairs in the - * UChar position (16 bits, number=1..0xffff). - * The value of the initial pair is used when the current UChar is not found - * in this section. - * If the value is not 0, then it represents a result as above. - * If the value is 0, then the search has to return a shorter match with an - * earlier default value as the result, or result in "unmappable" even for the - * initial UChars. - * - * If the from Unicode trie is present, then the from Unicode search tables - * are not used for initial code points. - * In this case, the first entries (index 0) in the tables are not used - * (reserved, set to 0) because a value of 0 is used in trie results - * to indicate no mapping. 
- * - * - * uint16_t fromUStage12[]; - * - * Stages 1 & 2 of a trie that maps an initial code point. - * Indexes in stage 1 are all offset by the length of stage 1 so that the - * same array pointer can be used for both stages. - * If (c>>10)>=(length of stage 1) then c does not start any mapping. - * Same bit distribution as for regular conversion tries. - * - * - * uint16_t fromUStage3[]; - * uint32_t fromUStage3b[]; - * - * Stage 3 of the trie. The first array simply contains indexes to the second, - * which contains words in the same format as fromUTableValues[]. - * Use a stage 3 granularity of 4, which allows for 256k stage 3 entries, - * and 16-bit entries in stage 3 allow for 64k stage 3b entries. - * The stage 3 granularity means that the stage 2 entry needs to be left-shifted. - * - * Two arrays are used because it is expected that more than half of the stage 3 - * entries will be zero. The 16-bit index stage 3 array saves space even - * considering storing a total of 6 bytes per non-zero entry in both arrays - * together. - * Using a stage 3 granularity of >1 diminishes the compactability in that stage - * but provides a larger effective addressing space in stage 2. - * All but the final result stage use 16-bit entries to save space. - * - * fromUStage3b[] contains a zero for "no mapping" at its index 0, - * and may contain UCNV_EXT_FROM_U_SUBCHAR1 at index 1 for " SUB mapping" - * (i.e., "no mapping" with preference for rather than ), - * and all other items are unique non-zero results. - * - * The default value of a fromUTableValues[] section that is referenced - * _directly_ from a fromUStage3b[] item may also be UCNV_EXT_FROM_U_SUBCHAR1, - * but this value must not occur anywhere else in fromUTableValues[] - * because "no mapping" is always a property of a single code point, - * never of multiple. - * - * - * char fromUBytes[]; - * - * Contains fromUnicode mapping results, stored as sequences of chars. 
- * Indexes and lengths stored in the fromUTableValues[]. - */ -enum { - UCNV_EXT_INDEXES_LENGTH, /* 0 */ - - UCNV_EXT_TO_U_INDEX, /* 1 */ - UCNV_EXT_TO_U_LENGTH, - UCNV_EXT_TO_U_UCHARS_INDEX, - UCNV_EXT_TO_U_UCHARS_LENGTH, - - UCNV_EXT_FROM_U_UCHARS_INDEX, /* 5 */ - UCNV_EXT_FROM_U_VALUES_INDEX, - UCNV_EXT_FROM_U_LENGTH, - UCNV_EXT_FROM_U_BYTES_INDEX, - UCNV_EXT_FROM_U_BYTES_LENGTH, - - UCNV_EXT_FROM_U_STAGE_12_INDEX, /* 10 */ - UCNV_EXT_FROM_U_STAGE_1_LENGTH, - UCNV_EXT_FROM_U_STAGE_12_LENGTH, - UCNV_EXT_FROM_U_STAGE_3_INDEX, - UCNV_EXT_FROM_U_STAGE_3_LENGTH, - UCNV_EXT_FROM_U_STAGE_3B_INDEX, - UCNV_EXT_FROM_U_STAGE_3B_LENGTH, - - UCNV_EXT_COUNT_BYTES, /* 17 */ - UCNV_EXT_COUNT_UCHARS, - UCNV_EXT_FLAGS, - - UCNV_EXT_RESERVED_INDEX, /* 20, moves with additional indexes */ - - UCNV_EXT_SIZE=31, - UCNV_EXT_INDEXES_MIN_LENGTH=32 -}; - -/* get the pointer to an extension array from indexes[index] */ -#define UCNV_EXT_ARRAY(indexes, index, itemType) \ - ((const itemType *)((const char *)(indexes)+(indexes)[index])) - -#define UCNV_GET_MAX_BYTES_PER_UCHAR(indexes) \ - ((indexes)[UCNV_EXT_COUNT_BYTES]&0xff) - -/* internal API ------------------------------------------------------------- */ - -U_CFUNC UBool -ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx, - int32_t firstLength, - const char **src, const char *srcLimit, - UChar **target, const UChar *targetLimit, - int32_t **offsets, int32_t srcIndex, - UBool flush, - UErrorCode *pErrorCode); - -U_CFUNC UChar32 -ucnv_extSimpleMatchToU(const int32_t *cx, - const char *source, int32_t length, - UBool useFallback); - -U_CFUNC void -ucnv_extContinueMatchToU(UConverter *cnv, - UConverterToUnicodeArgs *pArgs, int32_t srcIndex, - UErrorCode *pErrorCode); - - -U_CFUNC UBool -ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx, - UChar32 cp, - const UChar **src, const UChar *srcLimit, - char **target, const char *targetLimit, - int32_t **offsets, int32_t srcIndex, - UBool flush, - UErrorCode *pErrorCode); - 
-U_CFUNC int32_t -ucnv_extSimpleMatchFromU(const int32_t *cx, - UChar32 cp, uint32_t *pValue, - UBool useFallback); - -U_CFUNC void -ucnv_extContinueMatchFromU(UConverter *cnv, - UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, - UErrorCode *pErrorCode); - -/* - * Add code points and strings to the set according to the extension mappings. - * Limitation on the UConverterSetFilter: - * The filters currently assume that they are used with 1:1 mappings. - * They only apply to single input code points, and then they pass through - * only mappings with single-charset-code results. - * For example, the Shift-JIS filter only works for 2-byte results and tests - * that those 2 bytes are in the JIS X 0208 range of Shift-JIS. - */ -U_CFUNC void -ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, - const USetAdder *sa, - UConverterUnicodeSet which, - UConverterSetFilter filter, - UErrorCode *pErrorCode); - -/* toUnicode helpers -------------------------------------------------------- */ - -#define UCNV_EXT_TO_U_BYTE_SHIFT 24 -#define UCNV_EXT_TO_U_VALUE_MASK 0xffffff -#define UCNV_EXT_TO_U_MIN_CODE_POINT 0x1f0000 -#define UCNV_EXT_TO_U_MAX_CODE_POINT 0x2fffff -#define UCNV_EXT_TO_U_ROUNDTRIP_FLAG ((uint32_t)1<<23) -#define UCNV_EXT_TO_U_INDEX_MASK 0x3ffff -#define UCNV_EXT_TO_U_LENGTH_SHIFT 18 -#define UCNV_EXT_TO_U_LENGTH_OFFSET 12 - -/* maximum number of indexed UChars */ -#define UCNV_EXT_MAX_UCHARS 19 - -#define UCNV_EXT_TO_U_MAKE_WORD(byte, value) (((uint32_t)(byte)<>UCNV_EXT_TO_U_BYTE_SHIFT) -#define UCNV_EXT_TO_U_GET_VALUE(word) ((word)&UCNV_EXT_TO_U_VALUE_MASK) - -#define UCNV_EXT_TO_U_IS_PARTIAL(value) ((value)>UCNV_EXT_TO_U_LENGTH_SHIFT)-UCNV_EXT_TO_U_LENGTH_OFFSET) - -/* fromUnicode helpers ------------------------------------------------------ */ - -/* most trie constants are shared with ucnvmbcs.h */ - -/* see similar utrie.h UTRIE_INDEX_SHIFT and UTRIE_DATA_GRANULARITY */ -#define UCNV_EXT_STAGE_2_LEFT_SHIFT 2 -#define UCNV_EXT_STAGE_3_GRANULARITY 
4 - -/* trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10 */ -#define UCNV_EXT_FROM_U(stage12, stage3, s1Index, c) \ - (stage3)[ ((int32_t)(stage12)[ (stage12)[s1Index] +(((c)>>4)&0x3f) ]< (impossible roundtrip to 0 bytes, value 01) */ -#define UCNV_EXT_FROM_U_SUBCHAR1 0x80000001 - -/* at most 3 bytes in the lower part of the value */ -#define UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH 3 - -/* maximum number of indexed bytes */ -#define UCNV_EXT_MAX_BYTES 0x1f - -#define UCNV_EXT_FROM_U_IS_PARTIAL(value) (((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)==0) -#define UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value) (value) - -#define UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) (((value)&UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)!=0) -#define UCNV_EXT_FROM_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) - -/* get length; masks away all other bits */ -#define UCNV_EXT_FROM_U_GET_LENGTH(value) (int32_t)(((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)&UCNV_EXT_MAX_BYTES) - -/* get bytes or bytes index */ -#define UCNV_EXT_FROM_U_GET_DATA(value) ((value)&UCNV_EXT_FROM_U_DATA_MASK) - -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnv_imp.h b/deps/node/deps/icu-small/source/common/ucnv_imp.h deleted file mode 100644 index c5e6aeb4..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_imp.h +++ /dev/null @@ -1,139 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* -* ucnv_imp.h: -* Contains all internal and external data structure definitions -* Created & Maitained by Bertrand A. 
Damiba -* -* -* -* ATTENTION: -* --------- -* Although the data structures in this file are open and stack allocatable -* we reserve the right to hide them in further releases. -*/ - -#ifndef UCNV_IMP_H -#define UCNV_IMP_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/uloc.h" -#include "ucnv_bld.h" - -/* - * Fast check for whether a charset name is "UTF-8". - * This does not recognize all of the variations that ucnv_open() - * and other functions recognize, but it covers most cases. - * @param name const char * charset name - * @return - */ -#define UCNV_FAST_IS_UTF8(name) \ - (((name[0]=='U' ? \ - ( name[1]=='T' && name[2]=='F') : \ - (name[0]=='u' && name[1]=='t' && name[2]=='f'))) \ - && (name[3]=='-' ? \ - (name[4]=='8' && name[5]==0) : \ - (name[3]=='8' && name[4]==0))) - -typedef struct { - char cnvName[UCNV_MAX_CONVERTER_NAME_LENGTH]; - char locale[ULOC_FULLNAME_CAPACITY]; - uint32_t options; -} UConverterNamePieces; - -U_CFUNC UBool -ucnv_canCreateConverter(const char *converterName, UErrorCode *err); - -/* figures out if we need to go to file to read in the data tables. - * @param converterName The name of the converter - * @param err The error code - * @return the newly created converter - */ -U_CAPI UConverter * -ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err); - -/* - * Open a purely algorithmic converter, specified by a type constant. 
- * @param myUConverter NULL, or pre-allocated UConverter structure to avoid - * a memory allocation - * @param type requested converter type - * @param locale locale parameter, or "" - * @param options converter options bit set (default 0) - * @param err ICU error code, not tested for U_FAILURE on input - * because this is an internal function - * @internal - */ -U_CFUNC UConverter * -ucnv_createAlgorithmicConverter(UConverter *myUConverter, - UConverterType type, - const char *locale, uint32_t options, - UErrorCode *err); - -/* - * Creates a converter from shared data. - * Adopts mySharedConverterData: No matter what happens, the caller must not - * unload mySharedConverterData, except via ucnv_close(return value) - * if this function is successful. - */ -U_CFUNC UConverter * -ucnv_createConverterFromSharedData(UConverter *myUConverter, - UConverterSharedData *mySharedConverterData, - UConverterLoadArgs *pArgs, - UErrorCode *err); - -U_CFUNC UConverter * -ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode *err); - -/** - * Load a converter but do not create a UConverter object. - * Simply return the UConverterSharedData. - * Performs alias lookup etc. - * The UConverterNamePieces need not be initialized - * before calling this function. - * The UConverterLoadArgs must be initialized - * before calling this function. - * If the args are passed in, then the pieces must be passed in too. - * In other words, the following combinations are allowed: - * - pieces==NULL && args==NULL - * - pieces!=NULL && args==NULL - * - pieces!=NULL && args!=NULL - * @internal - */ -U_CFUNC UConverterSharedData * -ucnv_loadSharedData(const char *converterName, - UConverterNamePieces *pieces, - UConverterLoadArgs *pArgs, - UErrorCode * err); - -/** - * This may unload the shared data in a thread safe manner. - * This will only unload the data if no other converters are sharing it. 
- */ -U_CFUNC void -ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData); - -/** - * This is a thread safe way to increment the reference count. - */ -U_CFUNC void -ucnv_incrementRefCount(UConverterSharedData *sharedData); - -/** - * These are the default error handling callbacks for the charset conversion framework. - * For performance reasons, they are only called to handle an error (not normally called for a reset or close). - */ -#define UCNV_TO_U_DEFAULT_CALLBACK ((UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE) -#define UCNV_FROM_U_DEFAULT_CALLBACK ((UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE) - -#endif - -#endif /* _UCNV_IMP */ diff --git a/deps/node/deps/icu-small/source/common/ucnv_io.cpp b/deps/node/deps/icu-small/source/common/ucnv_io.cpp deleted file mode 100644 index d9e91314..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_io.cpp +++ /dev/null @@ -1,1360 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* -* ucnv_io.cpp: -* initializes global variables and defines functions pertaining to converter -* name resolution aspect of the conversion code. -* -* new implementation: -* -* created on: 1999nov22 -* created by: Markus W. Scherer -* -* Use the binary cnvalias.icu (created from convrtrs.txt) to work -* with aliases for converter names. -* -* Date Name Description -* 11/22/1999 markus Created -* 06/28/2002 grhoten Major overhaul of the converter alias design. -* Now an alias can map to different converters -* depending on the specified standard. 
-******************************************************************************* -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/udata.h" - -#include "umutex.h" -#include "uarrsort.h" -#include "uassert.h" -#include "udataswp.h" -#include "cstring.h" -#include "cmemory.h" -#include "ucnv_io.h" -#include "uenumimp.h" -#include "ucln_cmn.h" - -/* Format of cnvalias.icu ----------------------------------------------------- - * - * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt. - * This binary form contains several tables. All indexes are to uint16_t - * units, and not to the bytes (uint8_t units). Addressing everything on - * 16-bit boundaries allows us to store more information with small index - * numbers, which are also 16-bit in size. The majority of the table (except - * the string table) are 16-bit numbers. - * - * First there is the size of the Table of Contents (TOC). The TOC - * entries contain the size of each section. In order to find the offset - * you just need to sum up the previous offsets. - * The TOC length and entries are an array of uint32_t values. - * The first section after the TOC starts immediately after the TOC. - * - * 1) This section contains a list of converters. This list contains indexes - * into the string table for the converter name. The index of this list is - * also used by other sections, which are mentioned later on. - * This list is not sorted. - * - * 2) This section contains a list of tags. This list contains indexes - * into the string table for the tag name. The index of this list is - * also used by other sections, which are mentioned later on. - * This list is in priority order of standards. - * - * 3) This section contains a list of sorted unique aliases. This - * list contains indexes into the string table for the alias name. The - * index of this list is also used by other sections, like the 4th section. 
- * The index for the 3rd and 4th section is used to get the - * alias -> converter name mapping. Section 3 and 4 form a two column table. - * Some of the most significant bits of each index may contain other - * information (see findConverter for details). - * - * 4) This section contains a list of mapped converter names. Consider this - * as a table that maps the 3rd section to the 1st section. This list contains - * indexes into the 1st section. The index of this list is the same index in - * the 3rd section. There is also some extra information in the high bits of - * each converter index in this table. Currently it's only used to say that - * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK - * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is - * the predigested form of the 5th section so that an alias lookup can be fast. - * - * 5) This section contains a 2D array with indexes to the 6th section. This - * section is the full form of all alias mappings. The column index is the - * index into the converter list (column header). The row index is the index - * to tag list (row header). This 2D array is the top part a 3D array. The - * third dimension is in the 6th section. - * - * 6) This is blob of variable length arrays. Each array starts with a size, - * and is followed by indexes to alias names in the string table. This is - * the third dimension to the section 5. No other section should be referencing - * this section. - * - * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its - * presence indicates that a section 9 exists. UConverterAliasOptions specifies - * what type of string normalization is used among other potential things in the - * future. - * - * 8) This is the string table. All strings are indexed on an even address. - * There are two reasons for this. First many chip architectures locate strings - * faster on even address boundaries. 
Second, since all indexes are 16-bit - * numbers, this string table can be 128KB in size instead of 64KB when we - * only have strings starting on an even address. - * - * 9) When present this is a set of prenormalized strings from section 8. This - * table contains normalized strings with the dashes and spaces stripped out, - * and all strings lowercased. In the future, the options in section 7 may state - * other types of normalization. - * - * Here is the concept of section 5 and 6. It's a 3D cube. Each tag - * has a unique alias among all converters. That same alias can - * be mentioned in other standards on different converters, - * but only one alias per tag can be unique. - * - * - * Converter Names (Usually in TR22 form) - * -------------------------------------------. - * T / /| - * a / / | - * g / / | - * s / / | - * / / | - * ------------------------------------------/ | - * A | | | - * l | | | - * i | | / - * a | | / - * s | | / - * e | | / - * s | |/ - * ------------------------------------------- - * - * - * - * Here is what it really looks like. It's like swiss cheese. - * There are holes. Some converters aren't recognized by - * a standard, or they are really old converters that the - * standard doesn't recognize anymore. - * - * Converter Names (Usually in TR22 form) - * -------------------------------------------. 
- * T /##########################################/| - * a / # # /# - * g / # ## ## ### # ### ### ### #/ - * s / # ##### #### ## ## #/# - * / ### # # ## # # # ### # # #/## - * ------------------------------------------/# # - * A |### # # ## # # # ### # # #|# # - * l |# # # # # ## # #|# # - * i |# # # # # # #|# - * a |# #|# - * s | #|# - * e - * s - * - */ - -/** - * Used by the UEnumeration API - */ -typedef struct UAliasContext { - uint32_t listOffset; - uint32_t listIdx; -} UAliasContext; - -static const char DATA_NAME[] = "cnvalias"; -static const char DATA_TYPE[] = "icu"; - -static UDataMemory *gAliasData=NULL; -static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER; - -enum { - tocLengthIndex=0, - converterListIndex=1, - tagListIndex=2, - aliasListIndex=3, - untaggedConvArrayIndex=4, - taggedAliasArrayIndex=5, - taggedAliasListsIndex=6, - tableOptionsIndex=7, - stringTableIndex=8, - normalizedStringTableIndex=9, - offsetsCount, /* length of the swapper's temporary offsets[] */ - minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! 
*/ -}; - -static const UConverterAliasOptions defaultTableOptions = { - UCNV_IO_UNNORMALIZED, - 0 /* containsCnvOptionInfo */ -}; -static UConverterAlias gMainTable; - -#define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx)) -#define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx)) - -static UBool U_CALLCONV -isAcceptable(void * /*context*/, - const char * /*type*/, const char * /*name*/, - const UDataInfo *pInfo) { - return (UBool)( - pInfo->size>=20 && - pInfo->isBigEndian==U_IS_BIG_ENDIAN && - pInfo->charsetFamily==U_CHARSET_FAMILY && - pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ - pInfo->dataFormat[1]==0x76 && - pInfo->dataFormat[2]==0x41 && - pInfo->dataFormat[3]==0x6c && - pInfo->formatVersion[0]==3); -} - -static UBool U_CALLCONV ucnv_io_cleanup(void) -{ - if (gAliasData) { - udata_close(gAliasData); - gAliasData = NULL; - } - gAliasDataInitOnce.reset(); - - uprv_memset(&gMainTable, 0, sizeof(gMainTable)); - - return TRUE; /* Everything was cleaned up */ -} - -static void U_CALLCONV initAliasData(UErrorCode &errCode) { - UDataMemory *data; - const uint16_t *table; - const uint32_t *sectionSizes; - uint32_t tableStart; - uint32_t currOffset; - - ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup); - - U_ASSERT(gAliasData == NULL); - data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode); - if(U_FAILURE(errCode)) { - return; - } - - sectionSizes = (const uint32_t *)udata_getMemory(data); - table = (const uint16_t *)sectionSizes; - - tableStart = sectionSizes[0]; - if (tableStart < minTocLength) { - errCode = U_INVALID_FORMAT_ERROR; - udata_close(data); - return; - } - gAliasData = data; - - gMainTable.converterListSize = sectionSizes[1]; - gMainTable.tagListSize = sectionSizes[2]; - gMainTable.aliasListSize = sectionSizes[3]; - gMainTable.untaggedConvArraySize = sectionSizes[4]; - gMainTable.taggedAliasArraySize = sectionSizes[5]; - 
gMainTable.taggedAliasListsSize = sectionSizes[6]; - gMainTable.optionTableSize = sectionSizes[7]; - gMainTable.stringTableSize = sectionSizes[8]; - - if (tableStart > 8) { - gMainTable.normalizedStringTableSize = sectionSizes[9]; - } - - currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t)); - gMainTable.converterList = table + currOffset; - - currOffset += gMainTable.converterListSize; - gMainTable.tagList = table + currOffset; - - currOffset += gMainTable.tagListSize; - gMainTable.aliasList = table + currOffset; - - currOffset += gMainTable.aliasListSize; - gMainTable.untaggedConvArray = table + currOffset; - - currOffset += gMainTable.untaggedConvArraySize; - gMainTable.taggedAliasArray = table + currOffset; - - /* aliasLists is a 1's based array, but it has a padding character */ - currOffset += gMainTable.taggedAliasArraySize; - gMainTable.taggedAliasLists = table + currOffset; - - currOffset += gMainTable.taggedAliasListsSize; - if (gMainTable.optionTableSize > 0 - && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT) - { - /* Faster table */ - gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset); - } - else { - /* Smaller table, or I can't handle this normalization mode! - Use the original slower table lookup. */ - gMainTable.optionTable = &defaultTableOptions; - } - - currOffset += gMainTable.optionTableSize; - gMainTable.stringTable = table + currOffset; - - currOffset += gMainTable.stringTableSize; - gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED) - ? 
gMainTable.stringTable : (table + currOffset)); -} - - -static UBool -haveAliasData(UErrorCode *pErrorCode) { - umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode); - return U_SUCCESS(*pErrorCode); -} - -static inline UBool -isAlias(const char *alias, UErrorCode *pErrorCode) { - if(alias==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - return (UBool)(*alias!=0); -} - -static uint32_t getTagNumber(const char *tagname) { - if (gMainTable.tagList) { - uint32_t tagNum; - for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) { - if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) { - return tagNum; - } - } - } - - return UINT32_MAX; -} - -/* character types relevant for ucnv_compareNames() */ -enum { - UIGNORE, - ZERO, - NONZERO, - MINLETTER /* any values from here on are lowercase letter mappings */ -}; - -/* character types for ASCII 00..7F */ -static const uint8_t asciiTypes[128] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0, - 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0, - 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0 -}; - -#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? 
asciiTypes[(uint8_t)c] : (uint8_t)UIGNORE) - -/* character types for EBCDIC 80..FF */ -static const uint8_t ebcdicTypes[128] = { - 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, - 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, - 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, - 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, - 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, - ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0 -}; - -#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE) - -#if U_CHARSET_FAMILY==U_ASCII_FAMILY -# define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c) -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY -# define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c) -#else -# error U_CHARSET_FAMILY is not valid -#endif - - -/* @see ucnv_compareNames */ -U_CAPI char * U_CALLCONV -ucnv_io_stripASCIIForCompare(char *dst, const char *name) { - char *dstItr = dst; - uint8_t type, nextType; - char c1; - UBool afterDigit = FALSE; - - while ((c1 = *name++) != 0) { - type = GET_ASCII_TYPE(c1); - switch (type) { - case UIGNORE: - afterDigit = FALSE; - continue; /* ignore all but letters and digits */ - case ZERO: - if (!afterDigit) { - nextType = GET_ASCII_TYPE(*name); - if (nextType == ZERO || nextType == NONZERO) { - continue; /* ignore leading zero before another digit */ - } - } - break; - case NONZERO: - afterDigit = TRUE; - break; - default: - c1 = (char)type; /* lowercased letter */ - afterDigit = FALSE; - break; - } - *dstItr++ = c1; - } - *dstItr = 0; - return dst; -} - -U_CAPI char * U_CALLCONV -ucnv_io_stripEBCDICForCompare(char *dst, const char *name) { - char *dstItr = dst; - uint8_t type, nextType; - char c1; - UBool afterDigit = 
FALSE; - - while ((c1 = *name++) != 0) { - type = GET_EBCDIC_TYPE(c1); - switch (type) { - case UIGNORE: - afterDigit = FALSE; - continue; /* ignore all but letters and digits */ - case ZERO: - if (!afterDigit) { - nextType = GET_EBCDIC_TYPE(*name); - if (nextType == ZERO || nextType == NONZERO) { - continue; /* ignore leading zero before another digit */ - } - } - break; - case NONZERO: - afterDigit = TRUE; - break; - default: - c1 = (char)type; /* lowercased letter */ - afterDigit = FALSE; - break; - } - *dstItr++ = c1; - } - *dstItr = 0; - return dst; -} - -/** - * Do a fuzzy compare of two converter/alias names. - * The comparison is case-insensitive, ignores leading zeroes if they are not - * followed by further digits, and ignores all but letters and digits. - * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent. - * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22 - * at http://www.unicode.org/reports/tr22/ - * - * This is a symmetrical (commutative) operation; order of arguments - * is insignificant. This is an important property for sorting the - * list (when the list is preprocessed into binary form) and for - * performing binary searches on it at run time. - * - * @param name1 a converter name or alias, zero-terminated - * @param name2 a converter name or alias, zero-terminated - * @return 0 if the names match, or a negative value if the name1 - * lexically precedes name2, or a positive value if the name1 - * lexically follows name2. 
- * - * @see ucnv_io_stripForCompare - */ -U_CAPI int U_EXPORT2 -ucnv_compareNames(const char *name1, const char *name2) { - int rc; - uint8_t type, nextType; - char c1, c2; - UBool afterDigit1 = FALSE, afterDigit2 = FALSE; - - for (;;) { - while ((c1 = *name1++) != 0) { - type = GET_CHAR_TYPE(c1); - switch (type) { - case UIGNORE: - afterDigit1 = FALSE; - continue; /* ignore all but letters and digits */ - case ZERO: - if (!afterDigit1) { - nextType = GET_CHAR_TYPE(*name1); - if (nextType == ZERO || nextType == NONZERO) { - continue; /* ignore leading zero before another digit */ - } - } - break; - case NONZERO: - afterDigit1 = TRUE; - break; - default: - c1 = (char)type; /* lowercased letter */ - afterDigit1 = FALSE; - break; - } - break; /* deliver c1 */ - } - while ((c2 = *name2++) != 0) { - type = GET_CHAR_TYPE(c2); - switch (type) { - case UIGNORE: - afterDigit2 = FALSE; - continue; /* ignore all but letters and digits */ - case ZERO: - if (!afterDigit2) { - nextType = GET_CHAR_TYPE(*name2); - if (nextType == ZERO || nextType == NONZERO) { - continue; /* ignore leading zero before another digit */ - } - } - break; - case NONZERO: - afterDigit2 = TRUE; - break; - default: - c2 = (char)type; /* lowercased letter */ - afterDigit2 = FALSE; - break; - } - break; /* deliver c2 */ - } - - /* If we reach the ends of both strings then they match */ - if ((c1|c2)==0) { - return 0; - } - - /* Case-insensitive comparison */ - rc = (int)(unsigned char)c1 - (int)(unsigned char)c2; - if (rc != 0) { - return rc; - } - } -} - -/* - * search for an alias - * return the converter number index for gConverterList - */ -static inline uint32_t -findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) { - uint32_t mid, start, limit; - uint32_t lastMid; - int result; - int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED); - char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; - - if (!isUnnormalized) { - if 
(uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - return UINT32_MAX; - } - - /* Lower case and remove ignoreable characters. */ - ucnv_io_stripForCompare(strippedName, alias); - alias = strippedName; - } - - /* do a binary search for the alias */ - start = 0; - limit = gMainTable.untaggedConvArraySize; - mid = limit; - lastMid = UINT32_MAX; - - for (;;) { - mid = (uint32_t)((start + limit) / 2); - if (lastMid == mid) { /* Have we moved? */ - break; /* We haven't moved, and it wasn't found. */ - } - lastMid = mid; - if (isUnnormalized) { - result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid])); - } - else { - result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid])); - } - - if (result < 0) { - limit = mid; - } else if (result > 0) { - start = mid; - } else { - /* Since the gencnval tool folds duplicates into one entry, - * this alias in gAliasList is unique, but different standards - * may map an alias to different converters. - */ - if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) { - *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING; - } - /* State whether the canonical converter name contains an option. - This information is contained in this list in order to maintain backward & forward compatibility. */ - if (containsOption) { - UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo; - *containsOption = (UBool)((containsCnvOptionInfo - && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0)) - || !containsCnvOptionInfo); - } - return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK; - } - } - - return UINT32_MAX; -} - -/* - * Is this alias in this list? - * alias and listOffset should be non-NULL. 
- */ -static inline UBool -isAliasInList(const char *alias, uint32_t listOffset) { - if (listOffset) { - uint32_t currAlias; - uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; - /* +1 to skip listCount */ - const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; - for (currAlias = 0; currAlias < listCount; currAlias++) { - if (currList[currAlias] - && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0) - { - return TRUE; - } - } - } - return FALSE; -} - -/* - * Search for an standard name of an alias (what is the default name - * that this standard uses?) - * return the listOffset for gTaggedAliasLists. If it's 0, - * the it couldn't be found, but the parameters are valid. - */ -static uint32_t -findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) { - uint32_t idx; - uint32_t listOffset; - uint32_t convNum; - UErrorCode myErr = U_ZERO_ERROR; - uint32_t tagNum = getTagNumber(standard); - - /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ - convNum = findConverter(alias, NULL, &myErr); - if (myErr != U_ZERO_ERROR) { - *pErrorCode = myErr; - } - - if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { - listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; - if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) { - return listOffset; - } - if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { - /* Uh Oh! They used an ambiguous alias. - We have to search the whole swiss cheese starting - at the highest standard affinity. - This may take a while. 
- */ - for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) { - listOffset = gMainTable.taggedAliasArray[idx]; - if (listOffset && isAliasInList(alias, listOffset)) { - uint32_t currTagNum = idx/gMainTable.converterListSize; - uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize); - uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum]; - if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) { - return tempListOffset; - } - /* else keep on looking */ - /* We could speed this up by starting on the next row - because an alias is unique per row, right now. - This would change if alias versioning appears. */ - } - } - /* The standard doesn't know about the alias */ - } - /* else no default name */ - return 0; - } - /* else converter or tag not found */ - - return UINT32_MAX; -} - -/* Return the canonical name */ -static uint32_t -findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) { - uint32_t idx; - uint32_t listOffset; - uint32_t convNum; - UErrorCode myErr = U_ZERO_ERROR; - uint32_t tagNum = getTagNumber(standard); - - /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ - convNum = findConverter(alias, NULL, &myErr); - if (myErr != U_ZERO_ERROR) { - *pErrorCode = myErr; - } - - if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { - listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; - if (listOffset && isAliasInList(alias, listOffset)) { - return convNum; - } - if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { - /* Uh Oh! They used an ambiguous alias. - We have to search one slice of the swiss cheese. - We search only in the requested tag, not the whole thing. - This may take a while. 
- */ - uint32_t convStart = (tagNum)*gMainTable.converterListSize; - uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize; - for (idx = convStart; idx < convLimit; idx++) { - listOffset = gMainTable.taggedAliasArray[idx]; - if (listOffset && isAliasInList(alias, listOffset)) { - return idx-convStart; - } - } - /* The standard doesn't know about the alias */ - } - /* else no canonical name */ - } - /* else converter or tag not found */ - - return UINT32_MAX; -} - -U_CAPI const char * -ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) { - const char *aliasTmp = alias; - int32_t i = 0; - for (i = 0; i < 2; i++) { - if (i == 1) { - /* - * After the first unsuccess converter lookup, check to see if - * the name begins with 'x-'. If it does, strip it off and try - * again. This behaviour is similar to how ICU4J does it. - */ - if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') { - aliasTmp = aliasTmp+2; - } else { - break; - } - } - if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) { - uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode); - if (convNum < gMainTable.converterListSize) { - return GET_STRING(gMainTable.converterList[convNum]); - } - /* else converter not found */ - } else { - break; - } - } - - return NULL; -} - -U_CDECL_BEGIN - - -static int32_t U_CALLCONV -ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { - int32_t value = 0; - UAliasContext *myContext = (UAliasContext *)(enumerator->context); - uint32_t listOffset = myContext->listOffset; - - if (listOffset) { - value = gMainTable.taggedAliasLists[listOffset]; - } - return value; -} - -static const char * U_CALLCONV -ucnv_io_nextStandardAliases(UEnumeration *enumerator, - int32_t* resultLength, - UErrorCode * /*pErrorCode*/) -{ - UAliasContext *myContext = (UAliasContext *)(enumerator->context); - uint32_t listOffset = myContext->listOffset; - - if (listOffset) { - uint32_t listCount = 
gMainTable.taggedAliasLists[listOffset]; - const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; - - if (myContext->listIdx < listCount) { - const char *myStr = GET_STRING(currList[myContext->listIdx++]); - if (resultLength) { - *resultLength = (int32_t)uprv_strlen(myStr); - } - return myStr; - } - } - /* Either we accessed a zero length list, or we enumerated too far. */ - if (resultLength) { - *resultLength = 0; - } - return NULL; -} - -static void U_CALLCONV -ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { - ((UAliasContext *)(enumerator->context))->listIdx = 0; -} - -static void U_CALLCONV -ucnv_io_closeUEnumeration(UEnumeration *enumerator) { - uprv_free(enumerator->context); - uprv_free(enumerator); -} - -U_CDECL_END - -/* Enumerate the aliases for the specified converter and standard tag */ -static const UEnumeration gEnumAliases = { - NULL, - NULL, - ucnv_io_closeUEnumeration, - ucnv_io_countStandardAliases, - uenum_unextDefault, - ucnv_io_nextStandardAliases, - ucnv_io_resetStandardAliases -}; - -U_CAPI UEnumeration * U_EXPORT2 -ucnv_openStandardNames(const char *convName, - const char *standard, - UErrorCode *pErrorCode) -{ - UEnumeration *myEnum = NULL; - if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) { - uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode); - - /* When listOffset == 0, we want to acknowledge that the - converter name and standard are okay, but there - is nothing to enumerate. 
*/ - if (listOffset < gMainTable.taggedAliasListsSize) { - UAliasContext *myContext; - - myEnum = static_cast(uprv_malloc(sizeof(UEnumeration))); - if (myEnum == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration)); - myContext = static_cast(uprv_malloc(sizeof(UAliasContext))); - if (myContext == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - uprv_free(myEnum); - return NULL; - } - myContext->listOffset = listOffset; - myContext->listIdx = 0; - myEnum->context = myContext; - } - /* else converter or tag not found */ - } - return myEnum; -} - -static uint16_t -ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) { - if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { - uint32_t convNum = findConverter(alias, NULL, pErrorCode); - if (convNum < gMainTable.converterListSize) { - /* tagListNum - 1 is the ALL tag */ - int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; - - if (listOffset) { - return gMainTable.taggedAliasLists[listOffset]; - } - /* else this shouldn't happen. 
internal program error */ - } - /* else converter not found */ - } - return 0; -} - -static uint16_t -ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) { - if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { - uint32_t currAlias; - uint32_t convNum = findConverter(alias, NULL, pErrorCode); - if (convNum < gMainTable.converterListSize) { - /* tagListNum - 1 is the ALL tag */ - int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; - - if (listOffset) { - uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; - /* +1 to skip listCount */ - const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; - - for (currAlias = start; currAlias < listCount; currAlias++) { - aliases[currAlias] = GET_STRING(currList[currAlias]); - } - } - /* else this shouldn't happen. internal program error */ - } - /* else converter not found */ - } - return 0; -} - -static const char * -ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) { - if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { - uint32_t convNum = findConverter(alias, NULL, pErrorCode); - if (convNum < gMainTable.converterListSize) { - /* tagListNum - 1 is the ALL tag */ - int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; - - if (listOffset) { - uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; - /* +1 to skip listCount */ - const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; - - if (n < listCount) { - return GET_STRING(currList[n]); - } - *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; - } - /* else this shouldn't happen. 
internal program error */ - } - /* else converter not found */ - } - return NULL; -} - -static uint16_t -ucnv_io_countStandards(UErrorCode *pErrorCode) { - if (haveAliasData(pErrorCode)) { - /* Don't include the empty list */ - return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS); - } - - return 0; -} - -U_CAPI const char * U_EXPORT2 -ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) { - if (haveAliasData(pErrorCode)) { - if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) { - return GET_STRING(gMainTable.tagList[n]); - } - *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; - } - - return NULL; -} - -U_CAPI const char * U_EXPORT2 -ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) { - if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { - uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode); - - if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) { - const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; - - /* Get the preferred name from this list */ - if (currList[0]) { - return GET_STRING(currList[0]); - } - /* else someone screwed up the alias table. 
*/ - /* *pErrorCode = U_INVALID_FORMAT_ERROR */ - } - } - - return NULL; -} - -U_CAPI uint16_t U_EXPORT2 -ucnv_countAliases(const char *alias, UErrorCode *pErrorCode) -{ - return ucnv_io_countAliases(alias, pErrorCode); -} - - -U_CAPI const char* U_EXPORT2 -ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) -{ - return ucnv_io_getAlias(alias, n, pErrorCode); -} - -U_CAPI void U_EXPORT2 -ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode) -{ - ucnv_io_getAliases(alias, 0, aliases, pErrorCode); -} - -U_CAPI uint16_t U_EXPORT2 -ucnv_countStandards(void) -{ - UErrorCode err = U_ZERO_ERROR; - return ucnv_io_countStandards(&err); -} - -U_CAPI const char * U_EXPORT2 -ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) { - if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { - uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode); - - if (convNum < gMainTable.converterListSize) { - return GET_STRING(gMainTable.converterList[convNum]); - } - } - - return NULL; -} - -U_CDECL_BEGIN - - -static int32_t U_CALLCONV -ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) { - return gMainTable.converterListSize; -} - -static const char * U_CALLCONV -ucnv_io_nextAllConverters(UEnumeration *enumerator, - int32_t* resultLength, - UErrorCode * /*pErrorCode*/) -{ - uint16_t *myContext = (uint16_t *)(enumerator->context); - - if (*myContext < gMainTable.converterListSize) { - const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]); - if (resultLength) { - *resultLength = (int32_t)uprv_strlen(myStr); - } - return myStr; - } - /* Either we accessed a zero length list, or we enumerated too far. 
*/ - if (resultLength) { - *resultLength = 0; - } - return NULL; -} - -static void U_CALLCONV -ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { - *((uint16_t *)(enumerator->context)) = 0; -} -U_CDECL_END -static const UEnumeration gEnumAllConverters = { - NULL, - NULL, - ucnv_io_closeUEnumeration, - ucnv_io_countAllConverters, - uenum_unextDefault, - ucnv_io_nextAllConverters, - ucnv_io_resetAllConverters -}; - -U_CAPI UEnumeration * U_EXPORT2 -ucnv_openAllNames(UErrorCode *pErrorCode) { - UEnumeration *myEnum = NULL; - if (haveAliasData(pErrorCode)) { - uint16_t *myContext; - - myEnum = static_cast(uprv_malloc(sizeof(UEnumeration))); - if (myEnum == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration)); - myContext = static_cast(uprv_malloc(sizeof(uint16_t))); - if (myContext == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - uprv_free(myEnum); - return NULL; - } - *myContext = 0; - myEnum->context = myContext; - } - return myEnum; -} - -U_CAPI uint16_t -ucnv_io_countKnownConverters(UErrorCode *pErrorCode) { - if (haveAliasData(pErrorCode)) { - return (uint16_t)gMainTable.converterListSize; - } - return 0; -} - -/* alias table swapping ----------------------------------------------------- */ - -U_CDECL_BEGIN - -typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name); -U_CDECL_END - - -/* - * row of a temporary array - * - * gets platform-endian charset string indexes and sorting indexes; - * after sorting this array by strings, the actual arrays are permutated - * according to the sorting indexes - */ -typedef struct TempRow { - uint16_t strIndex, sortIndex; -} TempRow; - -typedef struct TempAliasTable { - const char *chars; - TempRow *rows; - uint16_t *resort; - StripForCompareFn *stripForCompare; -} TempAliasTable; - -enum { - STACK_ROW_CAPACITY=500 -}; - -static int32_t U_CALLCONV -io_compareRows(const void *context, const 
void *left, const void *right) { - char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH], - strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH]; - - TempAliasTable *tempTable=(TempAliasTable *)context; - const char *chars=tempTable->chars; - - return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex), - tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex)); -} - -U_CAPI int32_t U_EXPORT2 -ucnv_swapAliases(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t headerSize; - - const uint16_t *inTable; - const uint32_t *inSectionSizes; - uint32_t toc[offsetsCount]; - uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */ - uint32_t i, count, tocLength, topOffset; - - TempRow rows[STACK_ROW_CAPACITY]; - uint16_t resort[STACK_ROW_CAPACITY]; - TempAliasTable tempTable; - - /* udata_swapDataHeader checks the arguments */ - headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* check data format and format version */ - pInfo=(const UDataInfo *)((const char *)inData+4); - if(!( - pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ - pInfo->dataFormat[1]==0x76 && - pInfo->dataFormat[2]==0x41 && - pInfo->dataFormat[3]==0x6c && - pInfo->formatVersion[0]==3 - )) { - udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - /* an alias table must contain at least the table of contents array */ - if(length>=0 && (length-headerSize)<4*(1+minTocLength)) { - udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", - 
length-headerSize); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - inSectionSizes=(const uint32_t *)((const char *)inData+headerSize); - inTable=(const uint16_t *)inSectionSizes; - uprv_memset(toc, 0, sizeof(toc)); - toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]); - if(tocLengthreadUInt32(inSectionSizes[i]); - } - - /* compute offsets */ - uprv_memset(offsets, 0, sizeof(offsets)); - offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */ - for(i=tagListIndex; i<=tocLength; ++i) { - offsets[i]=offsets[i-1]+toc[i-1]; - } - - /* compute the overall size of the after-header data, in numbers of 16-bit units */ - topOffset=offsets[i-1]+toc[i-1]; - - if(length>=0) { - uint16_t *outTable; - const uint16_t *p, *p2; - uint16_t *q, *q2; - uint16_t oldIndex; - - if((length-headerSize)<(2*(int32_t)topOffset)) { - udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", - length-headerSize); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - outTable=(uint16_t *)((char *)outData+headerSize); - - /* swap the entire table of contents */ - ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode); - - /* swap unormalized strings & normalized strings */ - ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]), - outTable+offsets[stringTableIndex], pErrorCode); - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n"); - return 0; - } - - if(ds->inCharset==ds->outCharset) { - /* no need to sort, just swap all 16-bit values together */ - ds->swapArray16(ds, - inTable+offsets[converterListIndex], - 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]), - outTable+offsets[converterListIndex], - pErrorCode); - } else { - /* allocate the temporary table for sorting */ - count=toc[aliasListIndex]; - - 
tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */ - - if(count<=STACK_ROW_CAPACITY) { - tempTable.rows=rows; - tempTable.resort=resort; - } else { - tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2); - if(tempTable.rows==NULL) { - udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n", - count); - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return 0; - } - tempTable.resort=(uint16_t *)(tempTable.rows+count); - } - - if(ds->outCharset==U_ASCII_FAMILY) { - tempTable.stripForCompare=ucnv_io_stripASCIIForCompare; - } else /* U_EBCDIC_FAMILY */ { - tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare; - } - - /* - * Sort unique aliases+mapped names. - * - * We need to sort the list again by outCharset strings because they - * sort differently for different charset families. - * First we set up a temporary table with the string indexes and - * sorting indexes and sort that. - * Then we permutate and copy/swap the actual values. - */ - p=inTable+offsets[aliasListIndex]; - q=outTable+offsets[aliasListIndex]; - - p2=inTable+offsets[untaggedConvArrayIndex]; - q2=outTable+offsets[untaggedConvArrayIndex]; - - for(i=0; ireadUInt16(p[i]); - tempTable.rows[i].sortIndex=(uint16_t)i; - } - - uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow), - io_compareRows, &tempTable, - FALSE, pErrorCode); - - if(U_SUCCESS(*pErrorCode)) { - /* copy/swap/permutate items */ - if(p!=q) { - for(i=0; iswapArray16(ds, p+oldIndex, 2, q+i, pErrorCode); - ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode); - } - } else { - /* - * If we swap in-place, then the permutation must use another - * temporary array (tempTable.resort) - * before the results are copied to the outBundle. 
- */ - uint16_t *r=tempTable.resort; - - for(i=0; iswapArray16(ds, p+oldIndex, 2, r+i, pErrorCode); - } - uprv_memcpy(q, r, 2*(size_t)count); - - for(i=0; iswapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode); - } - uprv_memcpy(q2, r, 2*(size_t)count); - } - } - - if(tempTable.rows!=rows) { - uprv_free(tempTable.rows); - } - - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n", - count); - return 0; - } - - /* swap remaining 16-bit values */ - ds->swapArray16(ds, - inTable+offsets[converterListIndex], - 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]), - outTable+offsets[converterListIndex], - pErrorCode); - ds->swapArray16(ds, - inTable+offsets[taggedAliasArrayIndex], - 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]), - outTable+offsets[taggedAliasArrayIndex], - pErrorCode); - } - } - - return headerSize+2*(int32_t)topOffset; -} - -#endif - - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/node/deps/icu-small/source/common/ucnv_io.h b/deps/node/deps/icu-small/source/common/ucnv_io.h deleted file mode 100644 index 8f2d7b5a..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_io.h +++ /dev/null @@ -1,127 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ********************************************************************** - * Copyright (C) 1999-2006, International Business Machines - * Corporation and others. All Rights Reserved. 
- ********************************************************************** - * - * - * ucnv_io.h: - * defines variables and functions pertaining to converter name resolution - * aspect of the conversion code - */ - -#ifndef UCNV_IO_H -#define UCNV_IO_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "udataswp.h" - -#define UCNV_AMBIGUOUS_ALIAS_MAP_BIT 0x8000 -#define UCNV_CONTAINS_OPTION_BIT 0x4000 -#define UCNV_CONVERTER_INDEX_MASK 0xFFF -#define UCNV_NUM_RESERVED_TAGS 2 -#define UCNV_NUM_HIDDEN_TAGS 1 - -enum { - UCNV_IO_UNNORMALIZED, - UCNV_IO_STD_NORMALIZED, - UCNV_IO_NORM_TYPE_COUNT -}; - -typedef struct { - uint16_t stringNormalizationType; - uint16_t containsCnvOptionInfo; -} UConverterAliasOptions; - -typedef struct UConverterAlias { - const uint16_t *converterList; - const uint16_t *tagList; - const uint16_t *aliasList; - const uint16_t *untaggedConvArray; - const uint16_t *taggedAliasArray; - const uint16_t *taggedAliasLists; - const UConverterAliasOptions *optionTable; - const uint16_t *stringTable; - const uint16_t *normalizedStringTable; - - uint32_t converterListSize; - uint32_t tagListSize; - uint32_t aliasListSize; - uint32_t untaggedConvArraySize; - uint32_t taggedAliasArraySize; - uint32_t taggedAliasListsSize; - uint32_t optionTableSize; - uint32_t stringTableSize; - uint32_t normalizedStringTableSize; -} UConverterAlias; - -/** - * \var ucnv_io_stripForCompare - * Remove the underscores, dashes and spaces from the name, and convert - * the name to lower case. - * @param dst The destination buffer, which is <= the buffer of name. - * @param dst The destination buffer, which is <= the buffer of name. - * @see ucnv_compareNames - * @return the destination buffer. 
- */ -#if U_CHARSET_FAMILY==U_ASCII_FAMILY -# define ucnv_io_stripForCompare ucnv_io_stripASCIIForCompare -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY -# define ucnv_io_stripForCompare ucnv_io_stripEBCDICForCompare -#else -# error U_CHARSET_FAMILY is not valid -#endif - -U_CAPI char * U_CALLCONV -ucnv_io_stripASCIIForCompare(char *dst, const char *name); - -U_CAPI char * U_CALLCONV -ucnv_io_stripEBCDICForCompare(char *dst, const char *name); - -/** - * Map a converter alias name to a canonical converter name. - * The alias is searched for case-insensitively, the converter name - * is returned in mixed-case. - * Returns NULL if the alias is not found. - * @param alias The alias name to be searched. - * @param containsOption A return value stating whether the returned converter name contains an option (a comma) - * @param pErrorCode The error code - * @return the converter name in mixed-case, return NULL if the alias is not found. - */ -U_CAPI const char * -ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode); - -/** - * Return the number of all known converter names (no aliases). - * @param pErrorCode The error code - * @return the number of all aliases - */ -U_CAPI uint16_t -ucnv_io_countKnownConverters(UErrorCode *pErrorCode); - -/** - * Swap an ICU converter alias table. See implementation for details. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -ucnv_swapAliases(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -#endif - -#endif /* _UCNV_IO */ - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/node/deps/icu-small/source/common/ucnv_lmb.cpp b/deps/node/deps/icu-small/source/common/ucnv_lmb.cpp deleted file mode 100644 index 6dd8e834..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_lmb.cpp +++ /dev/null @@ -1,1386 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnv_lmb.cpp -* encoding: UTF-8 -* tab size: 4 (not used) -* indentation:4 -* -* created on: 2000feb09 -* created by: Brendan Murray -* extensively hacked up by: Jim Snyder-Grant -* -* Modification History: -* -* Date Name Description -* -* 06/20/2000 helena OS/400 port changes; mostly typecast. -* 06/27/2000 Jim Snyder-Grant Deal with partial characters and small buffers. -* Add comments to document LMBCS format and implementation -* restructured order & breakdown of functions -* 06/28/2000 helena Major rewrite for the callback API changes. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "unicode/ucnv_err.h" -#include "unicode/ucnv.h" -#include "unicode/uset.h" -#include "cmemory.h" -#include "cstring.h" -#include "uassert.h" -#include "ucnv_imp.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" - -#ifdef EBCDIC_RTL - #include "ascii_a.h" -#endif - -/* - LMBCS - - (Lotus Multi-Byte Character Set) - - LMBCS was invented in the late 1980's and is primarily used in Lotus Notes - databases and in Lotus 1-2-3 files. Programmers who work with the APIs - into these products will sometimes need to deal with strings in this format. - - The code in this file provides an implementation for an ICU converter of - LMBCS to and from Unicode. - - Since the LMBCS character set is only sparsely documented in existing - printed or online material, we have added extensive annotation to this - file to serve as a guide to understanding LMBCS. 
- - LMBCS was originally designed with these four sometimes-competing design goals: - - -Provide encodings for the characters in 12 existing national standards - (plus a few other characters) - -Minimal memory footprint - -Maximal speed of conversion into the existing national character sets - -No need to track a changing state as you interpret a string. - - - All of the national character sets LMBCS was trying to encode are 'ANSI' - based, in that the bytes from 0x20 - 0x7F are almost exactly the - same common Latin unaccented characters and symbols in all character sets. - - So, in order to help meet the speed & memory design goals, the common ANSI - bytes from 0x20-0x7F are represented by the same single-byte values in LMBCS. - - The general LMBCS code unit is from 1-3 bytes. We can describe the 3 bytes as - follows: - - [G] D1 [D2] - - That is, a sometimes-optional 'group' byte, followed by 1 and sometimes 2 - data bytes. The maximum size of a LMBCS chjaracter is 3 bytes: -*/ -#define ULMBCS_CHARSIZE_MAX 3 -/* - The single-byte values from 0x20 to 0x7F are examples of single D1 bytes. - We often have to figure out if byte values are below or above this, so we - use the ANSI nomenclature 'C0' and 'C1' to refer to the range of control - characters just above & below the common lower-ANSI range */ -#define ULMBCS_C0END 0x1F -#define ULMBCS_C1START 0x80 -/* - Since LMBCS is always dealing in byte units. we create a local type here for - dealing with these units of LMBCS code units: - -*/ -typedef uint8_t ulmbcs_byte_t; - -/* - Most of the values less than 0x20 are reserved in LMBCS to announce - which national character standard is being used for the 'D' bytes. 
- In the comments we show the common name and the IBM character-set ID - for these character-set announcers: -*/ - -#define ULMBCS_GRP_L1 0x01 /* Latin-1 :ibm-850 */ -#define ULMBCS_GRP_GR 0x02 /* Greek :ibm-851 */ -#define ULMBCS_GRP_HE 0x03 /* Hebrew :ibm-1255 */ -#define ULMBCS_GRP_AR 0x04 /* Arabic :ibm-1256 */ -#define ULMBCS_GRP_RU 0x05 /* Cyrillic :ibm-1251 */ -#define ULMBCS_GRP_L2 0x06 /* Latin-2 :ibm-852 */ -#define ULMBCS_GRP_TR 0x08 /* Turkish :ibm-1254 */ -#define ULMBCS_GRP_TH 0x0B /* Thai :ibm-874 */ -#define ULMBCS_GRP_JA 0x10 /* Japanese :ibm-943 */ -#define ULMBCS_GRP_KO 0x11 /* Korean :ibm-1261 */ -#define ULMBCS_GRP_TW 0x12 /* Chinese SC :ibm-950 */ -#define ULMBCS_GRP_CN 0x13 /* Chinese TC :ibm-1386 */ - -/* - So, the beginning of understanding LMBCS is that IF the first byte of a LMBCS - character is one of those 12 values, you can interpret the remaining bytes of - that character as coming from one of those character sets. Since the lower - ANSI bytes already are represented in single bytes, using one of the character - set announcers is used to announce a character that starts with a byte of - 0x80 or greater. - - The character sets are arranged so that the single byte sets all appear - before the multi-byte character sets. When we need to tell whether a - group byte is for a single byte char set or not we use this define: */ - -#define ULMBCS_DOUBLEOPTGROUP_START 0x10 - -/* -However, to fully understand LMBCS, you must also understand a series of -exceptions & optimizations made in service of the design goals. - -First, those of you who are character set mavens may have noticed that -the 'double-byte' character sets are actually multi-byte character sets -that can have 1 or two bytes, even in the upper-ascii range. 
To force -each group byte to introduce a fixed-width encoding (to make it faster to -count characters), we use a convention of doubling up on the group byte -to introduce any single-byte character > 0x80 in an otherwise double-byte -character set. So, for example, the LMBCS sequence x10 x10 xAE is the -same as '0xAE' in the Japanese code page 943. - -Next, you will notice that the list of group bytes has some gaps. -These are used in various ways. - -We reserve a few special single byte values for common control -characters. These are in the same place as their ANSI eqivalents for speed. -*/ - -#define ULMBCS_HT 0x09 /* Fixed control char - Horizontal Tab */ -#define ULMBCS_LF 0x0A /* Fixed control char - Line Feed */ -#define ULMBCS_CR 0x0D /* Fixed control char - Carriage Return */ - -/* Then, 1-2-3 reserved a special single-byte character to put at the -beginning of internal 'system' range names: */ - -#define ULMBCS_123SYSTEMRANGE 0x19 - -/* Then we needed a place to put all the other ansi control characters -that must be moved to different values because LMBCS reserves those -values for other purposes. To represent the control characters, we start -with a first byte of 0xF & add the control chaarcter value as the -second byte */ -#define ULMBCS_GRP_CTRL 0x0F - -/* For the C0 controls (less than 0x20), we add 0x20 to preserve the -useful doctrine that any byte less than 0x20 in a LMBCS char must be -the first byte of a character:*/ -#define ULMBCS_CTRLOFFSET 0x20 - -/* -Where to put the characters that aren't part of any of the 12 national -character sets? The first thing that was done, in the earlier years of -LMBCS, was to use up the spaces of the form - - [G] D1, - - where 'G' was one of the single-byte character groups, and - D1 was less than 0x80. These sequences are gathered together - into a Lotus-invented doublebyte character set to represent a - lot of stray values. 
Internally, in this implementation, we track this - as group '0', as a place to tuck this exceptions list.*/ - -#define ULMBCS_GRP_EXCEPT 0x00 -/* - Finally, as the durability and usefulness of UNICODE became clear, - LOTUS added a new group 0x14 to hold Unicode values not otherwise - represented in LMBCS: */ -#define ULMBCS_GRP_UNICODE 0x14 -/* The two bytes appearing after a 0x14 are intrepreted as UFT-16 BE -(Big-Endian) characters. The exception comes when the UTF16 -representation would have a zero as the second byte. In that case, -'F6' is used in its place, and the bytes are swapped. (This prevents -LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK: -0xF6xx is in the middle of the Private Use Area.)*/ -#define ULMBCS_UNICOMPATZERO 0xF6 - -/* It is also useful in our code to have a constant for the size of -a LMBCS char that holds a literal Unicode value */ -#define ULMBCS_UNICODE_SIZE 3 - -/* -To squish the LMBCS representations down even further, and to make -translations even faster,sometimes the optimization group byte can be dropped -from a LMBCS character. This is decided on a process-by-process basis. The -group byte that is dropped is called the 'optimization group'. - -For Notes, the optimzation group is always 0x1.*/ -#define ULMBCS_DEFAULTOPTGROUP 0x1 -/* For 1-2-3 files, the optimzation group is stored in the header of the 1-2-3 -file. - - In any case, when using ICU, you either pass in the -optimization group as part of the name of the converter (LMBCS-1, LMBCS-2, -etc.). Using plain 'LMBCS' as the name of the converter will give you -LMBCS-1. - - -*** Implementation strategy *** - - -Because of the extensive use of other character sets, the LMBCS converter -keeps a mapping between optimization groups and IBM character sets, so that -ICU converters can be created and used as needed. 
*/ - -/* As you can see, even though any byte below 0x20 could be an optimization -byte, only those at 0x13 or below can map to an actual converter. To limit -some loops and searches, we define a value for that last group converter:*/ - -#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */ - -static const char * const OptGroupByteToCPName[ULMBCS_GRP_LAST + 1] = { - /* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */ - /* 0x0001 */ "ibm-850", - /* 0x0002 */ "ibm-851", - /* 0x0003 */ "windows-1255", - /* 0x0004 */ "windows-1256", - /* 0x0005 */ "windows-1251", - /* 0x0006 */ "ibm-852", - /* 0x0007 */ NULL, /* Unused */ - /* 0x0008 */ "windows-1254", - /* 0x0009 */ NULL, /* Control char HT */ - /* 0x000A */ NULL, /* Control char LF */ - /* 0x000B */ "windows-874", - /* 0x000C */ NULL, /* Unused */ - /* 0x000D */ NULL, /* Control char CR */ - /* 0x000E */ NULL, /* Unused */ - /* 0x000F */ NULL, /* Control chars: 0x0F20 + C0/C1 character: algorithmic */ - /* 0x0010 */ "windows-932", - /* 0x0011 */ "windows-949", - /* 0x0012 */ "windows-950", - /* 0x0013 */ "windows-936" - - /* The rest are null, including the 0x0014 Unicode compatibility region - and 0x0019, the 1-2-3 system range control char */ -}; - - -/* That's approximately all the data that's needed for translating - LMBCS to Unicode. - - -However, to translate Unicode to LMBCS, we need some more support. - -That's because there are often more than one possible mappings from a Unicode -code point back into LMBCS. The first thing we do is look up into a table -to figure out if there are more than one possible mappings. This table, -arranged by Unicode values (including ranges) either lists which group -to use, or says that it could go into one or more of the SBCS sets, or -into one or more of the DBCS sets. (If the character exists in both DBCS & -SBCS, the table will place it in the SBCS sets, to make the LMBCS code point -length as small as possible. 
Here's the two special markers we use to indicate -ambiguous mappings: */ - -#define ULMBCS_AMBIGUOUS_SBCS 0x80 /* could fit in more than one - LMBCS sbcs native encoding - (example: most accented latin) */ -#define ULMBCS_AMBIGUOUS_MBCS 0x81 /* could fit in more than one - LMBCS mbcs native encoding - (example: Unihan) */ -#define ULMBCS_AMBIGUOUS_ALL 0x82 -/* And here's a simple way to see if a group falls in an appropriate range */ -#define ULMBCS_AMBIGUOUS_MATCH(agroup, xgroup) \ - ((((agroup) == ULMBCS_AMBIGUOUS_SBCS) && \ - (xgroup) < ULMBCS_DOUBLEOPTGROUP_START) || \ - (((agroup) == ULMBCS_AMBIGUOUS_MBCS) && \ - (xgroup) >= ULMBCS_DOUBLEOPTGROUP_START)) || \ - ((agroup) == ULMBCS_AMBIGUOUS_ALL) - - -/* The table & some code to use it: */ - - -static const struct _UniLMBCSGrpMap -{ - const UChar uniStartRange; - const UChar uniEndRange; - const ulmbcs_byte_t GrpType; -} UniLMBCSGrpMap[] -= -{ - - {0x0001, 0x001F, ULMBCS_GRP_CTRL}, - {0x0080, 0x009F, ULMBCS_GRP_CTRL}, - {0x00A0, 0x00A6, ULMBCS_AMBIGUOUS_SBCS}, - {0x00A7, 0x00A8, ULMBCS_AMBIGUOUS_ALL}, - {0x00A9, 0x00AF, ULMBCS_AMBIGUOUS_SBCS}, - {0x00B0, 0x00B1, ULMBCS_AMBIGUOUS_ALL}, - {0x00B2, 0x00B3, ULMBCS_AMBIGUOUS_SBCS}, - {0x00B4, 0x00B4, ULMBCS_AMBIGUOUS_ALL}, - {0x00B5, 0x00B5, ULMBCS_AMBIGUOUS_SBCS}, - {0x00B6, 0x00B6, ULMBCS_AMBIGUOUS_ALL}, - {0x00B7, 0x00D6, ULMBCS_AMBIGUOUS_SBCS}, - {0x00D7, 0x00D7, ULMBCS_AMBIGUOUS_ALL}, - {0x00D8, 0x00F6, ULMBCS_AMBIGUOUS_SBCS}, - {0x00F7, 0x00F7, ULMBCS_AMBIGUOUS_ALL}, - {0x00F8, 0x01CD, ULMBCS_AMBIGUOUS_SBCS}, - {0x01CE, 0x01CE, ULMBCS_GRP_TW }, - {0x01CF, 0x02B9, ULMBCS_AMBIGUOUS_SBCS}, - {0x02BA, 0x02BA, ULMBCS_GRP_CN}, - {0x02BC, 0x02C8, ULMBCS_AMBIGUOUS_SBCS}, - {0x02C9, 0x02D0, ULMBCS_AMBIGUOUS_MBCS}, - {0x02D8, 0x02DD, ULMBCS_AMBIGUOUS_SBCS}, - {0x0384, 0x0390, ULMBCS_AMBIGUOUS_SBCS}, - {0x0391, 0x03A9, ULMBCS_AMBIGUOUS_ALL}, - {0x03AA, 0x03B0, ULMBCS_AMBIGUOUS_SBCS}, - {0x03B1, 0x03C9, ULMBCS_AMBIGUOUS_ALL}, - {0x03CA, 0x03CE, ULMBCS_AMBIGUOUS_SBCS}, - 
{0x0400, 0x0400, ULMBCS_GRP_RU}, - {0x0401, 0x0401, ULMBCS_AMBIGUOUS_ALL}, - {0x0402, 0x040F, ULMBCS_GRP_RU}, - {0x0410, 0x0431, ULMBCS_AMBIGUOUS_ALL}, - {0x0432, 0x044E, ULMBCS_GRP_RU}, - {0x044F, 0x044F, ULMBCS_AMBIGUOUS_ALL}, - {0x0450, 0x0491, ULMBCS_GRP_RU}, - {0x05B0, 0x05F2, ULMBCS_GRP_HE}, - {0x060C, 0x06AF, ULMBCS_GRP_AR}, - {0x0E01, 0x0E5B, ULMBCS_GRP_TH}, - {0x200C, 0x200F, ULMBCS_AMBIGUOUS_SBCS}, - {0x2010, 0x2010, ULMBCS_AMBIGUOUS_MBCS}, - {0x2013, 0x2014, ULMBCS_AMBIGUOUS_SBCS}, - {0x2015, 0x2015, ULMBCS_AMBIGUOUS_MBCS}, - {0x2016, 0x2016, ULMBCS_AMBIGUOUS_MBCS}, - {0x2017, 0x2017, ULMBCS_AMBIGUOUS_SBCS}, - {0x2018, 0x2019, ULMBCS_AMBIGUOUS_ALL}, - {0x201A, 0x201B, ULMBCS_AMBIGUOUS_SBCS}, - {0x201C, 0x201D, ULMBCS_AMBIGUOUS_ALL}, - {0x201E, 0x201F, ULMBCS_AMBIGUOUS_SBCS}, - {0x2020, 0x2021, ULMBCS_AMBIGUOUS_ALL}, - {0x2022, 0x2024, ULMBCS_AMBIGUOUS_SBCS}, - {0x2025, 0x2025, ULMBCS_AMBIGUOUS_MBCS}, - {0x2026, 0x2026, ULMBCS_AMBIGUOUS_ALL}, - {0x2027, 0x2027, ULMBCS_GRP_TW}, - {0x2030, 0x2030, ULMBCS_AMBIGUOUS_ALL}, - {0x2031, 0x2031, ULMBCS_AMBIGUOUS_SBCS}, - {0x2032, 0x2033, ULMBCS_AMBIGUOUS_MBCS}, - {0x2035, 0x2035, ULMBCS_AMBIGUOUS_MBCS}, - {0x2039, 0x203A, ULMBCS_AMBIGUOUS_SBCS}, - {0x203B, 0x203B, ULMBCS_AMBIGUOUS_MBCS}, - {0x203C, 0x203C, ULMBCS_GRP_EXCEPT}, - {0x2074, 0x2074, ULMBCS_GRP_KO}, - {0x207F, 0x207F, ULMBCS_GRP_EXCEPT}, - {0x2081, 0x2084, ULMBCS_GRP_KO}, - {0x20A4, 0x20AC, ULMBCS_AMBIGUOUS_SBCS}, - {0x2103, 0x2109, ULMBCS_AMBIGUOUS_MBCS}, - {0x2111, 0x2120, ULMBCS_AMBIGUOUS_SBCS}, - /*zhujin: upgrade, for regressiont test, spr HKIA4YHTSU*/ - {0x2121, 0x2121, ULMBCS_AMBIGUOUS_MBCS}, - {0x2122, 0x2126, ULMBCS_AMBIGUOUS_SBCS}, - {0x212B, 0x212B, ULMBCS_AMBIGUOUS_MBCS}, - {0x2135, 0x2135, ULMBCS_AMBIGUOUS_SBCS}, - {0x2153, 0x2154, ULMBCS_GRP_KO}, - {0x215B, 0x215E, ULMBCS_GRP_EXCEPT}, - {0x2160, 0x2179, ULMBCS_AMBIGUOUS_MBCS}, - {0x2190, 0x2193, ULMBCS_AMBIGUOUS_ALL}, - {0x2194, 0x2195, ULMBCS_GRP_EXCEPT}, - {0x2196, 0x2199, 
ULMBCS_AMBIGUOUS_MBCS}, - {0x21A8, 0x21A8, ULMBCS_GRP_EXCEPT}, - {0x21B8, 0x21B9, ULMBCS_GRP_CN}, - {0x21D0, 0x21D1, ULMBCS_GRP_EXCEPT}, - {0x21D2, 0x21D2, ULMBCS_AMBIGUOUS_MBCS}, - {0x21D3, 0x21D3, ULMBCS_GRP_EXCEPT}, - {0x21D4, 0x21D4, ULMBCS_AMBIGUOUS_MBCS}, - {0x21D5, 0x21D5, ULMBCS_GRP_EXCEPT}, - {0x21E7, 0x21E7, ULMBCS_GRP_CN}, - {0x2200, 0x2200, ULMBCS_AMBIGUOUS_MBCS}, - {0x2201, 0x2201, ULMBCS_GRP_EXCEPT}, - {0x2202, 0x2202, ULMBCS_AMBIGUOUS_MBCS}, - {0x2203, 0x2203, ULMBCS_AMBIGUOUS_MBCS}, - {0x2204, 0x2206, ULMBCS_GRP_EXCEPT}, - {0x2207, 0x2208, ULMBCS_AMBIGUOUS_MBCS}, - {0x2209, 0x220A, ULMBCS_GRP_EXCEPT}, - {0x220B, 0x220B, ULMBCS_AMBIGUOUS_MBCS}, - {0x220F, 0x2215, ULMBCS_AMBIGUOUS_MBCS}, - {0x2219, 0x2219, ULMBCS_GRP_EXCEPT}, - {0x221A, 0x221A, ULMBCS_AMBIGUOUS_MBCS}, - {0x221B, 0x221C, ULMBCS_GRP_EXCEPT}, - {0x221D, 0x221E, ULMBCS_AMBIGUOUS_MBCS}, - {0x221F, 0x221F, ULMBCS_GRP_EXCEPT}, - {0x2220, 0x2220, ULMBCS_AMBIGUOUS_MBCS}, - {0x2223, 0x222A, ULMBCS_AMBIGUOUS_MBCS}, - {0x222B, 0x223D, ULMBCS_AMBIGUOUS_MBCS}, - {0x2245, 0x2248, ULMBCS_GRP_EXCEPT}, - {0x224C, 0x224C, ULMBCS_GRP_TW}, - {0x2252, 0x2252, ULMBCS_AMBIGUOUS_MBCS}, - {0x2260, 0x2261, ULMBCS_AMBIGUOUS_MBCS}, - {0x2262, 0x2265, ULMBCS_GRP_EXCEPT}, - {0x2266, 0x226F, ULMBCS_AMBIGUOUS_MBCS}, - {0x2282, 0x2283, ULMBCS_AMBIGUOUS_MBCS}, - {0x2284, 0x2285, ULMBCS_GRP_EXCEPT}, - {0x2286, 0x2287, ULMBCS_AMBIGUOUS_MBCS}, - {0x2288, 0x2297, ULMBCS_GRP_EXCEPT}, - {0x2299, 0x22BF, ULMBCS_AMBIGUOUS_MBCS}, - {0x22C0, 0x22C0, ULMBCS_GRP_EXCEPT}, - {0x2310, 0x2310, ULMBCS_GRP_EXCEPT}, - {0x2312, 0x2312, ULMBCS_AMBIGUOUS_MBCS}, - {0x2318, 0x2321, ULMBCS_GRP_EXCEPT}, - {0x2318, 0x2321, ULMBCS_GRP_CN}, - {0x2460, 0x24E9, ULMBCS_AMBIGUOUS_MBCS}, - {0x2500, 0x2500, ULMBCS_AMBIGUOUS_SBCS}, - {0x2501, 0x2501, ULMBCS_AMBIGUOUS_MBCS}, - {0x2502, 0x2502, ULMBCS_AMBIGUOUS_ALL}, - {0x2503, 0x2503, ULMBCS_AMBIGUOUS_MBCS}, - {0x2504, 0x2505, ULMBCS_GRP_TW}, - {0x2506, 0x2665, ULMBCS_AMBIGUOUS_ALL}, - {0x2666, 0x2666, 
ULMBCS_GRP_EXCEPT}, - {0x2667, 0x2669, ULMBCS_AMBIGUOUS_SBCS}, - {0x266A, 0x266A, ULMBCS_AMBIGUOUS_ALL}, - {0x266B, 0x266C, ULMBCS_AMBIGUOUS_SBCS}, - {0x266D, 0x266D, ULMBCS_AMBIGUOUS_MBCS}, - {0x266E, 0x266E, ULMBCS_AMBIGUOUS_SBCS}, - {0x266F, 0x266F, ULMBCS_GRP_JA}, - {0x2670, 0x2E7F, ULMBCS_AMBIGUOUS_SBCS}, - {0x2E80, 0xF861, ULMBCS_AMBIGUOUS_MBCS}, - {0xF862, 0xF8FF, ULMBCS_GRP_EXCEPT}, - {0xF900, 0xFA2D, ULMBCS_AMBIGUOUS_MBCS}, - {0xFB00, 0xFEFF, ULMBCS_AMBIGUOUS_SBCS}, - {0xFF01, 0xFFEE, ULMBCS_AMBIGUOUS_MBCS}, - {0xFFFF, 0xFFFF, ULMBCS_GRP_UNICODE} -}; - -static ulmbcs_byte_t -FindLMBCSUniRange(UChar uniChar) -{ - const struct _UniLMBCSGrpMap * pTable = UniLMBCSGrpMap; - - while (uniChar > pTable->uniEndRange) - { - pTable++; - } - - if (uniChar >= pTable->uniStartRange) - { - return pTable->GrpType; - } - return ULMBCS_GRP_UNICODE; -} - -/* -We also ask the creator of a converter to send in a preferred locale -that we can use in resolving ambiguous mappings. They send the locale -in as a string, and we map it, if possible, to one of the -LMBCS groups. We use this table, and the associated code, to -do the lookup: */ - -/************************************************** - This table maps locale ID's to LMBCS opt groups. - The default return is group 0x01. Note that for - performance reasons, the table is sorted in - increasing alphabetic order, with the notable - exception of zhTW. This is to force the check - for Traditonal Chinese before dropping back to - Simplified. - - Note too that the Latin-1 groups have been - commented out because it's the default, and - this shortens the table, allowing a serial - search to go quickly. 
- *************************************************/ - -static const struct _LocaleLMBCSGrpMap -{ - const char *LocaleID; - const ulmbcs_byte_t OptGroup; -} LocaleLMBCSGrpMap[] = -{ - {"ar", ULMBCS_GRP_AR}, - {"be", ULMBCS_GRP_RU}, - {"bg", ULMBCS_GRP_L2}, - /* {"ca", ULMBCS_GRP_L1}, */ - {"cs", ULMBCS_GRP_L2}, - /* {"da", ULMBCS_GRP_L1}, */ - /* {"de", ULMBCS_GRP_L1}, */ - {"el", ULMBCS_GRP_GR}, - /* {"en", ULMBCS_GRP_L1}, */ - /* {"es", ULMBCS_GRP_L1}, */ - /* {"et", ULMBCS_GRP_L1}, */ - /* {"fi", ULMBCS_GRP_L1}, */ - /* {"fr", ULMBCS_GRP_L1}, */ - {"he", ULMBCS_GRP_HE}, - {"hu", ULMBCS_GRP_L2}, - /* {"is", ULMBCS_GRP_L1}, */ - /* {"it", ULMBCS_GRP_L1}, */ - {"iw", ULMBCS_GRP_HE}, - {"ja", ULMBCS_GRP_JA}, - {"ko", ULMBCS_GRP_KO}, - /* {"lt", ULMBCS_GRP_L1}, */ - /* {"lv", ULMBCS_GRP_L1}, */ - {"mk", ULMBCS_GRP_RU}, - /* {"nl", ULMBCS_GRP_L1}, */ - /* {"no", ULMBCS_GRP_L1}, */ - {"pl", ULMBCS_GRP_L2}, - /* {"pt", ULMBCS_GRP_L1}, */ - {"ro", ULMBCS_GRP_L2}, - {"ru", ULMBCS_GRP_RU}, - {"sh", ULMBCS_GRP_L2}, - {"sk", ULMBCS_GRP_L2}, - {"sl", ULMBCS_GRP_L2}, - {"sq", ULMBCS_GRP_L2}, - {"sr", ULMBCS_GRP_RU}, - /* {"sv", ULMBCS_GRP_L1}, */ - {"th", ULMBCS_GRP_TH}, - {"tr", ULMBCS_GRP_TR}, - {"uk", ULMBCS_GRP_RU}, - /* {"vi", ULMBCS_GRP_L1}, */ - {"zhTW", ULMBCS_GRP_TW}, - {"zh", ULMBCS_GRP_CN}, - {NULL, ULMBCS_GRP_L1} -}; - - -static ulmbcs_byte_t -FindLMBCSLocale(const char *LocaleID) -{ - const struct _LocaleLMBCSGrpMap *pTable = LocaleLMBCSGrpMap; - - if ((!LocaleID) || (!*LocaleID)) - { - return 0; - } - - while (pTable->LocaleID) - { - if (*pTable->LocaleID == *LocaleID) /* Check only first char for speed */ - { - /* First char matches - check whole name, for entry-length */ - if (uprv_strncmp(pTable->LocaleID, LocaleID, strlen(pTable->LocaleID)) == 0) - return pTable->OptGroup; - } - else - if (*pTable->LocaleID > *LocaleID) /* Sorted alphabetically - exit */ - break; - pTable++; - } - return ULMBCS_GRP_L1; -} - - -/* - Before we get to the main body of code, 
here's how we hook up to the rest - of ICU. ICU converters are required to define a structure that includes - some function pointers, and some common data, in the style of a C++ - vtable. There is also room in there for converter-specific data. LMBCS - uses that converter-specific data to keep track of the 12 subconverters - we use, the optimization group, and the group (if any) that matches the - locale. We have one structure instantiated for each of the 12 possible - optimization groups. To avoid typos & to avoid boring the reader, we - put the declarations of these structures and functions into macros. To see - the definitions of these structures, see unicode\ucnv_bld.h -*/ - -typedef struct - { - UConverterSharedData *OptGrpConverter[ULMBCS_GRP_LAST+1]; /* Converter per Opt. grp. */ - uint8_t OptGroup; /* default Opt. grp. for this LMBCS session */ - uint8_t localeConverterIndex; /* reasonable locale match for index */ - } -UConverterDataLMBCS; - -U_CDECL_BEGIN -static void U_CALLCONV _LMBCSClose(UConverter * _this); -U_CDECL_END - -#define DECLARE_LMBCS_DATA(n) \ -static const UConverterImpl _LMBCSImpl##n={\ - UCNV_LMBCS_##n,\ - NULL,NULL,\ - _LMBCSOpen##n,\ - _LMBCSClose,\ - NULL,\ - _LMBCSToUnicodeWithOffsets,\ - _LMBCSToUnicodeWithOffsets,\ - _LMBCSFromUnicode,\ - _LMBCSFromUnicode,\ - NULL,\ - NULL,\ - NULL,\ - NULL,\ - _LMBCSSafeClone,\ - ucnv_getCompleteUnicodeSet,\ - NULL,\ - NULL\ -};\ -static const UConverterStaticData _LMBCSStaticData##n={\ - sizeof(UConverterStaticData),\ - "LMBCS-" #n,\ - 0, UCNV_IBM, UCNV_LMBCS_##n, 1, 3,\ - { 0x3f, 0, 0, 0 },1,FALSE,FALSE,0,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} \ -};\ -const UConverterSharedData _LMBCSData##n= \ - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_LMBCSStaticData##n, &_LMBCSImpl##n); - - /* The only function we needed to duplicate 12 times was the 'open' -function, which will do basically the same thing except set a different -optimization group. 
So, we put the common stuff into a worker function, -and set up another macro to stamp out the 12 open functions:*/ -#define DEFINE_LMBCS_OPEN(n) \ -static void U_CALLCONV \ - _LMBCSOpen##n(UConverter* _this, UConverterLoadArgs* pArgs, UErrorCode* err) \ -{ _LMBCSOpenWorker(_this, pArgs, err, n); } - - - -/* Here's the open worker & the common close function */ -static void -_LMBCSOpenWorker(UConverter* _this, - UConverterLoadArgs *pArgs, - UErrorCode* err, - ulmbcs_byte_t OptGroup) -{ - UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS)); - _this->extraInfo = extraInfo; - if(extraInfo != NULL) - { - UConverterNamePieces stackPieces; - UConverterLoadArgs stackArgs= UCNV_LOAD_ARGS_INITIALIZER; - ulmbcs_byte_t i; - - uprv_memset(extraInfo, 0, sizeof(UConverterDataLMBCS)); - - stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable; - - for (i=0; i <= ULMBCS_GRP_LAST && U_SUCCESS(*err); i++) - { - if(OptGroupByteToCPName[i] != NULL) { - extraInfo->OptGrpConverter[i] = ucnv_loadSharedData(OptGroupByteToCPName[i], &stackPieces, &stackArgs, err); - } - } - - if(U_FAILURE(*err) || pArgs->onlyTestIsLoadable) { - _LMBCSClose(_this); - return; - } - extraInfo->OptGroup = OptGroup; - extraInfo->localeConverterIndex = FindLMBCSLocale(pArgs->locale); - } - else - { - *err = U_MEMORY_ALLOCATION_ERROR; - } -} - -U_CDECL_BEGIN -static void U_CALLCONV -_LMBCSClose(UConverter * _this) -{ - if (_this->extraInfo != NULL) - { - ulmbcs_byte_t Ix; - UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo; - - for (Ix=0; Ix <= ULMBCS_GRP_LAST; Ix++) - { - if (extraInfo->OptGrpConverter[Ix] != NULL) - ucnv_unloadSharedDataIfReady(extraInfo->OptGrpConverter[Ix]); - } - if (!_this->isExtraLocal) { - uprv_free (_this->extraInfo); - _this->extraInfo = NULL; - } - } -} - -typedef struct LMBCSClone { - UConverter cnv; - UConverterDataLMBCS lmbcs; -} LMBCSClone; - -static UConverter * U_CALLCONV -_LMBCSSafeClone(const UConverter 
*cnv, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status) { - (void)status; - LMBCSClone *newLMBCS; - UConverterDataLMBCS *extraInfo; - int32_t i; - - if(*pBufferSize<=0) { - *pBufferSize=(int32_t)sizeof(LMBCSClone); - return NULL; - } - - extraInfo=(UConverterDataLMBCS *)cnv->extraInfo; - newLMBCS=(LMBCSClone *)stackBuffer; - - /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ - - uprv_memcpy(&newLMBCS->lmbcs, extraInfo, sizeof(UConverterDataLMBCS)); - - /* share the subconverters */ - for(i = 0; i <= ULMBCS_GRP_LAST; ++i) { - if(extraInfo->OptGrpConverter[i] != NULL) { - ucnv_incrementRefCount(extraInfo->OptGrpConverter[i]); - } - } - - newLMBCS->cnv.extraInfo = &newLMBCS->lmbcs; - newLMBCS->cnv.isExtraLocal = TRUE; - return &newLMBCS->cnv; -} - -/* - * There used to be a _LMBCSGetUnicodeSet() function here (up to svn revision 20117) - * which added all code points except for U+F6xx - * because those cannot be represented in the Unicode group. - * However, it turns out that windows-950 has roundtrips for all of U+F6xx - * which means that LMBCS can convert all Unicode code points after all. - * We now simply use ucnv_getCompleteUnicodeSet(). - * - * This may need to be looked at again as Lotus uses _LMBCSGetUnicodeSet(). (091216) - */ - -/* - Here's the basic helper function that we use when converting from - Unicode to LMBCS, and we suspect that a Unicode character will fit into - one of the 12 groups. The return value is the number of bytes written - starting at pStartLMBCS (if any). 
-*/ - -static size_t -LMBCSConversionWorker ( - UConverterDataLMBCS * extraInfo, /* subconverters, opt & locale groups */ - ulmbcs_byte_t group, /* The group to try */ - ulmbcs_byte_t * pStartLMBCS, /* where to put the results */ - UChar * pUniChar, /* The input unicode character */ - ulmbcs_byte_t * lastConverterIndex, /* output: track last successful group used */ - UBool * groups_tried /* output: track any unsuccessful groups */ -) -{ - ulmbcs_byte_t * pLMBCS = pStartLMBCS; - UConverterSharedData * xcnv = extraInfo->OptGrpConverter[group]; - - int bytesConverted; - uint32_t value; - ulmbcs_byte_t firstByte; - - U_ASSERT(xcnv); - U_ASSERT(group 0) { - firstByte = (ulmbcs_byte_t)(value >> ((bytesConverted - 1) * 8)); - } else { - /* most common failure mode is an unassigned character */ - groups_tried[group] = TRUE; - return 0; - } - - *lastConverterIndex = group; - - /* All initial byte values in lower ascii range should have been caught by now, - except with the exception group. - */ - U_ASSERT((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT)); - - /* use converted data: first write 0, 1 or two group bytes */ - if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group) - { - *pLMBCS++ = group; - if (bytesConverted == 1 && group >= ULMBCS_DOUBLEOPTGROUP_START) - { - *pLMBCS++ = group; - } - } - - /* don't emit control chars */ - if ( bytesConverted == 1 && firstByte < 0x20 ) - return 0; - - - /* then move over the converted data */ - switch(bytesConverted) - { - case 4: - *pLMBCS++ = (ulmbcs_byte_t)(value >> 24); - U_FALLTHROUGH; - case 3: - *pLMBCS++ = (ulmbcs_byte_t)(value >> 16); - U_FALLTHROUGH; - case 2: - *pLMBCS++ = (ulmbcs_byte_t)(value >> 8); - U_FALLTHROUGH; - case 1: - *pLMBCS++ = (ulmbcs_byte_t)value; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - - return (pLMBCS - pStartLMBCS); -} - - -/* This is a much simpler version of above, when we -know we are writing LMBCS using the Unicode 
group -*/ -static size_t -LMBCSConvertUni(ulmbcs_byte_t * pLMBCS, UChar uniChar) -{ - /* encode into LMBCS Unicode range */ - uint8_t LowCh = (uint8_t)(uniChar & 0x00FF); - uint8_t HighCh = (uint8_t)(uniChar >> 8); - - *pLMBCS++ = ULMBCS_GRP_UNICODE; - - if (LowCh == 0) - { - *pLMBCS++ = ULMBCS_UNICOMPATZERO; - *pLMBCS++ = HighCh; - } - else - { - *pLMBCS++ = HighCh; - *pLMBCS++ = LowCh; - } - return ULMBCS_UNICODE_SIZE; -} - - - -/* The main Unicode to LMBCS conversion function */ -static void U_CALLCONV -_LMBCSFromUnicode(UConverterFromUnicodeArgs* args, - UErrorCode* err) -{ - ulmbcs_byte_t lastConverterIndex = 0; - UChar uniChar; - ulmbcs_byte_t LMBCS[ULMBCS_CHARSIZE_MAX]; - ulmbcs_byte_t * pLMBCS; - int32_t bytes_written; - UBool groups_tried[ULMBCS_GRP_LAST+1]; - UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; - int sourceIndex = 0; - - /* Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS) - If that succeeds, see if it will all fit into the target & copy it over - if it does. - - We try conversions in the following order: - - 1. Single-byte ascii & special fixed control chars (&null) - 2. Look up group in table & try that (could be - A) Unicode group - B) control group, - C) national encoding, - or ambiguous SBCS or MBCS group (on to step 4...) - - 3. If its ambiguous, try this order: - A) The optimization group - B) The locale group - C) The last group that succeeded with this string. - D) every other group that's relevent (single or double) - E) If its single-byte ambiguous, try the exceptions group - - 4. 
And as a grand fallback: Unicode - */ - - /*Fix for SPR#DJOE66JFN3 (Lotus)*/ - ulmbcs_byte_t OldConverterIndex = 0; - - while (args->source < args->sourceLimit && !U_FAILURE(*err)) - { - /*Fix for SPR#DJOE66JFN3 (Lotus)*/ - OldConverterIndex = extraInfo->localeConverterIndex; - - if (args->target >= args->targetLimit) - { - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - uniChar = *(args->source); - bytes_written = 0; - pLMBCS = LMBCS; - - /* check cases in rough order of how common they are, for speed */ - - /* single byte matches: strategy 1 */ - /*Fix for SPR#DJOE66JFN3 (Lotus)*/ - if((uniChar>=0x80) && (uniChar<=0xff) - /*Fix for SPR#JUYA6XAERU and TSAO7GL5NK (Lotus)*/ &&(uniChar!=0xB1) &&(uniChar!=0xD7) &&(uniChar!=0xF7) - &&(uniChar!=0xB0) &&(uniChar!=0xB4) &&(uniChar!=0xB6) &&(uniChar!=0xA7) &&(uniChar!=0xA8)) - { - extraInfo->localeConverterIndex = ULMBCS_GRP_L1; - } - if (((uniChar > ULMBCS_C0END) && (uniChar < ULMBCS_C1START)) || - uniChar == 0 || uniChar == ULMBCS_HT || uniChar == ULMBCS_CR || - uniChar == ULMBCS_LF || uniChar == ULMBCS_123SYSTEMRANGE - ) - { - *pLMBCS++ = (ulmbcs_byte_t ) uniChar; - bytes_written = 1; - } - - - if (!bytes_written) - { - /* Check by UNICODE range (Strategy 2) */ - ulmbcs_byte_t group = FindLMBCSUniRange(uniChar); - - if (group == ULMBCS_GRP_UNICODE) /* (Strategy 2A) */ - { - pLMBCS += LMBCSConvertUni(pLMBCS,uniChar); - - bytes_written = (int32_t)(pLMBCS - LMBCS); - } - else if (group == ULMBCS_GRP_CTRL) /* (Strategy 2B) */ - { - /* Handle control characters here */ - if (uniChar <= ULMBCS_C0END) - { - *pLMBCS++ = ULMBCS_GRP_CTRL; - *pLMBCS++ = (ulmbcs_byte_t)(ULMBCS_CTRLOFFSET + uniChar); - } - else if (uniChar >= ULMBCS_C1START && uniChar <= ULMBCS_C1START + ULMBCS_CTRLOFFSET) - { - *pLMBCS++ = ULMBCS_GRP_CTRL; - *pLMBCS++ = (ulmbcs_byte_t ) (uniChar & 0x00FF); - } - bytes_written = (int32_t)(pLMBCS - LMBCS); - } - else if (group < ULMBCS_GRP_UNICODE) /* (Strategy 2C) */ - { - /* a specific converter has been identified 
- use it */ - bytes_written = (int32_t)LMBCSConversionWorker ( - extraInfo, group, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - } - if (!bytes_written) /* the ambiguous group cases (Strategy 3) */ - { - uprv_memset(groups_tried, 0, sizeof(groups_tried)); - - /* check for non-default optimization group (Strategy 3A )*/ - if ((extraInfo->OptGroup != 1) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->OptGroup))) - { - /*zhujin: upgrade, merge #39299 here (Lotus) */ - /*To make R5 compatible translation, look for exceptional group first for non-DBCS*/ - - if(extraInfo->localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START) - { - bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, - ULMBCS_GRP_L1, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - - if(!bytes_written) - { - bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, - ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - } - if(!bytes_written) - { - bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, - extraInfo->localeConverterIndex, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - } - } - else - { - bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, - extraInfo->localeConverterIndex, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - } - } - /* check for locale optimization group (Strategy 3B) */ - if (!bytes_written && (extraInfo->localeConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->localeConverterIndex))) - { - bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, - extraInfo->localeConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried); - } - /* check for last optimization group used for this string (Strategy 3C) */ - if (!bytes_written && (lastConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, lastConverterIndex))) - { - bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, - lastConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried); - } - if (!bytes_written) - { - /* just 
check every possible matching converter (Strategy 3D) */ - ulmbcs_byte_t grp_start; - ulmbcs_byte_t grp_end; - ulmbcs_byte_t grp_ix; - grp_start = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS) - ? ULMBCS_DOUBLEOPTGROUP_START - : ULMBCS_GRP_L1); - grp_end = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS) - ? ULMBCS_GRP_LAST - : ULMBCS_GRP_TH); - if(group == ULMBCS_AMBIGUOUS_ALL) - { - grp_start = ULMBCS_GRP_L1; - grp_end = ULMBCS_GRP_LAST; - } - for (grp_ix = grp_start; - grp_ix <= grp_end && !bytes_written; - grp_ix++) - { - if (extraInfo->OptGrpConverter [grp_ix] && !groups_tried [grp_ix]) - { - bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, - grp_ix, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - } - } - /* a final conversion fallback to the exceptions group if its likely - to be single byte (Strategy 3E) */ - if (!bytes_written && grp_start == ULMBCS_GRP_L1) - { - bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, - ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar, - &lastConverterIndex, groups_tried); - } - } - /* all of our other strategies failed. Fallback to Unicode. (Strategy 4)*/ - if (!bytes_written) - { - - pLMBCS += LMBCSConvertUni(pLMBCS, uniChar); - bytes_written = (int32_t)(pLMBCS - LMBCS); - } - } - } - - /* we have a translation. 
increment source and write as much as posible to target */ - args->source++; - pLMBCS = LMBCS; - while (args->target < args->targetLimit && bytes_written--) - { - *(args->target)++ = *pLMBCS++; - if (args->offsets) - { - *(args->offsets)++ = sourceIndex; - } - } - sourceIndex++; - if (bytes_written > 0) - { - /* write any bytes that didn't fit in target to the error buffer, - common code will move this to target if we get called back with - enough target room - */ - uint8_t * pErrorBuffer = args->converter->charErrorBuffer; - *err = U_BUFFER_OVERFLOW_ERROR; - args->converter->charErrorBufferLength = (int8_t)bytes_written; - while (bytes_written--) - { - *pErrorBuffer++ = *pLMBCS++; - } - } - /*Fix for SPR#DJOE66JFN3 (Lotus)*/ - extraInfo->localeConverterIndex = OldConverterIndex; - } -} - - -/* Now, the Unicode from LMBCS section */ - - -/* A function to call when we are looking at the Unicode group byte in LMBCS */ -static UChar -GetUniFromLMBCSUni(char const ** ppLMBCSin) /* Called with LMBCS-style Unicode byte stream */ -{ - uint8_t HighCh = *(*ppLMBCSin)++; /* Big-endian Unicode in LMBCS compatibility group*/ - uint8_t LowCh = *(*ppLMBCSin)++; - - if (HighCh == ULMBCS_UNICOMPATZERO ) - { - HighCh = LowCh; - LowCh = 0; /* zero-byte in LSB special character */ - } - return (UChar)((HighCh << 8) | LowCh); -} - - - -/* CHECK_SOURCE_LIMIT: Helper macro to verify that there are at least'index' - bytes left in source up to sourceLimit.Errors appropriately if not. - If we reach the limit, then update the source pointer to there to consume - all input as required by ICU converter semantics. 
-*/ - -#define CHECK_SOURCE_LIMIT(index) \ - if (args->source+index > args->sourceLimit){\ - *err = U_TRUNCATED_CHAR_FOUND;\ - args->source = args->sourceLimit;\ - return 0xffff;} - -/* Return the Unicode representation for the current LMBCS character */ - -static UChar32 U_CALLCONV -_LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args, - UErrorCode* err) -{ - UChar32 uniChar = 0; /* an output UNICODE char */ - ulmbcs_byte_t CurByte; /* A byte from the input stream */ - - /* error check */ - if (args->source >= args->sourceLimit) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return 0xffff; - } - /* Grab first byte & save address for error recovery */ - CurByte = *((ulmbcs_byte_t *) (args->source++)); - - /* - * at entry of each if clause: - * 1. 'CurByte' points at the first byte of a LMBCS character - * 2. '*source'points to the next byte of the source stream after 'CurByte' - * - * the job of each if clause is: - * 1. set '*source' to point at the beginning of next char (nop if LMBCS char is only 1 byte) - * 2. set 'uniChar' up with the right Unicode value, or set 'err' appropriately - */ - - /* First lets check the simple fixed values. */ - - if(((CurByte > ULMBCS_C0END) && (CurByte < ULMBCS_C1START)) /* ascii range */ - || (CurByte == 0) - || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR - || CurByte == ULMBCS_LF || CurByte == ULMBCS_123SYSTEMRANGE) - { - uniChar = CurByte; - } - else - { - UConverterDataLMBCS * extraInfo; - ulmbcs_byte_t group; - UConverterSharedData *cnv; - - if (CurByte == ULMBCS_GRP_CTRL) /* Control character group - no opt group update */ - { - ulmbcs_byte_t C0C1byte; - CHECK_SOURCE_LIMIT(1); - C0C1byte = *(args->source)++; - uniChar = (C0C1byte < ULMBCS_C1START) ? 
C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte; - } - else - if (CurByte == ULMBCS_GRP_UNICODE) /* Unicode compatibility group: BigEndian UTF16 */ - { - CHECK_SOURCE_LIMIT(2); - - /* don't check for error indicators fffe/ffff below */ - return GetUniFromLMBCSUni(&(args->source)); - } - else if (CurByte <= ULMBCS_CTRLOFFSET) - { - group = CurByte; /* group byte is in the source */ - extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; - if (group > ULMBCS_GRP_LAST || (cnv = extraInfo->OptGrpConverter[group]) == NULL) - { - /* this is not a valid group byte - no converter*/ - *err = U_INVALID_CHAR_FOUND; - } - else if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */ - { - - CHECK_SOURCE_LIMIT(2); - - /* check for LMBCS doubled-group-byte case */ - if (*args->source == group) { - /* single byte */ - ++args->source; - uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 1, FALSE); - ++args->source; - } else { - /* double byte */ - uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 2, FALSE); - args->source += 2; - } - } - else { /* single byte conversion */ - CHECK_SOURCE_LIMIT(1); - CurByte = *(args->source)++; - - if (CurByte >= ULMBCS_C1START) - { - uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte); - } - else - { - /* The non-optimizable oddballs where there is an explicit byte - * AND the second byte is not in the upper ascii range - */ - char bytes[2]; - - extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; - cnv = extraInfo->OptGrpConverter [ULMBCS_GRP_EXCEPT]; - - /* Lookup value must include opt group */ - bytes[0] = group; - bytes[1] = CurByte; - uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, bytes, 2, FALSE); - } - } - } - else if (CurByte >= ULMBCS_C1START) /* group byte is implicit */ - { - extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; - group = extraInfo->OptGroup; - cnv = extraInfo->OptGrpConverter[group]; - if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */ - { - if 
(!ucnv_MBCSIsLeadByte(cnv, CurByte)) - { - CHECK_SOURCE_LIMIT(0); - - /* let the MBCS conversion consume CurByte again */ - uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 1, FALSE); - } - else - { - CHECK_SOURCE_LIMIT(1); - /* let the MBCS conversion consume CurByte again */ - uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 2, FALSE); - ++args->source; - } - } - else /* single byte conversion */ - { - uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte); - } - } - } - return uniChar; -} - - -/* The exported function that converts lmbcs to one or more - UChars - currently UTF-16 -*/ -static void U_CALLCONV -_LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args, - UErrorCode* err) -{ - char LMBCS [ULMBCS_CHARSIZE_MAX]; - UChar uniChar; /* one output UNICODE char */ - const char * saveSource; /* beginning of current code point */ - const char * pStartLMBCS = args->source; /* beginning of whole string */ - const char * errSource = NULL; /* pointer to actual input in case an error occurs */ - int8_t savebytes = 0; - - /* Process from source to limit, or until error */ - while (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit > args->target) - { - saveSource = args->source; /* beginning of current code point */ - - if (args->converter->toULength) /* reassemble char from previous call */ - { - const char *saveSourceLimit; - size_t size_old = args->converter->toULength; - - /* limit from source is either remainder of temp buffer, or user limit on source */ - size_t size_new_maybe_1 = sizeof(LMBCS) - size_old; - size_t size_new_maybe_2 = args->sourceLimit - args->source; - size_t size_new = (size_new_maybe_1 < size_new_maybe_2) ? 
size_new_maybe_1 : size_new_maybe_2; - - - uprv_memcpy(LMBCS, args->converter->toUBytes, size_old); - uprv_memcpy(LMBCS + size_old, args->source, size_new); - saveSourceLimit = args->sourceLimit; - args->source = errSource = LMBCS; - args->sourceLimit = LMBCS+size_old+size_new; - savebytes = (int8_t)(size_old+size_new); - uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err); - args->source = saveSource + ((args->source - LMBCS) - size_old); - args->sourceLimit = saveSourceLimit; - - if (*err == U_TRUNCATED_CHAR_FOUND) - { - /* evil special case: source buffers so small a char spans more than 2 buffers */ - args->converter->toULength = savebytes; - uprv_memcpy(args->converter->toUBytes, LMBCS, savebytes); - args->source = args->sourceLimit; - *err = U_ZERO_ERROR; - return; - } - else - { - /* clear the partial-char marker */ - args->converter->toULength = 0; - } - } - else - { - errSource = saveSource; - uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err); - savebytes = (int8_t)(args->source - saveSource); - } - if (U_SUCCESS(*err)) - { - if (uniChar < 0xfffe) - { - *(args->target)++ = uniChar; - if(args->offsets) - { - *(args->offsets)++ = (int32_t)(saveSource - pStartLMBCS); - } - } - else if (uniChar == 0xfffe) - { - *err = U_INVALID_CHAR_FOUND; - } - else /* if (uniChar == 0xffff) */ - { - *err = U_ILLEGAL_CHAR_FOUND; - } - } - } - /* if target ran out before source, return U_BUFFER_OVERFLOW_ERROR */ - if (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit <= args->target) - { - *err = U_BUFFER_OVERFLOW_ERROR; - } - else if (U_FAILURE(*err)) - { - /* If character incomplete or unmappable/illegal, store it in toUBytes[] */ - args->converter->toULength = savebytes; - if (savebytes > 0) { - uprv_memcpy(args->converter->toUBytes, errSource, savebytes); - } - if (*err == U_TRUNCATED_CHAR_FOUND) { - *err = U_ZERO_ERROR; - } - } -} - -/* And now, the macroized declarations of data & functions: */ -DEFINE_LMBCS_OPEN(1) -DEFINE_LMBCS_OPEN(2) 
-DEFINE_LMBCS_OPEN(3) -DEFINE_LMBCS_OPEN(4) -DEFINE_LMBCS_OPEN(5) -DEFINE_LMBCS_OPEN(6) -DEFINE_LMBCS_OPEN(8) -DEFINE_LMBCS_OPEN(11) -DEFINE_LMBCS_OPEN(16) -DEFINE_LMBCS_OPEN(17) -DEFINE_LMBCS_OPEN(18) -DEFINE_LMBCS_OPEN(19) - - -DECLARE_LMBCS_DATA(1) -DECLARE_LMBCS_DATA(2) -DECLARE_LMBCS_DATA(3) -DECLARE_LMBCS_DATA(4) -DECLARE_LMBCS_DATA(5) -DECLARE_LMBCS_DATA(6) -DECLARE_LMBCS_DATA(8) -DECLARE_LMBCS_DATA(11) -DECLARE_LMBCS_DATA(16) -DECLARE_LMBCS_DATA(17) -DECLARE_LMBCS_DATA(18) -DECLARE_LMBCS_DATA(19) - -U_CDECL_END - -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/deps/node/deps/icu-small/source/common/ucnv_set.cpp b/deps/node/deps/icu-small/source/common/ucnv_set.cpp deleted file mode 100644 index 926cee0d..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_set.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2007, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ucnv_set.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004sep07 -* created by: Markus W. Scherer -* -* Conversion API functions using USet (ucnv_getUnicodeSet()) -* moved here from ucnv.c for removing the dependency of other ucnv_ -* implementation functions on the USet implementation. 
-*/ - -#include "unicode/utypes.h" -#include "unicode/uset.h" -#include "unicode/ucnv.h" -#include "ucnv_bld.h" -#include "uset_imp.h" - -#if !UCONFIG_NO_CONVERSION - -U_CAPI void U_EXPORT2 -ucnv_getUnicodeSet(const UConverter *cnv, - USet *setFillIn, - UConverterUnicodeSet whichSet, - UErrorCode *pErrorCode) { - /* argument checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } - if(cnv==NULL || setFillIn==NULL || whichSetsharedData->impl->getUnicodeSet==NULL) { - *pErrorCode=U_UNSUPPORTED_ERROR; - return; - } - - { - USetAdder sa={ - NULL, - uset_add, - uset_addRange, - uset_addString, - uset_remove, - uset_removeRange - }; - sa.set=setFillIn; - - /* empty the set */ - uset_clear(setFillIn); - - /* call the converter to add the code points it supports */ - cnv->sharedData->impl->getUnicodeSet(cnv, &sa, whichSet, pErrorCode); - } -} - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnv_u16.cpp b/deps/node/deps/icu-small/source/common/ucnv_u16.cpp deleted file mode 100644 index 6c1b87d3..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_u16.cpp +++ /dev/null @@ -1,1579 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2002-2015, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnv_u16.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jul01 -* created by: Markus W. Scherer -* -* UTF-16 converter implementation. Used to be in ucnv_utf.c. 
-*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/uversion.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cmemory.h" - -enum { - UCNV_NEED_TO_WRITE_BOM=1 -}; - -U_CDECL_BEGIN -/* - * The UTF-16 toUnicode implementation is also used for the Java-specific - * "with BOM" variants of UTF-16BE and UTF-16LE. - */ -static void U_CALLCONV -_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode); - -/* UTF-16BE ----------------------------------------------------------------- */ - -#if U_IS_BIG_ENDIAN -# define _UTF16PEFromUnicodeWithOffsets _UTF16BEFromUnicodeWithOffsets -#else -# define _UTF16PEFromUnicodeWithOffsets _UTF16LEFromUnicodeWithOffsets -#endif - - -static void U_CALLCONV -_UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source; - char *target; - int32_t *offsets; - - uint32_t targetCapacity, length, sourceIndex; - UChar c, trail; - char overflow[4]; - - source=pArgs->source; - length=(int32_t)(pArgs->sourceLimit-source); - if(length<=0) { - /* no input, nothing to do */ - return; - } - - cnv=pArgs->converter; - - /* write the BOM if necessary */ - if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ (char)0xfeu, (char)0xffu }; - ucnv_fromUWriteBytes(cnv, - bom, 2, - &pArgs->target, pArgs->targetLimit, - &pArgs->offsets, -1, - pErrorCode); - cnv->fromUnicodeStatus=0; - } - - target=pArgs->target; - if(target >= pArgs->targetLimit) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - return; - } - - targetCapacity=(uint32_t)(pArgs->targetLimit-target); - offsets=pArgs->offsets; - sourceIndex=0; - - /* c!=0 indicates in several places outside the main loops that a surrogate was found */ - - if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { - /* the last buffer ended with a lead surrogate, output the surrogate pair */ - 
++source; - --length; - target[0]=(uint8_t)(c>>8); - target[1]=(uint8_t)c; - target[2]=(uint8_t)(trail>>8); - target[3]=(uint8_t)trail; - target+=4; - targetCapacity-=4; - if(offsets!=NULL) { - *offsets++=-1; - *offsets++=-1; - *offsets++=-1; - *offsets++=-1; - } - sourceIndex=1; - cnv->fromUChar32=c=0; - } - - if(c==0) { - /* copy an even number of bytes for complete UChars */ - uint32_t count=2*length; - if(count>targetCapacity) { - count=targetCapacity&~1; - } - /* count is even */ - targetCapacity-=count; - count>>=1; - length-=count; - - if(offsets==NULL) { - while(count>0) { - c=*source++; - if(U16_IS_SINGLE(c)) { - target[0]=(uint8_t)(c>>8); - target[1]=(uint8_t)c; - target+=2; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { - ++source; - --count; - target[0]=(uint8_t)(c>>8); - target[1]=(uint8_t)c; - target[2]=(uint8_t)(trail>>8); - target[3]=(uint8_t)trail; - target+=4; - } else { - break; - } - --count; - } - } else { - while(count>0) { - c=*source++; - if(U16_IS_SINGLE(c)) { - target[0]=(uint8_t)(c>>8); - target[1]=(uint8_t)c; - target+=2; - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { - ++source; - --count; - target[0]=(uint8_t)(c>>8); - target[1]=(uint8_t)c; - target[2]=(uint8_t)(trail>>8); - target[3]=(uint8_t)trail; - target+=4; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - sourceIndex+=2; - } else { - break; - } - --count; - } - } - - if(count==0) { - /* done with the loop for complete UChars */ - if(length>0 && targetCapacity>0) { - /* - * there is more input and some target capacity - - * it must be targetCapacity==1 because otherwise - * the above would have copied more; - * prepare for overflow output - */ - if(U16_IS_SINGLE(c=*source++)) { - overflow[0]=(char)(c>>8); - overflow[1]=(char)c; - length=2; /* 2 bytes to output */ - c=0; - /* } else { keep c for 
surrogate handling, length will be set there */ - } - } else { - length=0; - c=0; - } - } else { - /* keep c for surrogate handling, length will be set there */ - targetCapacity+=2*count; - } - } else { - length=0; /* from here on, length counts the bytes in overflow[] */ - } - - if(c!=0) { - /* - * c is a surrogate, and - * - source or target too short - * - or the surrogate is unmatched - */ - length=0; - if(U16_IS_SURROGATE_LEAD(c)) { - if(sourcesourceLimit) { - if(U16_IS_TRAIL(trail=*source)) { - /* output the surrogate pair, will overflow (see conditions comment above) */ - ++source; - overflow[0]=(char)(c>>8); - overflow[1]=(char)c; - overflow[2]=(char)(trail>>8); - overflow[3]=(char)trail; - length=4; /* 4 bytes to output */ - c=0; - } else { - /* unmatched lead surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* see if the trail surrogate is in the next buffer */ - } - } else { - /* unmatched trail surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - cnv->fromUChar32=c; - } - - if(length>0) { - /* output length bytes with overflow (length>targetCapacity>0) */ - ucnv_fromUWriteBytes(cnv, - overflow, length, - (char **)&target, pArgs->targetLimit, - &offsets, sourceIndex, - pErrorCode); - targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); - } - - if(U_SUCCESS(*pErrorCode) && sourcesourceLimit && targetCapacity==0) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; -} - -static void U_CALLCONV -_UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source; - UChar *target; - int32_t *offsets; - - uint32_t targetCapacity, length, count, sourceIndex; - UChar c, trail; - - if(pArgs->converter->mode<8) { - _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); - return; - } - - cnv=pArgs->converter; - source=(const uint8_t *)pArgs->source; - 
length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); - if(length<=0 && cnv->toUnicodeStatus==0) { - /* no input, nothing to do */ - return; - } - - target=pArgs->target; - if(target >= pArgs->targetLimit) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - return; - } - - targetCapacity=(uint32_t)(pArgs->targetLimit-target); - offsets=pArgs->offsets; - sourceIndex=0; - c=0; - - /* complete a partial UChar or pair from the last call */ - if(cnv->toUnicodeStatus!=0) { - /* - * special case: single byte from a previous buffer, - * where the byte turned out not to belong to a trail surrogate - * and the preceding, unmatched lead surrogate was put into toUBytes[] - * for error handling - */ - cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; - cnv->toULength=1; - cnv->toUnicodeStatus=0; - } - if((count=cnv->toULength)!=0) { - uint8_t *p=cnv->toUBytes; - do { - p[count++]=*source++; - ++sourceIndex; - --length; - if(count==2) { - c=((UChar)p[0]<<8)|p[1]; - if(U16_IS_SINGLE(c)) { - /* output the BMP code point */ - *target++=c; - if(offsets!=NULL) { - *offsets++=-1; - } - --targetCapacity; - count=0; - c=0; - break; - } else if(U16_IS_SURROGATE_LEAD(c)) { - /* continue collecting bytes for the trail surrogate */ - c=0; /* avoid unnecessary surrogate handling below */ - } else { - /* fall through to error handling for an unmatched trail surrogate */ - break; - } - } else if(count==4) { - c=((UChar)p[0]<<8)|p[1]; - trail=((UChar)p[2]<<8)|p[3]; - if(U16_IS_TRAIL(trail)) { - /* output the surrogate pair */ - *target++=c; - if(targetCapacity>=2) { - *target++=trail; - if(offsets!=NULL) { - *offsets++=-1; - *offsets++=-1; - } - targetCapacity-=2; - } else /* targetCapacity==1 */ { - targetCapacity=0; - cnv->UCharErrorBuffer[0]=trail; - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - count=0; - c=0; - break; - } else { - /* unmatched lead surrogate, handle here for consistent toUBytes[] */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - - /* back out reading the 
code unit after it */ - if(((const uint8_t *)pArgs->source-source)>=2) { - source-=2; - } else { - /* - * if the trail unit's first byte was in a previous buffer, then - * we need to put it into a special place because toUBytes[] will be - * used for the lead unit's bytes - */ - cnv->toUnicodeStatus=0x100|p[2]; - --source; - } - cnv->toULength=2; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - return; - } - } - } while(length>0); - cnv->toULength=(int8_t)count; - } - - /* copy an even number of bytes for complete UChars */ - count=2*targetCapacity; - if(count>length) { - count=length&~1; - } - if(c==0 && count>0) { - length-=count; - count>>=1; - targetCapacity-=count; - if(offsets==NULL) { - do { - c=((UChar)source[0]<<8)|source[1]; - source+=2; - if(U16_IS_SINGLE(c)) { - *target++=c; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && - U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) - ) { - source+=2; - --count; - *target++=c; - *target++=trail; - } else { - break; - } - } while(--count>0); - } else { - do { - c=((UChar)source[0]<<8)|source[1]; - source+=2; - if(U16_IS_SINGLE(c)) { - *target++=c; - *offsets++=sourceIndex; - sourceIndex+=2; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && - U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) - ) { - source+=2; - --count; - *target++=c; - *target++=trail; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - sourceIndex+=4; - } else { - break; - } - } while(--count>0); - } - - if(count==0) { - /* done with the loop for complete UChars */ - c=0; - } else { - /* keep c for surrogate handling, trail will be set there */ - length+=2*(count-1); /* one more byte pair was consumed than count decremented */ - targetCapacity+=count; - } - } - - if(c!=0) { - /* - * c is a surrogate, and - * - source or target too short - * - or the surrogate is unmatched - */ - cnv->toUBytes[0]=(uint8_t)(c>>8); - cnv->toUBytes[1]=(uint8_t)c; - 
cnv->toULength=2; - - if(U16_IS_SURROGATE_LEAD(c)) { - if(length>=2) { - if(U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])) { - /* output the surrogate pair, will overflow (see conditions comment above) */ - source+=2; - length-=2; - *target++=c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - cnv->UCharErrorBuffer[0]=trail; - cnv->UCharErrorBufferLength=1; - cnv->toULength=0; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } else { - /* unmatched lead surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* see if the trail surrogate is in the next buffer */ - } - } else { - /* unmatched trail surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } - - if(U_SUCCESS(*pErrorCode)) { - /* check for a remaining source byte */ - if(length>0) { - if(targetCapacity==0) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } else { - /* it must be length==1 because otherwise the above would have copied more */ - cnv->toUBytes[cnv->toULength++]=*source++; - } - } - } - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; -} - -static UChar32 U_CALLCONV -_UTF16BEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { - const uint8_t *s, *sourceLimit; - UChar32 c; - - if(pArgs->converter->mode<8) { - return UCNV_GET_NEXT_UCHAR_USE_TO_U; - } - - s=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - - if(s>=sourceLimit) { - /* no input */ - *err=U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; - } - - if(s+2>sourceLimit) { - /* only one byte: truncated UChar */ - pArgs->converter->toUBytes[0]=*s++; - pArgs->converter->toULength=1; - pArgs->source=(const char *)s; - *err = U_TRUNCATED_CHAR_FOUND; - return 0xffff; - } - - /* get one UChar */ - c=((UChar32)*s<<8)|s[1]; - s+=2; - - /* check for a surrogate pair */ - if(U_IS_SURROGATE(c)) { - if(U16_IS_SURROGATE_LEAD(c)) { - if(s+2<=sourceLimit) { - UChar trail; - - /* get a second UChar and see if it is a trail 
surrogate */ - trail=((UChar)*s<<8)|s[1]; - if(U16_IS_TRAIL(trail)) { - c=U16_GET_SUPPLEMENTARY(c, trail); - s+=2; - } else { - /* unmatched lead surrogate */ - c=-2; - } - } else { - /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ - uint8_t *bytes=pArgs->converter->toUBytes; - s-=2; - pArgs->converter->toULength=(int8_t)(sourceLimit-s); - do { - *bytes++=*s++; - } while(sconverter->toUBytes; - pArgs->converter->toULength=2; - *bytes=*(s-2); - bytes[1]=*(s-1); - - c=0xffff; - *err=U_ILLEGAL_CHAR_FOUND; - } - } - - pArgs->source=(const char *)s; - return c; -} - -static void U_CALLCONV -_UTF16BEReset(UConverter *cnv, UConverterResetChoice choice) { - if(choice<=UCNV_RESET_TO_UNICODE) { - /* reset toUnicode state */ - if(UCNV_GET_VERSION(cnv)==0) { - cnv->mode=8; /* no BOM handling */ - } else { - cnv->mode=0; /* Java-specific "UnicodeBig" requires BE BOM or no BOM */ - } - } - if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { - /* reset fromUnicode for "UnicodeBig": prepare to output the UTF-16BE BOM */ - cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; - } -} - -static void U_CALLCONV -_UTF16BEOpen(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - (void)pArgs; - if(UCNV_GET_VERSION(cnv)<=1) { - _UTF16BEReset(cnv, UCNV_RESET_BOTH); - } else { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } -} - -static const char * U_CALLCONV -_UTF16BEGetName(const UConverter *cnv) { - if(UCNV_GET_VERSION(cnv)==0) { - return "UTF-16BE"; - } else { - return "UTF-16BE,version=1"; - } -} -U_CDECL_END - -static const UConverterImpl _UTF16BEImpl={ - UCNV_UTF16_BigEndian, - - NULL, - NULL, - - _UTF16BEOpen, - NULL, - _UTF16BEReset, - - _UTF16BEToUnicodeWithOffsets, - _UTF16BEToUnicodeWithOffsets, - _UTF16BEFromUnicodeWithOffsets, - _UTF16BEFromUnicodeWithOffsets, - _UTF16BEGetNextUChar, - - NULL, - _UTF16BEGetName, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - -static const UConverterStaticData 
_UTF16BEStaticData={ - sizeof(UConverterStaticData), - "UTF-16BE", - 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2, - { 0xff, 0xfd, 0, 0 },2,FALSE,FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - - -const UConverterSharedData _UTF16BEData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16BEStaticData, &_UTF16BEImpl); - -/* UTF-16LE ----------------------------------------------------------------- */ -U_CDECL_BEGIN -static void U_CALLCONV -_UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source; - char *target; - int32_t *offsets; - - uint32_t targetCapacity, length, sourceIndex; - UChar c, trail; - char overflow[4]; - - source=pArgs->source; - length=(int32_t)(pArgs->sourceLimit-source); - if(length<=0) { - /* no input, nothing to do */ - return; - } - - cnv=pArgs->converter; - - /* write the BOM if necessary */ - if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ (char)0xffu, (char)0xfeu }; - ucnv_fromUWriteBytes(cnv, - bom, 2, - &pArgs->target, pArgs->targetLimit, - &pArgs->offsets, -1, - pErrorCode); - cnv->fromUnicodeStatus=0; - } - - target=pArgs->target; - if(target >= pArgs->targetLimit) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - return; - } - - targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - sourceIndex=0; - - /* c!=0 indicates in several places outside the main loops that a surrogate was found */ - - if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { - /* the last buffer ended with a lead surrogate, output the surrogate pair */ - ++source; - --length; - target[0]=(uint8_t)c; - target[1]=(uint8_t)(c>>8); - target[2]=(uint8_t)trail; - target[3]=(uint8_t)(trail>>8); - target+=4; - targetCapacity-=4; - if(offsets!=NULL) { - *offsets++=-1; - *offsets++=-1; - *offsets++=-1; - *offsets++=-1; - } - sourceIndex=1; - cnv->fromUChar32=c=0; - } - - 
if(c==0) { - /* copy an even number of bytes for complete UChars */ - uint32_t count=2*length; - if(count>targetCapacity) { - count=targetCapacity&~1; - } - /* count is even */ - targetCapacity-=count; - count>>=1; - length-=count; - - if(offsets==NULL) { - while(count>0) { - c=*source++; - if(U16_IS_SINGLE(c)) { - target[0]=(uint8_t)c; - target[1]=(uint8_t)(c>>8); - target+=2; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { - ++source; - --count; - target[0]=(uint8_t)c; - target[1]=(uint8_t)(c>>8); - target[2]=(uint8_t)trail; - target[3]=(uint8_t)(trail>>8); - target+=4; - } else { - break; - } - --count; - } - } else { - while(count>0) { - c=*source++; - if(U16_IS_SINGLE(c)) { - target[0]=(uint8_t)c; - target[1]=(uint8_t)(c>>8); - target+=2; - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { - ++source; - --count; - target[0]=(uint8_t)c; - target[1]=(uint8_t)(c>>8); - target[2]=(uint8_t)trail; - target[3]=(uint8_t)(trail>>8); - target+=4; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - sourceIndex+=2; - } else { - break; - } - --count; - } - } - - if(count==0) { - /* done with the loop for complete UChars */ - if(length>0 && targetCapacity>0) { - /* - * there is more input and some target capacity - - * it must be targetCapacity==1 because otherwise - * the above would have copied more; - * prepare for overflow output - */ - if(U16_IS_SINGLE(c=*source++)) { - overflow[0]=(char)c; - overflow[1]=(char)(c>>8); - length=2; /* 2 bytes to output */ - c=0; - /* } else { keep c for surrogate handling, length will be set there */ - } - } else { - length=0; - c=0; - } - } else { - /* keep c for surrogate handling, length will be set there */ - targetCapacity+=2*count; - } - } else { - length=0; /* from here on, length counts the bytes in overflow[] */ - } - - if(c!=0) { - /* - * c is a surrogate, 
and - * - source or target too short - * - or the surrogate is unmatched - */ - length=0; - if(U16_IS_SURROGATE_LEAD(c)) { - if(sourcesourceLimit) { - if(U16_IS_TRAIL(trail=*source)) { - /* output the surrogate pair, will overflow (see conditions comment above) */ - ++source; - overflow[0]=(char)c; - overflow[1]=(char)(c>>8); - overflow[2]=(char)trail; - overflow[3]=(char)(trail>>8); - length=4; /* 4 bytes to output */ - c=0; - } else { - /* unmatched lead surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* see if the trail surrogate is in the next buffer */ - } - } else { - /* unmatched trail surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - cnv->fromUChar32=c; - } - - if(length>0) { - /* output length bytes with overflow (length>targetCapacity>0) */ - ucnv_fromUWriteBytes(cnv, - overflow, length, - &target, pArgs->targetLimit, - &offsets, sourceIndex, - pErrorCode); - targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); - } - - if(U_SUCCESS(*pErrorCode) && sourcesourceLimit && targetCapacity==0) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=target; - pArgs->offsets=offsets; -} - -static void U_CALLCONV -_UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source; - UChar *target; - int32_t *offsets; - - uint32_t targetCapacity, length, count, sourceIndex; - UChar c, trail; - - if(pArgs->converter->mode<8) { - _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); - return; - } - - cnv=pArgs->converter; - source=(const uint8_t *)pArgs->source; - length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); - if(length<=0 && cnv->toUnicodeStatus==0) { - /* no input, nothing to do */ - return; - } - - target=pArgs->target; - if(target >= pArgs->targetLimit) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - return; - } - - targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); - 
offsets=pArgs->offsets; - sourceIndex=0; - c=0; - - /* complete a partial UChar or pair from the last call */ - if(cnv->toUnicodeStatus!=0) { - /* - * special case: single byte from a previous buffer, - * where the byte turned out not to belong to a trail surrogate - * and the preceding, unmatched lead surrogate was put into toUBytes[] - * for error handling - */ - cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; - cnv->toULength=1; - cnv->toUnicodeStatus=0; - } - if((count=cnv->toULength)!=0) { - uint8_t *p=cnv->toUBytes; - do { - p[count++]=*source++; - ++sourceIndex; - --length; - if(count==2) { - c=((UChar)p[1]<<8)|p[0]; - if(U16_IS_SINGLE(c)) { - /* output the BMP code point */ - *target++=c; - if(offsets!=NULL) { - *offsets++=-1; - } - --targetCapacity; - count=0; - c=0; - break; - } else if(U16_IS_SURROGATE_LEAD(c)) { - /* continue collecting bytes for the trail surrogate */ - c=0; /* avoid unnecessary surrogate handling below */ - } else { - /* fall through to error handling for an unmatched trail surrogate */ - break; - } - } else if(count==4) { - c=((UChar)p[1]<<8)|p[0]; - trail=((UChar)p[3]<<8)|p[2]; - if(U16_IS_TRAIL(trail)) { - /* output the surrogate pair */ - *target++=c; - if(targetCapacity>=2) { - *target++=trail; - if(offsets!=NULL) { - *offsets++=-1; - *offsets++=-1; - } - targetCapacity-=2; - } else /* targetCapacity==1 */ { - targetCapacity=0; - cnv->UCharErrorBuffer[0]=trail; - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - count=0; - c=0; - break; - } else { - /* unmatched lead surrogate, handle here for consistent toUBytes[] */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - - /* back out reading the code unit after it */ - if(((const uint8_t *)pArgs->source-source)>=2) { - source-=2; - } else { - /* - * if the trail unit's first byte was in a previous buffer, then - * we need to put it into a special place because toUBytes[] will be - * used for the lead unit's bytes - */ - cnv->toUnicodeStatus=0x100|p[2]; - --source; - } 
- cnv->toULength=2; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - return; - } - } - } while(length>0); - cnv->toULength=(int8_t)count; - } - - /* copy an even number of bytes for complete UChars */ - count=2*targetCapacity; - if(count>length) { - count=length&~1; - } - if(c==0 && count>0) { - length-=count; - count>>=1; - targetCapacity-=count; - if(offsets==NULL) { - do { - c=((UChar)source[1]<<8)|source[0]; - source+=2; - if(U16_IS_SINGLE(c)) { - *target++=c; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && - U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) - ) { - source+=2; - --count; - *target++=c; - *target++=trail; - } else { - break; - } - } while(--count>0); - } else { - do { - c=((UChar)source[1]<<8)|source[0]; - source+=2; - if(U16_IS_SINGLE(c)) { - *target++=c; - *offsets++=sourceIndex; - sourceIndex+=2; - } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && - U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) - ) { - source+=2; - --count; - *target++=c; - *target++=trail; - *offsets++=sourceIndex; - *offsets++=sourceIndex; - sourceIndex+=4; - } else { - break; - } - } while(--count>0); - } - - if(count==0) { - /* done with the loop for complete UChars */ - c=0; - } else { - /* keep c for surrogate handling, trail will be set there */ - length+=2*(count-1); /* one more byte pair was consumed than count decremented */ - targetCapacity+=count; - } - } - - if(c!=0) { - /* - * c is a surrogate, and - * - source or target too short - * - or the surrogate is unmatched - */ - cnv->toUBytes[0]=(uint8_t)c; - cnv->toUBytes[1]=(uint8_t)(c>>8); - cnv->toULength=2; - - if(U16_IS_SURROGATE_LEAD(c)) { - if(length>=2) { - if(U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])) { - /* output the surrogate pair, will overflow (see conditions comment above) */ - source+=2; - length-=2; - *target++=c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - 
cnv->UCharErrorBuffer[0]=trail; - cnv->UCharErrorBufferLength=1; - cnv->toULength=0; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } else { - /* unmatched lead surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* see if the trail surrogate is in the next buffer */ - } - } else { - /* unmatched trail surrogate */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } - - if(U_SUCCESS(*pErrorCode)) { - /* check for a remaining source byte */ - if(length>0) { - if(targetCapacity==0) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } else { - /* it must be length==1 because otherwise the above would have copied more */ - cnv->toUBytes[cnv->toULength++]=*source++; - } - } - } - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; -} - -static UChar32 U_CALLCONV -_UTF16LEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { - const uint8_t *s, *sourceLimit; - UChar32 c; - - if(pArgs->converter->mode<8) { - return UCNV_GET_NEXT_UCHAR_USE_TO_U; - } - - s=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - - if(s>=sourceLimit) { - /* no input */ - *err=U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; - } - - if(s+2>sourceLimit) { - /* only one byte: truncated UChar */ - pArgs->converter->toUBytes[0]=*s++; - pArgs->converter->toULength=1; - pArgs->source=(const char *)s; - *err = U_TRUNCATED_CHAR_FOUND; - return 0xffff; - } - - /* get one UChar */ - c=((UChar32)s[1]<<8)|*s; - s+=2; - - /* check for a surrogate pair */ - if(U_IS_SURROGATE(c)) { - if(U16_IS_SURROGATE_LEAD(c)) { - if(s+2<=sourceLimit) { - UChar trail; - - /* get a second UChar and see if it is a trail surrogate */ - trail=((UChar)s[1]<<8)|*s; - if(U16_IS_TRAIL(trail)) { - c=U16_GET_SUPPLEMENTARY(c, trail); - s+=2; - } else { - /* unmatched lead surrogate */ - c=-2; - } - } else { - /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ - uint8_t *bytes=pArgs->converter->toUBytes; - s-=2; - 
pArgs->converter->toULength=(int8_t)(sourceLimit-s); - do { - *bytes++=*s++; - } while(sconverter->toUBytes; - pArgs->converter->toULength=2; - *bytes=*(s-2); - bytes[1]=*(s-1); - - c=0xffff; - *err=U_ILLEGAL_CHAR_FOUND; - } - } - - pArgs->source=(const char *)s; - return c; -} - -static void U_CALLCONV -_UTF16LEReset(UConverter *cnv, UConverterResetChoice choice) { - if(choice<=UCNV_RESET_TO_UNICODE) { - /* reset toUnicode state */ - if(UCNV_GET_VERSION(cnv)==0) { - cnv->mode=8; /* no BOM handling */ - } else { - cnv->mode=0; /* Java-specific "UnicodeLittle" requires LE BOM or no BOM */ - } - } - if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { - /* reset fromUnicode for "UnicodeLittle": prepare to output the UTF-16LE BOM */ - cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; - } -} - -static void U_CALLCONV -_UTF16LEOpen(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - (void)pArgs; - if(UCNV_GET_VERSION(cnv)<=1) { - _UTF16LEReset(cnv, UCNV_RESET_BOTH); - } else { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } -} - -static const char * U_CALLCONV -_UTF16LEGetName(const UConverter *cnv) { - if(UCNV_GET_VERSION(cnv)==0) { - return "UTF-16LE"; - } else { - return "UTF-16LE,version=1"; - } -} -U_CDECL_END - -static const UConverterImpl _UTF16LEImpl={ - UCNV_UTF16_LittleEndian, - - NULL, - NULL, - - _UTF16LEOpen, - NULL, - _UTF16LEReset, - - _UTF16LEToUnicodeWithOffsets, - _UTF16LEToUnicodeWithOffsets, - _UTF16LEFromUnicodeWithOffsets, - _UTF16LEFromUnicodeWithOffsets, - _UTF16LEGetNextUChar, - - NULL, - _UTF16LEGetName, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - - -static const UConverterStaticData _UTF16LEStaticData={ - sizeof(UConverterStaticData), - "UTF-16LE", - 1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2, - { 0xfd, 0xff, 0, 0 },2,FALSE,FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - - -const UConverterSharedData _UTF16LEData= - 
UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16LEStaticData, &_UTF16LEImpl); - -/* UTF-16 (Detect BOM) ------------------------------------------------------ */ - -/* - * Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE - * accordingly. - * This is a simpler version of the UTF-32 converter, with - * fewer states for shorter BOMs. - * - * State values: - * 0 initial state - * 1 saw first byte - * 2..5 - - * 6..7 see _UTF16ToUnicodeWithOffsets() comments in state 1 - * 8 UTF-16BE mode - * 9 UTF-16LE mode - * - * During detection: state==number of initial bytes seen so far. - * - * On output, emit U+FEFF as the first code point. - * - * Variants: - * - UTF-16,version=1 (Java "Unicode" encoding) treats a missing BOM as an error. - * - UTF-16BE,version=1 (Java "UnicodeBig" encoding) and - * UTF-16LE,version=1 (Java "UnicodeLittle" encoding) treat a reverse BOM as an error. - */ -U_CDECL_BEGIN -static void U_CALLCONV -_UTF16Reset(UConverter *cnv, UConverterResetChoice choice) { - if(choice<=UCNV_RESET_TO_UNICODE) { - /* reset toUnicode: state=0 */ - cnv->mode=0; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - /* reset fromUnicode: prepare to output the UTF-16PE BOM */ - cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; - } -} -U_CDECL_END -extern const UConverterSharedData _UTF16v2Data; -U_CDECL_BEGIN -static void U_CALLCONV -_UTF16Open(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - if(UCNV_GET_VERSION(cnv)<=2) { - if(UCNV_GET_VERSION(cnv)==2 && !pArgs->onlyTestIsLoadable) { - /* - * Switch implementation, and switch the staticData that's different - * and was copied into the UConverter. - * (See ucnv_createConverterFromSharedData() in ucnv_bld.c.) - * UTF-16,version=2 fromUnicode() always writes a big-endian byte stream. 
- */ - cnv->sharedData=(UConverterSharedData*)&_UTF16v2Data; - uprv_memcpy(cnv->subChars, _UTF16v2Data.staticData->subChar, UCNV_MAX_SUBCHAR_LEN); - } - _UTF16Reset(cnv, UCNV_RESET_BOTH); - } else { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } -} - -static const char * U_CALLCONV -_UTF16GetName(const UConverter *cnv) { - if(UCNV_GET_VERSION(cnv)==0) { - return "UTF-16"; - } else if(UCNV_GET_VERSION(cnv)==1) { - return "UTF-16,version=1"; - } else { - return "UTF-16,version=2"; - } -} -U_CDECL_END -extern const UConverterSharedData _UTF16Data; - -static inline bool IS_UTF16BE(const UConverter *cnv) { - return ((cnv)->sharedData == &_UTF16BEData); -} - -static inline bool IS_UTF16LE(const UConverter *cnv) { - return ((cnv)->sharedData == &_UTF16LEData); -} - -static inline bool IS_UTF16(const UConverter *cnv) { - return ((cnv)->sharedData==&_UTF16Data) || ((cnv)->sharedData == &_UTF16v2Data); -} - -U_CDECL_BEGIN -static void U_CALLCONV -_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv=pArgs->converter; - const char *source=pArgs->source; - const char *sourceLimit=pArgs->sourceLimit; - int32_t *offsets=pArgs->offsets; - - int32_t state, offsetDelta; - uint8_t b; - - state=cnv->mode; - - /* - * If we detect a BOM in this buffer, then we must add the BOM size to the - * offsets because the actual converter function will not see and count the BOM. - * offsetDelta will have the number of the BOM bytes that are in the current buffer. 
- */ - offsetDelta=0; - - while(sourcetoUBytes[0]=(uint8_t)*source++; - cnv->toULength=1; - state=1; - break; - case 1: - /* - * Only inside this switch case can the state variable - * temporarily take two additional values: - * 6: BOM error, continue with BE - * 7: BOM error, continue with LE - */ - b=*source; - if(cnv->toUBytes[0]==0xfe && b==0xff) { - if(IS_UTF16LE(cnv)) { - state=7; /* illegal reverse BOM for Java "UnicodeLittle" */ - } else { - state=8; /* detect UTF-16BE */ - } - } else if(cnv->toUBytes[0]==0xff && b==0xfe) { - if(IS_UTF16BE(cnv)) { - state=6; /* illegal reverse BOM for Java "UnicodeBig" */ - } else { - state=9; /* detect UTF-16LE */ - } - } else if((IS_UTF16(cnv) && UCNV_GET_VERSION(cnv)==1)) { - state=6; /* illegal missing BOM for Java "Unicode" */ - } - if(state>=8) { - /* BOM detected, consume it */ - ++source; - cnv->toULength=0; - offsetDelta=(int32_t)(source-pArgs->source); - } else if(state<6) { - /* ok: no BOM, and not a reverse BOM */ - if(source!=pArgs->source) { - /* reset the source for a correct first offset */ - source=pArgs->source; - cnv->toULength=0; - } - if(IS_UTF16LE(cnv)) { - /* Make Java "UnicodeLittle" default to LE. */ - state=9; - } else { - /* Make standard UTF-16 and Java "UnicodeBig" default to BE. */ - state=8; - } - } else { - /* - * error: missing BOM, or reverse BOM - * UTF-16,version=1: Java-specific "Unicode" requires a BOM. - * UTF-16BE,version=1: Java-specific "UnicodeBig" requires a BE BOM or no BOM. - * UTF-16LE,version=1: Java-specific "UnicodeLittle" requires an LE BOM or no BOM. - */ - /* report the non-BOM or reverse BOM as an illegal sequence */ - cnv->toUBytes[1]=b; - cnv->toULength=2; - pArgs->source=source+1; - /* continue with conversion if the callback resets the error */ - /* - * Make Java "Unicode" default to BE like standard UTF-16. - * Make Java "UnicodeBig" and "UnicodeLittle" default - * to their normal endiannesses. 
- */ - cnv->mode=state+2; - *pErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE; - return; - } - /* convert the rest of the stream */ - cnv->mode=state; - continue; - case 8: - /* call UTF-16BE */ - pArgs->source=source; - _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); - source=pArgs->source; - break; - case 9: - /* call UTF-16LE */ - pArgs->source=source; - _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); - source=pArgs->source; - break; - default: - break; /* does not occur */ - } - } - - /* add BOM size to offsets - see comment at offsetDelta declaration */ - if(offsets!=NULL && offsetDelta!=0) { - int32_t *offsetsLimit=pArgs->offsets; - while(offsetssource=source; - - if(source==sourceLimit && pArgs->flush) { - /* handle truncated input */ - switch(state) { - case 0: - break; /* no input at all, nothing to do */ - case 8: - _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); - break; - case 9: - _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); - break; - default: - /* 0mode=state; -} - -static UChar32 U_CALLCONV -_UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - switch(pArgs->converter->mode) { - case 8: - return _UTF16BEGetNextUChar(pArgs, pErrorCode); - case 9: - return _UTF16LEGetNextUChar(pArgs, pErrorCode); - default: - return UCNV_GET_NEXT_UCHAR_USE_TO_U; - } -} -U_CDECL_END - -static const UConverterImpl _UTF16Impl = { - UCNV_UTF16, - - NULL, - NULL, - - _UTF16Open, - NULL, - _UTF16Reset, - - _UTF16ToUnicodeWithOffsets, - _UTF16ToUnicodeWithOffsets, - _UTF16PEFromUnicodeWithOffsets, - _UTF16PEFromUnicodeWithOffsets, - _UTF16GetNextUChar, - - NULL, /* ### TODO implement getStarters for all Unicode encodings?! 
*/ - _UTF16GetName, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - -static const UConverterStaticData _UTF16StaticData = { - sizeof(UConverterStaticData), - "UTF-16", - 1204, /* CCSID for BOM sensitive UTF-16 */ - UCNV_IBM, UCNV_UTF16, 2, 2, -#if U_IS_BIG_ENDIAN - { 0xff, 0xfd, 0, 0 }, 2, -#else - { 0xfd, 0xff, 0, 0 }, 2, -#endif - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _UTF16Data = - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16StaticData, &_UTF16Impl); - -static const UConverterImpl _UTF16v2Impl = { - UCNV_UTF16, - - NULL, - NULL, - - _UTF16Open, - NULL, - _UTF16Reset, - - _UTF16ToUnicodeWithOffsets, - _UTF16ToUnicodeWithOffsets, - _UTF16BEFromUnicodeWithOffsets, - _UTF16BEFromUnicodeWithOffsets, - _UTF16GetNextUChar, - - NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ - _UTF16GetName, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - -static const UConverterStaticData _UTF16v2StaticData = { - sizeof(UConverterStaticData), - "UTF-16,version=2", - 1204, /* CCSID for BOM sensitive UTF-16 */ - UCNV_IBM, UCNV_UTF16, 2, 2, - { 0xff, 0xfd, 0, 0 }, 2, - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _UTF16v2Data = - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16v2StaticData, &_UTF16v2Impl); - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnv_u32.cpp b/deps/node/deps/icu-small/source/common/ucnv_u32.cpp deleted file mode 100644 index 13444a3a..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_u32.cpp +++ /dev/null @@ -1,1253 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2002-2015, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* file name: ucnv_u32.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jul01 -* created by: Markus W. Scherer -* -* UTF-32 converter implementation. Used to be in ucnv_utf.c. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/utf.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cmemory.h" - -#define MAXIMUM_UCS2 0x0000FFFF -#define MAXIMUM_UTF 0x0010FFFF -#define HALF_SHIFT 10 -#define HALF_BASE 0x0010000 -#define HALF_MASK 0x3FF -#define SURROGATE_HIGH_START 0xD800 -#define SURROGATE_LOW_START 0xDC00 - -/* -SURROGATE_LOW_START + HALF_BASE */ -#define SURROGATE_LOW_BASE 9216 - -enum { - UCNV_NEED_TO_WRITE_BOM=1 -}; - -/* UTF-32BE ----------------------------------------------------------------- */ -U_CDECL_BEGIN -static void U_CALLCONV -T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, - UErrorCode * err) -{ - const unsigned char *mySource = (unsigned char *) args->source; - UChar *myTarget = args->target; - const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; - const UChar *targetLimit = args->targetLimit; - unsigned char *toUBytes = args->converter->toUBytes; - uint32_t ch, i; - - /* Restore state of current sequence */ - if (args->converter->toULength > 0 && myTarget < targetLimit) { - i = args->converter->toULength; /* restore # of bytes consumed */ - args->converter->toULength = 0; - - ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ - args->converter->toUnicodeStatus = 0; - goto morebytes; - } - - while (mySource < sourceLimit && myTarget < targetLimit) { - i = 0; - ch = 0; -morebytes: - while (i < sizeof(uint32_t)) { - if (mySource < sourceLimit) { - ch = (ch << 8) | (uint8_t)(*mySource); - toUBytes[i++] = (char) *(mySource++); - } - else { - /* stores a 
partially calculated target*/ - /* + 1 to make 0 a valid character */ - args->converter->toUnicodeStatus = ch + 1; - args->converter->toULength = (int8_t) i; - goto donefornow; - } - } - - if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { - /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= MAXIMUM_UCS2) - { - /* fits in 16 bits */ - *(myTarget++) = (UChar) ch; - } - else { - /* write out the surrogates */ - *(myTarget++) = U16_LEAD(ch); - ch = U16_TRAIL(ch); - if (myTarget < targetLimit) { - *(myTarget++) = (UChar)ch; - } - else { - /* Put in overflow buffer (not handled here) */ - args->converter->UCharErrorBuffer[0] = (UChar) ch; - args->converter->UCharErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - else { - args->converter->toULength = (int8_t)i; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - -donefornow: - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { - /* End of target buffer */ - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = myTarget; - args->source = (const char *) mySource; -} - -static void U_CALLCONV -T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, - UErrorCode * err) -{ - const unsigned char *mySource = (unsigned char *) args->source; - UChar *myTarget = args->target; - int32_t *myOffsets = args->offsets; - const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; - const UChar *targetLimit = args->targetLimit; - unsigned char *toUBytes = args->converter->toUBytes; - uint32_t ch, i; - int32_t offsetNum = 0; - - /* Restore state of current sequence */ - if (args->converter->toULength > 0 && myTarget < targetLimit) { - i = args->converter->toULength; /* restore # of bytes consumed */ - args->converter->toULength = 0; - - ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ - args->converter->toUnicodeStatus = 0; - goto morebytes; - } - - while 
(mySource < sourceLimit && myTarget < targetLimit) { - i = 0; - ch = 0; -morebytes: - while (i < sizeof(uint32_t)) { - if (mySource < sourceLimit) { - ch = (ch << 8) | (uint8_t)(*mySource); - toUBytes[i++] = (char) *(mySource++); - } - else { - /* stores a partially calculated target*/ - /* + 1 to make 0 a valid character */ - args->converter->toUnicodeStatus = ch + 1; - args->converter->toULength = (int8_t) i; - goto donefornow; - } - } - - if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { - /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= MAXIMUM_UCS2) { - /* fits in 16 bits */ - *(myTarget++) = (UChar) ch; - *(myOffsets++) = offsetNum; - } - else { - /* write out the surrogates */ - *(myTarget++) = U16_LEAD(ch); - *myOffsets++ = offsetNum; - ch = U16_TRAIL(ch); - if (myTarget < targetLimit) - { - *(myTarget++) = (UChar)ch; - *(myOffsets++) = offsetNum; - } - else { - /* Put in overflow buffer (not handled here) */ - args->converter->UCharErrorBuffer[0] = (UChar) ch; - args->converter->UCharErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - else { - args->converter->toULength = (int8_t)i; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - offsetNum += i; - } - -donefornow: - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - /* End of target buffer */ - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = myTarget; - args->source = (const char *) mySource; - args->offsets = myOffsets; -} - -static void U_CALLCONV -T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args, - UErrorCode * err) -{ - const UChar *mySource = args->source; - unsigned char *myTarget; - const UChar *sourceLimit = args->sourceLimit; - const unsigned char *targetLimit = (unsigned char *) args->targetLimit; - UChar32 ch, ch2; - unsigned int indexToWrite; - unsigned char temp[sizeof(uint32_t)]; - - if(mySource >= sourceLimit) { - /* no input, nothing to do */ - return; - } - - /* write 
the BOM if necessary */ - if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu }; - ucnv_fromUWriteBytes(args->converter, - bom, 4, - &args->target, args->targetLimit, - &args->offsets, -1, - err); - args->converter->fromUnicodeStatus=0; - } - - myTarget = (unsigned char *) args->target; - temp[0] = 0; - - if (args->converter->fromUChar32) { - ch = args->converter->fromUChar32; - args->converter->fromUChar32 = 0; - goto lowsurogate; - } - - while (mySource < sourceLimit && myTarget < targetLimit) { - ch = *(mySource++); - - if (U_IS_SURROGATE(ch)) { - if (U_IS_LEAD(ch)) { -lowsurogate: - if (mySource < sourceLimit) { - ch2 = *mySource; - if (U_IS_TRAIL(ch2)) { - ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; - mySource++; - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - else { - /* ran out of source */ - args->converter->fromUChar32 = ch; - if (args->flush) { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err = U_ILLEGAL_CHAR_FOUND; - } - break; - } - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - - /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ - temp[1] = (uint8_t) (ch >> 16 & 0x1F); - temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ - temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ - - for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { - if (myTarget < targetLimit) { - *(myTarget++) = temp[indexToWrite]; - } - else { - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; - *err = 
U_BUFFER_OVERFLOW_ERROR; - } - } - } - - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = (char *) myTarget; - args->source = mySource; -} - -static void U_CALLCONV -T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, - UErrorCode * err) -{ - const UChar *mySource = args->source; - unsigned char *myTarget; - int32_t *myOffsets; - const UChar *sourceLimit = args->sourceLimit; - const unsigned char *targetLimit = (unsigned char *) args->targetLimit; - UChar32 ch, ch2; - int32_t offsetNum = 0; - unsigned int indexToWrite; - unsigned char temp[sizeof(uint32_t)]; - - if(mySource >= sourceLimit) { - /* no input, nothing to do */ - return; - } - - /* write the BOM if necessary */ - if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu }; - ucnv_fromUWriteBytes(args->converter, - bom, 4, - &args->target, args->targetLimit, - &args->offsets, -1, - err); - args->converter->fromUnicodeStatus=0; - } - - myTarget = (unsigned char *) args->target; - myOffsets = args->offsets; - temp[0] = 0; - - if (args->converter->fromUChar32) { - ch = args->converter->fromUChar32; - args->converter->fromUChar32 = 0; - goto lowsurogate; - } - - while (mySource < sourceLimit && myTarget < targetLimit) { - ch = *(mySource++); - - if (U_IS_SURROGATE(ch)) { - if (U_IS_LEAD(ch)) { -lowsurogate: - if (mySource < sourceLimit) { - ch2 = *mySource; - if (U_IS_TRAIL(ch2)) { - ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; - mySource++; - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - else { - /* ran out of source */ - args->converter->fromUChar32 = ch; - if (args->flush) { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - 
*err = U_ILLEGAL_CHAR_FOUND; - } - break; - } - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - - /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ - temp[1] = (uint8_t) (ch >> 16 & 0x1F); - temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ - temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ - - for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { - if (myTarget < targetLimit) { - *(myTarget++) = temp[indexToWrite]; - *(myOffsets++) = offsetNum; - } - else { - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - offsetNum = offsetNum + 1 + (temp[1] != 0); - } - - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = (char *) myTarget; - args->source = mySource; - args->offsets = myOffsets; -} - -static UChar32 U_CALLCONV -T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args, - UErrorCode* err) -{ - const uint8_t *mySource; - UChar32 myUChar; - int32_t length; - - mySource = (const uint8_t *)args->source; - if (mySource >= (const uint8_t *)args->sourceLimit) - { - /* no input */ - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; - } - - length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); - if (length < 4) - { - /* got a partial character */ - uprv_memcpy(args->converter->toUBytes, mySource, length); - args->converter->toULength = (int8_t)length; - args->source = (const char *)(mySource + length); - *err = U_TRUNCATED_CHAR_FOUND; - return 0xffff; - } - - /* Don't even try to do a direct cast because the value may be on an odd address. 
*/ - myUChar = ((UChar32)mySource[0] << 24) - | ((UChar32)mySource[1] << 16) - | ((UChar32)mySource[2] << 8) - | ((UChar32)mySource[3]); - - args->source = (const char *)(mySource + 4); - if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { - return myUChar; - } - - uprv_memcpy(args->converter->toUBytes, mySource, 4); - args->converter->toULength = 4; - - *err = U_ILLEGAL_CHAR_FOUND; - return 0xffff; -} -U_CDECL_END -static const UConverterImpl _UTF32BEImpl = { - UCNV_UTF32_BigEndian, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - T_UConverter_toUnicode_UTF32_BE, - T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC, - T_UConverter_fromUnicode_UTF32_BE, - T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, - T_UConverter_getNextUChar_UTF32_BE, - - NULL, - NULL, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - -/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ -static const UConverterStaticData _UTF32BEStaticData = { - sizeof(UConverterStaticData), - "UTF-32BE", - 1232, - UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4, - { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _UTF32BEData = - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32BEStaticData, &_UTF32BEImpl); - -/* UTF-32LE ---------------------------------------------------------- */ -U_CDECL_BEGIN -static void U_CALLCONV -T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, - UErrorCode * err) -{ - const unsigned char *mySource = (unsigned char *) args->source; - UChar *myTarget = args->target; - const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; - const UChar *targetLimit = args->targetLimit; - unsigned char *toUBytes = args->converter->toUBytes; - uint32_t ch, i; - - /* Restore state of current sequence */ - if (args->converter->toULength > 0 && myTarget < targetLimit) - { - i = args->converter->toULength; /* restore # of bytes consumed 
*/ - args->converter->toULength = 0; - - /* Stores the previously calculated ch from a previous call*/ - ch = args->converter->toUnicodeStatus - 1; - args->converter->toUnicodeStatus = 0; - goto morebytes; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - i = 0; - ch = 0; -morebytes: - while (i < sizeof(uint32_t)) - { - if (mySource < sourceLimit) - { - ch |= ((uint8_t)(*mySource)) << (i * 8); - toUBytes[i++] = (char) *(mySource++); - } - else - { - /* stores a partially calculated target*/ - /* + 1 to make 0 a valid character */ - args->converter->toUnicodeStatus = ch + 1; - args->converter->toULength = (int8_t) i; - goto donefornow; - } - } - - if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { - /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= MAXIMUM_UCS2) { - /* fits in 16 bits */ - *(myTarget++) = (UChar) ch; - } - else { - /* write out the surrogates */ - *(myTarget++) = U16_LEAD(ch); - ch = U16_TRAIL(ch); - if (myTarget < targetLimit) { - *(myTarget++) = (UChar)ch; - } - else { - /* Put in overflow buffer (not handled here) */ - args->converter->UCharErrorBuffer[0] = (UChar) ch; - args->converter->UCharErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - else { - args->converter->toULength = (int8_t)i; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - -donefornow: - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - /* End of target buffer */ - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = myTarget; - args->source = (const char *) mySource; -} - -static void U_CALLCONV -T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, - UErrorCode * err) -{ - const unsigned char *mySource = (unsigned char *) args->source; - UChar *myTarget = args->target; - int32_t *myOffsets = args->offsets; - const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; - const UChar *targetLimit = args->targetLimit; - 
unsigned char *toUBytes = args->converter->toUBytes; - uint32_t ch, i; - int32_t offsetNum = 0; - - /* Restore state of current sequence */ - if (args->converter->toULength > 0 && myTarget < targetLimit) - { - i = args->converter->toULength; /* restore # of bytes consumed */ - args->converter->toULength = 0; - - /* Stores the previously calculated ch from a previous call*/ - ch = args->converter->toUnicodeStatus - 1; - args->converter->toUnicodeStatus = 0; - goto morebytes; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - i = 0; - ch = 0; -morebytes: - while (i < sizeof(uint32_t)) - { - if (mySource < sourceLimit) - { - ch |= ((uint8_t)(*mySource)) << (i * 8); - toUBytes[i++] = (char) *(mySource++); - } - else - { - /* stores a partially calculated target*/ - /* + 1 to make 0 a valid character */ - args->converter->toUnicodeStatus = ch + 1; - args->converter->toULength = (int8_t) i; - goto donefornow; - } - } - - if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) - { - /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= MAXIMUM_UCS2) - { - /* fits in 16 bits */ - *(myTarget++) = (UChar) ch; - *(myOffsets++) = offsetNum; - } - else { - /* write out the surrogates */ - *(myTarget++) = U16_LEAD(ch); - *(myOffsets++) = offsetNum; - ch = U16_TRAIL(ch); - if (myTarget < targetLimit) - { - *(myTarget++) = (UChar)ch; - *(myOffsets++) = offsetNum; - } - else - { - /* Put in overflow buffer (not handled here) */ - args->converter->UCharErrorBuffer[0] = (UChar) ch; - args->converter->UCharErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - else - { - args->converter->toULength = (int8_t)i; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - offsetNum += i; - } - -donefornow: - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - /* End of target buffer */ - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = myTarget; - args->source = (const char *) mySource; - 
args->offsets = myOffsets; -} - -static void U_CALLCONV -T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args, - UErrorCode * err) -{ - const UChar *mySource = args->source; - unsigned char *myTarget; - const UChar *sourceLimit = args->sourceLimit; - const unsigned char *targetLimit = (unsigned char *) args->targetLimit; - UChar32 ch, ch2; - unsigned int indexToWrite; - unsigned char temp[sizeof(uint32_t)]; - - if(mySource >= sourceLimit) { - /* no input, nothing to do */ - return; - } - - /* write the BOM if necessary */ - if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 }; - ucnv_fromUWriteBytes(args->converter, - bom, 4, - &args->target, args->targetLimit, - &args->offsets, -1, - err); - args->converter->fromUnicodeStatus=0; - } - - myTarget = (unsigned char *) args->target; - temp[3] = 0; - - if (args->converter->fromUChar32) - { - ch = args->converter->fromUChar32; - args->converter->fromUChar32 = 0; - goto lowsurogate; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - ch = *(mySource++); - - if (U16_IS_SURROGATE(ch)) { - if (U16_IS_LEAD(ch)) - { -lowsurogate: - if (mySource < sourceLimit) - { - ch2 = *mySource; - if (U16_IS_TRAIL(ch2)) { - ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; - mySource++; - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - else { - /* ran out of source */ - args->converter->fromUChar32 = ch; - if (args->flush) { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err = U_ILLEGAL_CHAR_FOUND; - } - break; - } - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - - /* We cannot get any larger than 
10FFFF because we are coming from UTF-16 */ - temp[2] = (uint8_t) (ch >> 16 & 0x1F); - temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ - temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ - - for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) - { - if (myTarget < targetLimit) - { - *(myTarget++) = temp[indexToWrite]; - } - else - { - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - } - - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = (char *) myTarget; - args->source = mySource; -} - -static void U_CALLCONV -T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, - UErrorCode * err) -{ - const UChar *mySource = args->source; - unsigned char *myTarget; - int32_t *myOffsets; - const UChar *sourceLimit = args->sourceLimit; - const unsigned char *targetLimit = (unsigned char *) args->targetLimit; - UChar32 ch, ch2; - unsigned int indexToWrite; - unsigned char temp[sizeof(uint32_t)]; - int32_t offsetNum = 0; - - if(mySource >= sourceLimit) { - /* no input, nothing to do */ - return; - } - - /* write the BOM if necessary */ - if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 }; - ucnv_fromUWriteBytes(args->converter, - bom, 4, - &args->target, args->targetLimit, - &args->offsets, -1, - err); - args->converter->fromUnicodeStatus=0; - } - - myTarget = (unsigned char *) args->target; - myOffsets = args->offsets; - temp[3] = 0; - - if (args->converter->fromUChar32) - { - ch = args->converter->fromUChar32; - args->converter->fromUChar32 = 0; - goto lowsurogate; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - ch = *(mySource++); - - if (U16_IS_SURROGATE(ch)) { - if (U16_IS_LEAD(ch)) - { -lowsurogate: - if 
(mySource < sourceLimit) - { - ch2 = *mySource; - if (U16_IS_TRAIL(ch2)) - { - ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; - mySource++; - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - else { - /* ran out of source */ - args->converter->fromUChar32 = ch; - if (args->flush) { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err = U_ILLEGAL_CHAR_FOUND; - } - break; - } - } - else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - args->converter->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - - /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ - temp[2] = (uint8_t) (ch >> 16 & 0x1F); - temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ - temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ - - for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) - { - if (myTarget < targetLimit) - { - *(myTarget++) = temp[indexToWrite]; - *(myOffsets++) = offsetNum; - } - else - { - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - offsetNum = offsetNum + 1 + (temp[2] != 0); - } - - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = (char *) myTarget; - args->source = mySource; - args->offsets = myOffsets; -} - -static UChar32 U_CALLCONV -T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args, - UErrorCode* err) -{ - const uint8_t *mySource; - UChar32 myUChar; - int32_t length; - - mySource = (const uint8_t *)args->source; - if (mySource >= (const uint8_t *)args->sourceLimit) - { - /* no input */ - *err = U_INDEX_OUTOFBOUNDS_ERROR; - 
return 0xffff; - } - - length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); - if (length < 4) - { - /* got a partial character */ - uprv_memcpy(args->converter->toUBytes, mySource, length); - args->converter->toULength = (int8_t)length; - args->source = (const char *)(mySource + length); - *err = U_TRUNCATED_CHAR_FOUND; - return 0xffff; - } - - /* Don't even try to do a direct cast because the value may be on an odd address. */ - myUChar = ((UChar32)mySource[3] << 24) - | ((UChar32)mySource[2] << 16) - | ((UChar32)mySource[1] << 8) - | ((UChar32)mySource[0]); - - args->source = (const char *)(mySource + 4); - if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { - return myUChar; - } - - uprv_memcpy(args->converter->toUBytes, mySource, 4); - args->converter->toULength = 4; - - *err = U_ILLEGAL_CHAR_FOUND; - return 0xffff; -} -U_CDECL_END -static const UConverterImpl _UTF32LEImpl = { - UCNV_UTF32_LittleEndian, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - T_UConverter_toUnicode_UTF32_LE, - T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC, - T_UConverter_fromUnicode_UTF32_LE, - T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, - T_UConverter_getNextUChar_UTF32_LE, - - NULL, - NULL, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - -/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ -static const UConverterStaticData _UTF32LEStaticData = { - sizeof(UConverterStaticData), - "UTF-32LE", - 1234, - UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4, - { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - - -const UConverterSharedData _UTF32LEData = - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32LEStaticData, &_UTF32LEImpl); - -/* UTF-32 (Detect BOM) ------------------------------------------------------ */ - -/* - * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE - * accordingly. 
- * - * State values: - * 0 initial state - * 1 saw 00 - * 2 saw 00 00 - * 3 saw 00 00 FE - * 4 - - * 5 saw FF - * 6 saw FF FE - * 7 saw FF FE 00 - * 8 UTF-32BE mode - * 9 UTF-32LE mode - * - * During detection: state&3==number of matching bytes so far. - * - * On output, emit U+FEFF as the first code point. - */ -U_CDECL_BEGIN -static void U_CALLCONV -_UTF32Reset(UConverter *cnv, UConverterResetChoice choice) { - if(choice<=UCNV_RESET_TO_UNICODE) { - /* reset toUnicode: state=0 */ - cnv->mode=0; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - /* reset fromUnicode: prepare to output the UTF-32PE BOM */ - cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; - } -} - -static void U_CALLCONV -_UTF32Open(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - (void)pArgs; - (void)pErrorCode; - _UTF32Reset(cnv, UCNV_RESET_BOTH); -} - -static const char utf32BOM[8]={ 0, 0, (char)0xfeu, (char)0xffu, (char)0xffu, (char)0xfeu, 0, 0 }; - -static void U_CALLCONV -_UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv=pArgs->converter; - const char *source=pArgs->source; - const char *sourceLimit=pArgs->sourceLimit; - int32_t *offsets=pArgs->offsets; - - int32_t state, offsetDelta; - char b; - - state=cnv->mode; - - /* - * If we detect a BOM in this buffer, then we must add the BOM size to the - * offsets because the actual converter function will not see and count the BOM. - * offsetDelta will have the number of the BOM bytes that are in the current buffer. 
- */ - offsetDelta=0; - - while(sourcesource); - } else if(state==8) { - state=9; /* detect UTF-32LE */ - offsetDelta=(int32_t)(source-pArgs->source); - } - } else { - /* switch to UTF-32BE and pass the previous bytes */ - int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */ - - /* reset the source */ - source=pArgs->source; - - if(count==(state&3)) { - /* simple: all in the same buffer, just reset source */ - } else { - UBool oldFlush=pArgs->flush; - - /* some of the bytes are from a previous buffer, replay those first */ - pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ - pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */ - pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */ - - /* no offsets: bytes from previous buffer, and not enough for output */ - T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); - - /* restore real pointers; pArgs->source will be set in case 8/9 */ - pArgs->sourceLimit=sourceLimit; - pArgs->flush=oldFlush; - } - state=8; - continue; - } - break; - case 8: - /* call UTF-32BE */ - pArgs->source=source; - if(offsets==NULL) { - T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); - } else { - T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode); - } - source=pArgs->source; - break; - case 9: - /* call UTF-32LE */ - pArgs->source=source; - if(offsets==NULL) { - T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); - } else { - T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode); - } - source=pArgs->source; - break; - default: - break; /* does not occur */ - } - } - - /* add BOM size to offsets - see comment at offsetDelta declaration */ - if(offsets!=NULL && offsetDelta!=0) { - int32_t *offsetsLimit=pArgs->offsets; - while(offsetssource=source; - - if(source==sourceLimit && pArgs->flush) { - /* handle truncated input */ - switch(state) { - case 0: - break; /* no input at all, nothing to do */ - case 8: - 
T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); - break; - case 9: - T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); - break; - default: - /* handle 0source=utf32BOM+(state&4); /* select the correct BOM */ - pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */ - - /* no offsets: not enough for output */ - T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); - pArgs->source=source; - pArgs->sourceLimit=sourceLimit; - state=8; - break; - } - } - - cnv->mode=state; -} - -static UChar32 U_CALLCONV -_UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - switch(pArgs->converter->mode) { - case 8: - return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode); - case 9: - return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode); - default: - return UCNV_GET_NEXT_UCHAR_USE_TO_U; - } -} -U_CDECL_END -static const UConverterImpl _UTF32Impl = { - UCNV_UTF32, - - NULL, - NULL, - - _UTF32Open, - NULL, - _UTF32Reset, - - _UTF32ToUnicodeWithOffsets, - _UTF32ToUnicodeWithOffsets, -#if U_IS_BIG_ENDIAN - T_UConverter_fromUnicode_UTF32_BE, - T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, -#else - T_UConverter_fromUnicode_UTF32_LE, - T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, -#endif - _UTF32GetNextUChar, - - NULL, /* ### TODO implement getStarters for all Unicode encodings?! 
*/ - NULL, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - NULL, - NULL -}; - -/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianess of UTF-32 */ -static const UConverterStaticData _UTF32StaticData = { - sizeof(UConverterStaticData), - "UTF-32", - 1236, - UCNV_IBM, UCNV_UTF32, 4, 4, -#if U_IS_BIG_ENDIAN - { 0, 0, 0xff, 0xfd }, 4, -#else - { 0xfd, 0xff, 0, 0 }, 4, -#endif - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _UTF32Data = - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32StaticData, &_UTF32Impl); - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnv_u7.cpp b/deps/node/deps/icu-small/source/common/ucnv_u7.cpp deleted file mode 100644 index ec7befe9..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_u7.cpp +++ /dev/null @@ -1,1491 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnv_u7.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jul01 -* created by: Markus W. Scherer -* -* UTF-7 converter implementation. Used to be in ucnv_utf.c. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "cmemory.h" -#include "unicode/ucnv.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "uassert.h" - -/* UTF-7 -------------------------------------------------------------------- */ - -/* - * UTF-7 is a stateful encoding of Unicode. - * It is defined in RFC 2152. 
(http://www.ietf.org/rfc/rfc2152.txt) - * It was intended for use in Internet email systems, using in its bytewise - * encoding only a subset of 7-bit US-ASCII. - * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still - * occasionally used. - * - * For converting Unicode to UTF-7, the RFC allows to encode some US-ASCII - * characters directly or in base64. Especially, the characters in set O - * as defined in the RFC (see below) may be encoded directly but are not - * allowed in, e.g., email headers. - * By default, the ICU UTF-7 converter encodes set O directly. - * By choosing the option "version=1", set O will be escaped instead. - * For example: - * utf7Converter=ucnv_open("UTF-7,version=1"); - * - * For details about email headers see RFC 2047. - */ - -/* - * Tests for US-ASCII characters belonging to character classes - * defined in UTF-7. - * - * Set D (directly encoded characters) consists of the following - * characters: the upper and lower case letters A through Z - * and a through z, the 10 digits 0-9, and the following nine special - * characters (note that "+" and "=" are omitted): - * '(),-./:? - * - * Set O (optional direct characters) consists of the following - * characters (note that "\" and "~" are omitted): - * !"#$%&*;<=>@[]^_`{|} - * - * According to the rules in RFC 2152, the byte values for the following - * US-ASCII characters are not used in UTF-7 and are therefore illegal: - * - all C0 control codes except for CR LF TAB - * - BACKSLASH - * - TILDE - * - DEL - * - all codes beyond US-ASCII, i.e. all >127 - */ -#define inSetD(c) \ - ((uint8_t)((c)-97)<26 || (uint8_t)((c)-65)<26 || /* letters */ \ - (uint8_t)((c)-48)<10 || /* digits */ \ - (uint8_t)((c)-39)<3 || /* '() */ \ - (uint8_t)((c)-44)<4 || /* ,-./ */ \ - (c)==58 || (c)==63 /* :? 
*/ \ - ) - -#define inSetO(c) \ - ((uint8_t)((c)-33)<6 || /* !"#$%& */ \ - (uint8_t)((c)-59)<4 || /* ;<=> */ \ - (uint8_t)((c)-93)<4 || /* ]^_` */ \ - (uint8_t)((c)-123)<3 || /* {|} */ \ - (c)==42 || (c)==64 || (c)==91 /* *@[ */ \ - ) - -#define isCRLFTAB(c) ((c)==13 || (c)==10 || (c)==9) -#define isCRLFSPTAB(c) ((c)==32 || (c)==13 || (c)==10 || (c)==9) - -#define PLUS 43 -#define MINUS 45 -#define BACKSLASH 92 -#define TILDE 126 - -/* legal byte values: all US-ASCII graphic characters from space to before tilde, and CR LF TAB */ -#define isLegalUTF7(c) (((uint8_t)((c)-32)<94 && (c)!=BACKSLASH) || isCRLFTAB(c)) - -/* encode directly sets D and O and CR LF SP TAB */ -static const UBool encodeDirectlyMaximum[128]={ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 -}; - -/* encode directly set D and CR LF SP TAB but not set O */ -static const UBool encodeDirectlyRestricted[128]={ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, - - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 -}; - -static const uint8_t -toBase64[64]={ - /* A-Z */ - 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, - 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, - /* a-z */ - 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 114, 
115, 116, 117, 118, 119, 120, 121, 122, - /* 0-9 */ - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, - /* +/ */ - 43, 47 -}; - -static const int8_t -fromBase64[128]={ - /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */ - -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3, - -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, - - /* general punctuation with + and / and a special value (-2) for - */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63, - /* digits */ - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, - - /* A-Z */ - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1, - - /* a-z */ - -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3 -}; - -/* - * converter status values: - * - * toUnicodeStatus: - * 24 inDirectMode (boolean) - * 23..16 base64Counter (-1..7) - * 15..0 bits (up to 14 bits incoming base64) - * - * fromUnicodeStatus: - * 31..28 version (0: set O direct 1: set O escaped) - * 24 inDirectMode (boolean) - * 23..16 base64Counter (0..2) - * 7..0 bits (6 bits outgoing base64) - * - */ - -U_CDECL_BEGIN -static void U_CALLCONV -_UTF7Reset(UConverter *cnv, UConverterResetChoice choice) { - if(choice<=UCNV_RESET_TO_UNICODE) { - /* reset toUnicode */ - cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */ - cnv->toULength=0; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - /* reset fromUnicode */ - cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ - } -} - -static void U_CALLCONV -_UTF7Open(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - (void)pArgs; - if(UCNV_GET_VERSION(cnv)<=1) { - /* TODO(markus): Should just use cnv->options rather than copying the version number. 
*/ - cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28; - _UTF7Reset(cnv, UCNV_RESET_BOTH); - } else { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } -} - -static void U_CALLCONV -_UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source, *sourceLimit; - UChar *target; - const UChar *targetLimit; - int32_t *offsets; - - uint8_t *bytes; - uint8_t byteIndex; - - int32_t length, targetCapacity; - - /* UTF-7 state */ - uint16_t bits; - int8_t base64Counter; - UBool inDirectMode; - - int8_t base64Value; - - int32_t sourceIndex, nextSourceIndex; - - uint8_t b; - /* set up the local pointers */ - cnv=pArgs->converter; - - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetLimit=pArgs->targetLimit; - offsets=pArgs->offsets; - /* get the state machine state */ - { - uint32_t status=cnv->toUnicodeStatus; - inDirectMode=(UBool)((status>>24)&1); - base64Counter=(int8_t)(status>>16); - bits=(uint16_t)status; - } - bytes=cnv->toUBytes; - byteIndex=cnv->toULength; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex=byteIndex==0 ? 0 : -1; - nextSourceIndex=0; - - if(inDirectMode) { -directMode: - /* - * In Direct Mode, most US-ASCII characters are encoded directly, i.e., - * with their US-ASCII byte values. - * Backslash and Tilde and most control characters are not allowed in UTF-7. - * A plus sign starts Unicode (or "escape") Mode. - * - * In Direct Mode, only the sourceIndex is used. 
- */ - byteIndex=0; - length=(int32_t)(sourceLimit-source); - targetCapacity=(int32_t)(targetLimit-target); - if(length>targetCapacity) { - length=targetCapacity; - } - while(length>0) { - b=*source++; - if(!isLegalUTF7(b)) { - /* illegal */ - bytes[0]=b; - byteIndex=1; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } else if(b!=PLUS) { - /* write directly encoded character */ - *target++=b; - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - } else /* PLUS */ { - /* switch to Unicode mode */ - nextSourceIndex=++sourceIndex; - inDirectMode=FALSE; - byteIndex=0; - bits=0; - base64Counter=-1; - goto unicodeMode; - } - --length; - } - if(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { -unicodeMode: - /* - * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded. - * The base64 sequence ends with any character that is not in the base64 alphabet. - * A terminating minus sign is consumed. - * - * In Unicode Mode, the sourceIndex has the index to the start of the current - * base64 bytes, while nextSourceIndex is precisely parallel to source, - * keeping the index to the following byte. - * Note that in 2 out of 3 cases, UChars overlap within a base64 byte. - */ - while(source=126 || (base64Value=fromBase64[b])==-3 || base64Value==-1) { - /* either - * base64Value==-1 for any legal character except base64 and minus sign, or - * base64Value==-3 for illegal characters: - * 1. In either case, leave Unicode mode. - * 2.1. If we ended with an incomplete UChar or none after the +, then - * generate an error for the preceding erroneous sequence and deal with - * the current (possibly illegal) character next time through. - * 2.2. Else the current char comes after a complete UChar, which was already - * pushed to the output buf, so: - * 2.2.1. If the current char is legal, just save it for processing next time. - * It may be for example, a plus which we need to deal with in direct mode. - * 2.2.2. 
Else if the current char is illegal, we might as well deal with it here. - */ - inDirectMode=TRUE; - if(base64Counter==-1) { - /* illegal: + immediately followed by something other than base64 or minus sign */ - /* include the plus sign in the reported sequence, but not the subsequent char */ - --source; - bytes[0]=PLUS; - byteIndex=1; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } else if(bits!=0) { - /* bits are illegally left over, a UChar is incomplete */ - /* don't include current char (legal or illegal) in error seq */ - --source; - --byteIndex; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } else { - /* previous UChar was complete */ - if(base64Value==-3) { - /* current character is illegal, deal with it here */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } else { - /* un-read the current character in case it is a plus sign */ - --source; - sourceIndex=nextSourceIndex-1; - goto directMode; - } - } - } else if(base64Value>=0) { - /* collect base64 bytes into UChars */ - switch(base64Counter) { - case -1: /* -1 is immediately after the + */ - case 0: - bits=base64Value; - base64Counter=1; - break; - case 1: - case 3: - case 4: - case 6: - bits=(uint16_t)((bits<<6)|base64Value); - ++base64Counter; - break; - case 2: - *target++=(UChar)((bits<<4)|(base64Value>>2)); - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex-1; - } - bytes[0]=b; /* keep this byte in case an error occurs */ - byteIndex=1; - bits=(uint16_t)(base64Value&3); - base64Counter=3; - break; - case 5: - *target++=(UChar)((bits<<2)|(base64Value>>4)); - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex-1; - } - bytes[0]=b; /* keep this byte in case an error occurs */ - byteIndex=1; - bits=(uint16_t)(base64Value&15); - base64Counter=6; - break; - case 7: - *target++=(UChar)((bits<<6)|base64Value); - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex; - } - byteIndex=0; - bits=0; - base64Counter=0; - break; - default: - 
/* will never occur */ - break; - } - } else /*base64Value==-2*/ { - /* minus sign terminates the base64 sequence */ - inDirectMode=TRUE; - if(base64Counter==-1) { - /* +- i.e. a minus immediately following a plus */ - *target++=PLUS; - if(offsets!=NULL) { - *offsets++=sourceIndex-1; - } - } else { - /* absorb the minus and leave the Unicode Mode */ - if(bits!=0) { - /* bits are illegally left over, a UChar is incomplete */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - } - sourceIndex=nextSourceIndex; - goto directMode; - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - - if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) { - /* - * if we are in Unicode mode, then the byteIndex might not be 0, - * but that is ok if bits==0 - * -> we set byteIndex=0 at the end of the stream to avoid a truncated error - * (not true for IMAP-mailbox-name where we must end in direct mode) - */ - byteIndex=0; - } - - /* set the converter state back into UConverter */ - cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits; - cnv->toULength=byteIndex; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - return; -} - -static void U_CALLCONV -_UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source, *sourceLimit; - uint8_t *target, *targetLimit; - int32_t *offsets; - - int32_t length, targetCapacity, sourceIndex; - UChar c; - - /* UTF-7 state */ - const UBool *encodeDirectly; - uint8_t bits; - int8_t base64Counter; - UBool inDirectMode; - - /* set up the local pointers */ - cnv=pArgs->converter; - - /* set up the local pointers */ - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetLimit=(uint8_t *)pArgs->targetLimit; - offsets=pArgs->offsets; - - /* get the 
state machine state */ - { - uint32_t status=cnv->fromUnicodeStatus; - encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirectlyRestricted; - inDirectMode=(UBool)((status>>24)&1); - base64Counter=(int8_t)(status>>16); - bits=(uint8_t)status; - U_ASSERT(bits<=UPRV_LENGTHOF(toBase64)); - } - - /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ - sourceIndex=0; - - if(inDirectMode) { -directMode: - length=(int32_t)(sourceLimit-source); - targetCapacity=(int32_t)(targetLimit-target); - if(length>targetCapacity) { - length=targetCapacity; - } - while(length>0) { - c=*source++; - /* currently always encode CR LF SP TAB directly */ - if(c<=127 && encodeDirectly[c]) { - /* encode directly */ - *target++=(uint8_t)c; - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - } else if(c==PLUS) { - /* output +- for + */ - *target++=PLUS; - if(targetcharErrorBuffer[0]=MINUS; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } else { - /* un-read this character and switch to Unicode Mode */ - --source; - *target++=PLUS; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - inDirectMode=FALSE; - base64Counter=0; - goto unicodeMode; - } - --length; - } - if(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { -unicodeMode: - while(sourcecharErrorBuffer[0]=MINUS; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - goto directMode; - } else { - /* - * base64 this character: - * Output 2 or 3 base64 bytes for the remaining bits of the previous character - * and the bits of this character, each implicitly in UTF-16BE. - * - * Here, bits is an 8-bit variable because only 6 bits need to be kept from one - * character to the next. The actual 2 or 4 bits are shifted to the left edge - * of the 6-bits field 5..0 to make the termination of the base64 sequence easier. 
- */ - switch(base64Counter) { - case 0: - *target++=toBase64[c>>10]; - if(target>4)&0x3f]; - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f]; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - bits=(uint8_t)((c&15)<<2); - base64Counter=1; - break; - case 1: - *target++=toBase64[bits|(c>>14)]; - if(target>8)&0x3f]; - if(target>2)&0x3f]; - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } - cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f]; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f]; - cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f]; - cnv->charErrorBufferLength=2; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - bits=(uint8_t)((c&3)<<4); - base64Counter=2; - break; - case 2: - *target++=toBase64[bits|(c>>12)]; - if(target>6)&0x3f]; - if(targetcharErrorBuffer[0]=toBase64[c&0x3f]; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f]; - cnv->charErrorBuffer[1]=toBase64[c&0x3f]; - cnv->charErrorBufferLength=2; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - bits=0; - base64Counter=0; - break; - default: - /* will never occur */ - break; - } - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - - if(pArgs->flush && source>=sourceLimit) { - /* flush remaining bits to the target */ - if(!inDirectMode) { - if (base64Counter!=0) { - if(targetcharErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits]; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } - /* Add 
final MINUS to terminate unicodeMode */ - if(targetcharErrorBuffer[cnv->charErrorBufferLength++]=MINUS; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } - /* reset the state for the next conversion */ - cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ - } else { - /* set the converter state back into UConverter */ - cnv->fromUnicodeStatus= - (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/ - ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits; - } - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; - return; -} - -static const char * U_CALLCONV -_UTF7GetName(const UConverter *cnv) { - switch(cnv->fromUnicodeStatus>>28) { - case 1: - return "UTF-7,version=1"; - default: - return "UTF-7"; - } -} -U_CDECL_END - -static const UConverterImpl _UTF7Impl={ - UCNV_UTF7, - - NULL, - NULL, - - _UTF7Open, - NULL, - _UTF7Reset, - - _UTF7ToUnicodeWithOffsets, - _UTF7ToUnicodeWithOffsets, - _UTF7FromUnicodeWithOffsets, - _UTF7FromUnicodeWithOffsets, - NULL, - - NULL, - _UTF7GetName, - NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */ - NULL, - ucnv_getCompleteUnicodeSet, - - NULL, - NULL -}; - -static const UConverterStaticData _UTF7StaticData={ - sizeof(UConverterStaticData), - "UTF-7", - 0, /* TODO CCSID for UTF-7 */ - UCNV_IBM, UCNV_UTF7, - 1, 4, - { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */ - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _UTF7Data= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF7StaticData, &_UTF7Impl); - -/* IMAP mailbox name encoding ----------------------------------------------- */ - -/* - * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1 - * http://www.ietf.org/rfc/rfc2060.txt - * - * 5.1.3. 
Mailbox International Naming Convention - * - * By convention, international mailbox names are specified using a - * modified version of the UTF-7 encoding described in [UTF-7]. The - * purpose of these modifications is to correct the following problems - * with UTF-7: - * - * 1) UTF-7 uses the "+" character for shifting; this conflicts with - * the common use of "+" in mailbox names, in particular USENET - * newsgroup names. - * - * 2) UTF-7's encoding is BASE64 which uses the "/" character; this - * conflicts with the use of "/" as a popular hierarchy delimiter. - * - * 3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with - * the use of "\" as a popular hierarchy delimiter. - * - * 4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with - * the use of "~" in some servers as a home directory indicator. - * - * 5) UTF-7 permits multiple alternate forms to represent the same - * string; in particular, printable US-ASCII chararacters can be - * represented in encoded form. - * - * In modified UTF-7, printable US-ASCII characters except for "&" - * represent themselves; that is, characters with octet values 0x20-0x25 - * and 0x27-0x7e. The character "&" (0x26) is represented by the two- - * octet sequence "&-". - * - * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all - * Unicode 16-bit octets) are represented in modified BASE64, with a - * further modification from [UTF-7] that "," is used instead of "/". - * Modified BASE64 MUST NOT be used to represent any printing US-ASCII - * character which can represent itself. - * - * "&" is used to shift to modified BASE64 and "-" to shift back to US- - * ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that - * is, a name that ends with a Unicode 16-bit octet MUST end with a "- - * "). 
- * - * For example, here is a mailbox name which mixes English, Japanese, - * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw- - */ - -/* - * Tests for US-ASCII characters belonging to character classes - * defined in UTF-7. - * - * Set D (directly encoded characters) consists of the following - * characters: the upper and lower case letters A through Z - * and a through z, the 10 digits 0-9, and the following nine special - * characters (note that "+" and "=" are omitted): - * '(),-./:? - * - * Set O (optional direct characters) consists of the following - * characters (note that "\" and "~" are omitted): - * !"#$%&*;<=>@[]^_`{|} - * - * According to the rules in RFC 2152, the byte values for the following - * US-ASCII characters are not used in UTF-7 and are therefore illegal: - * - all C0 control codes except for CR LF TAB - * - BACKSLASH - * - TILDE - * - DEL - * - all codes beyond US-ASCII, i.e. all >127 - */ - -/* uses '&' not '+' to start a base64 sequence */ -#define AMPERSAND 0x26 -#define COMMA 0x2c -#define SLASH 0x2f - -/* legal byte values: all US-ASCII graphic characters 0x20..0x7e */ -#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e) - -/* direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26 */ -#define inSetDIMAP(c) (isLegalIMAP(c) && c!=AMPERSAND) - -#define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA) -#define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? 
-1 : fromBase64[c]) - -/* - * converter status values: - * - * toUnicodeStatus: - * 24 inDirectMode (boolean) - * 23..16 base64Counter (-1..7) - * 15..0 bits (up to 14 bits incoming base64) - * - * fromUnicodeStatus: - * 24 inDirectMode (boolean) - * 23..16 base64Counter (0..2) - * 7..0 bits (6 bits outgoing base64) - * - * ignore bits 31..25 - */ - -U_CDECL_BEGIN -static void U_CALLCONV -_IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source, *sourceLimit; - UChar *target; - const UChar *targetLimit; - int32_t *offsets; - - uint8_t *bytes; - uint8_t byteIndex; - - int32_t length, targetCapacity; - - /* UTF-7 state */ - uint16_t bits; - int8_t base64Counter; - UBool inDirectMode; - - int8_t base64Value; - - int32_t sourceIndex, nextSourceIndex; - - UChar c; - uint8_t b; - - /* set up the local pointers */ - cnv=pArgs->converter; - - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetLimit=pArgs->targetLimit; - offsets=pArgs->offsets; - /* get the state machine state */ - { - uint32_t status=cnv->toUnicodeStatus; - inDirectMode=(UBool)((status>>24)&1); - base64Counter=(int8_t)(status>>16); - bits=(uint16_t)status; - } - bytes=cnv->toUBytes; - byteIndex=cnv->toULength; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex=byteIndex==0 ? 0 : -1; - nextSourceIndex=0; - - if(inDirectMode) { -directMode: - /* - * In Direct Mode, US-ASCII characters are encoded directly, i.e., - * with their US-ASCII byte values. - * An ampersand starts Unicode (or "escape") Mode. - * - * In Direct Mode, only the sourceIndex is used. 
- */ - byteIndex=0; - length=(int32_t)(sourceLimit-source); - targetCapacity=(int32_t)(targetLimit-target); - if(length>targetCapacity) { - length=targetCapacity; - } - while(length>0) { - b=*source++; - if(!isLegalIMAP(b)) { - /* illegal */ - bytes[0]=b; - byteIndex=1; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } else if(b!=AMPERSAND) { - /* write directly encoded character */ - *target++=b; - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - } else /* AMPERSAND */ { - /* switch to Unicode mode */ - nextSourceIndex=++sourceIndex; - inDirectMode=FALSE; - byteIndex=0; - bits=0; - base64Counter=-1; - goto unicodeMode; - } - --length; - } - if(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { -unicodeMode: - /* - * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded. - * The base64 sequence ends with any character that is not in the base64 alphabet. - * A terminating minus sign is consumed. - * US-ASCII must not be base64-ed. - * - * In Unicode Mode, the sourceIndex has the index to the start of the current - * base64 bytes, while nextSourceIndex is precisely parallel to source, - * keeping the index to the following byte. - * Note that in 2 out of 3 cases, UChars overlap within a base64 byte. 
- */ - while(source0x7e) { - /* illegal - test other illegal US-ASCII values by base64Value==-3 */ - inDirectMode=TRUE; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } else if((base64Value=FROM_BASE64_IMAP(b))>=0) { - /* collect base64 bytes into UChars */ - switch(base64Counter) { - case -1: /* -1 is immediately after the & */ - case 0: - bits=base64Value; - base64Counter=1; - break; - case 1: - case 3: - case 4: - case 6: - bits=(uint16_t)((bits<<6)|base64Value); - ++base64Counter; - break; - case 2: - c=(UChar)((bits<<4)|(base64Value>>2)); - if(isLegalIMAP(c)) { - /* illegal */ - inDirectMode=TRUE; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - goto endloop; - } - *target++=c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex-1; - } - bytes[0]=b; /* keep this byte in case an error occurs */ - byteIndex=1; - bits=(uint16_t)(base64Value&3); - base64Counter=3; - break; - case 5: - c=(UChar)((bits<<2)|(base64Value>>4)); - if(isLegalIMAP(c)) { - /* illegal */ - inDirectMode=TRUE; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - goto endloop; - } - *target++=c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex-1; - } - bytes[0]=b; /* keep this byte in case an error occurs */ - byteIndex=1; - bits=(uint16_t)(base64Value&15); - base64Counter=6; - break; - case 7: - c=(UChar)((bits<<6)|base64Value); - if(isLegalIMAP(c)) { - /* illegal */ - inDirectMode=TRUE; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - goto endloop; - } - *target++=c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex; - } - byteIndex=0; - bits=0; - base64Counter=0; - break; - default: - /* will never occur */ - break; - } - } else if(base64Value==-2) { - /* minus sign terminates the base64 sequence */ - inDirectMode=TRUE; - if(base64Counter==-1) { - /* &- i.e. 
a minus immediately following an ampersand */ - *target++=AMPERSAND; - if(offsets!=NULL) { - *offsets++=sourceIndex-1; - } - } else { - /* absorb the minus and leave the Unicode Mode */ - if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) { - /* bits are illegally left over, a UChar is incomplete */ - /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - } - sourceIndex=nextSourceIndex; - goto directMode; - } else { - if(base64Counter==-1) { - /* illegal: & immediately followed by something other than base64 or minus sign */ - /* include the ampersand in the reported sequence */ - --sourceIndex; - bytes[0]=AMPERSAND; - bytes[1]=b; - byteIndex=2; - } - /* base64Value==-1 for characters that are illegal only in Unicode mode */ - /* base64Value==-3 for illegal characters */ - /* illegal */ - inDirectMode=TRUE; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } -endloop: - - /* - * the end of the input stream and detection of truncated input - * are handled by the framework, but here we must check if we are in Unicode - * mode and byteIndex==0 because we must end in direct mode - * - * conditions: - * successful - * in Unicode mode and byteIndex==0 - * end of input and no truncated input - */ - if( U_SUCCESS(*pErrorCode) && - !inDirectMode && byteIndex==0 && - pArgs->flush && source>=sourceLimit - ) { - if(base64Counter==-1) { - /* & at the very end of the input */ - /* make the ampersand the reported sequence */ - bytes[0]=AMPERSAND; - byteIndex=1; - } - /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */ - - inDirectMode=TRUE; /* avoid looping */ - *pErrorCode=U_TRUNCATED_CHAR_FOUND; - } - - /* set the converter state back into UConverter */ - 
cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits; - cnv->toULength=byteIndex; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - return; -} - -static void U_CALLCONV -_IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source, *sourceLimit; - uint8_t *target, *targetLimit; - int32_t *offsets; - - int32_t length, targetCapacity, sourceIndex; - UChar c; - uint8_t b; - - /* UTF-7 state */ - uint8_t bits; - int8_t base64Counter; - UBool inDirectMode; - - /* set up the local pointers */ - cnv=pArgs->converter; - - /* set up the local pointers */ - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetLimit=(uint8_t *)pArgs->targetLimit; - offsets=pArgs->offsets; - - /* get the state machine state */ - { - uint32_t status=cnv->fromUnicodeStatus; - inDirectMode=(UBool)((status>>24)&1); - base64Counter=(int8_t)(status>>16); - bits=(uint8_t)status; - } - - /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ - sourceIndex=0; - - if(inDirectMode) { -directMode: - length=(int32_t)(sourceLimit-source); - targetCapacity=(int32_t)(targetLimit-target); - if(length>targetCapacity) { - length=targetCapacity; - } - while(length>0) { - c=*source++; - /* encode 0x20..0x7e except '&' directly */ - if(inSetDIMAP(c)) { - /* encode directly */ - *target++=(uint8_t)c; - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - } else if(c==AMPERSAND) { - /* output &- for & */ - *target++=AMPERSAND; - if(targetcharErrorBuffer[0]=MINUS; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } else { - /* un-read this character and switch to Unicode Mode */ - --source; - *target++=AMPERSAND; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - inDirectMode=FALSE; - 
base64Counter=0; - goto unicodeMode; - } - --length; - } - if(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { -unicodeMode: - while(sourcecharErrorBuffer[0]=MINUS; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - goto directMode; - } else { - /* - * base64 this character: - * Output 2 or 3 base64 bytes for the remaining bits of the previous character - * and the bits of this character, each implicitly in UTF-16BE. - * - * Here, bits is an 8-bit variable because only 6 bits need to be kept from one - * character to the next. The actual 2 or 4 bits are shifted to the left edge - * of the 6-bits field 5..0 to make the termination of the base64 sequence easier. - */ - switch(base64Counter) { - case 0: - b=(uint8_t)(c>>10); - *target++=TO_BASE64_IMAP(b); - if(target>4)&0x3f); - *target++=TO_BASE64_IMAP(b); - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - b=(uint8_t)((c>>4)&0x3f); - cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - bits=(uint8_t)((c&15)<<2); - base64Counter=1; - break; - case 1: - b=(uint8_t)(bits|(c>>14)); - *target++=TO_BASE64_IMAP(b); - if(target>8)&0x3f); - *target++=TO_BASE64_IMAP(b); - if(target>2)&0x3f); - *target++=TO_BASE64_IMAP(b); - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex++; - } - b=(uint8_t)((c>>2)&0x3f); - cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - b=(uint8_t)((c>>8)&0x3f); - cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); - b=(uint8_t)((c>>2)&0x3f); - cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b); - 
cnv->charErrorBufferLength=2; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - bits=(uint8_t)((c&3)<<4); - base64Counter=2; - break; - case 2: - b=(uint8_t)(bits|(c>>12)); - *target++=TO_BASE64_IMAP(b); - if(target>6)&0x3f); - *target++=TO_BASE64_IMAP(b); - if(targetcharErrorBuffer[0]=TO_BASE64_IMAP(b); - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex++; - } - b=(uint8_t)((c>>6)&0x3f); - cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); - b=(uint8_t)(c&0x3f); - cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b); - cnv->charErrorBufferLength=2; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - bits=0; - base64Counter=0; - break; - default: - /* will never occur */ - break; - } - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - - if(pArgs->flush && source>=sourceLimit) { - /* flush remaining bits to the target */ - if(!inDirectMode) { - if(base64Counter!=0) { - if(targetcharErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits); - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } - /* need to terminate with a minus */ - if(targetcharErrorBuffer[cnv->charErrorBufferLength++]=MINUS; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - } - /* reset the state for the next conversion */ - cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ - } else { - /* set the converter state back into UConverter */ - cnv->fromUnicodeStatus= - (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/ - ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits; - } - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; - return; -} -U_CDECL_END - -static const UConverterImpl _IMAPImpl={ - UCNV_IMAP_MAILBOX, - - NULL, - NULL, - - _UTF7Open, - NULL, - _UTF7Reset, - - _IMAPToUnicodeWithOffsets, - _IMAPToUnicodeWithOffsets, - _IMAPFromUnicodeWithOffsets, 
- _IMAPFromUnicodeWithOffsets, - NULL, - - NULL, - NULL, - NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */ - NULL, - ucnv_getCompleteUnicodeSet, - NULL, - NULL -}; - -static const UConverterStaticData _IMAPStaticData={ - sizeof(UConverterStaticData), - "IMAP-mailbox-name", - 0, /* TODO CCSID for IMAP-mailbox-name */ - UCNV_IBM, UCNV_IMAP_MAILBOX, - 1, 4, - { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */ - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _IMAPData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_IMAPStaticData, &_IMAPImpl); - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnv_u8.cpp b/deps/node/deps/icu-small/source/common/ucnv_u8.cpp deleted file mode 100644 index 878d6730..00000000 --- a/deps/node/deps/icu-small/source/common/ucnv_u8.cpp +++ /dev/null @@ -1,944 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnv_u8.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jul01 -* created by: Markus W. Scherer -* -* UTF-8 converter implementation. Used to be in ucnv_utf.c. -* -* Also, CESU-8 implementation, see UTR 26. -* The CESU-8 converter uses all the same functions as the -* UTF-8 converter, with a branch for converting supplementary code points. 
-*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/utf.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" -#include "uassert.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cmemory.h" -#include "ustr_imp.h" - -/* Prototypes --------------------------------------------------------------- */ - -/* Keep these here to make finicky compilers happy */ - -U_CFUNC void ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs *args, - UErrorCode *err); -U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args, - UErrorCode *err); - - -/* UTF-8 -------------------------------------------------------------------- */ - -#define MAXIMUM_UCS2 0x0000FFFF - -static const uint32_t offsetsFromUTF8[5] = {0, - (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080, - (uint32_t) 0x03C82080 -}; - -static UBool hasCESU8Data(const UConverter *cnv) -{ -#if UCONFIG_ONLY_HTML_CONVERSION - return FALSE; -#else - return (UBool)(cnv->sharedData == &_CESU8Data); -#endif -} -U_CDECL_BEGIN -static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, - UErrorCode * err) -{ - UConverter *cnv = args->converter; - const unsigned char *mySource = (unsigned char *) args->source; - UChar *myTarget = args->target; - const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; - const UChar *targetLimit = args->targetLimit; - unsigned char *toUBytes = cnv->toUBytes; - UBool isCESU8 = hasCESU8Data(cnv); - uint32_t ch, ch2 = 0; - int32_t i, inBytes; - - /* Restore size of current sequence */ - if (cnv->toULength > 0 && myTarget < targetLimit) - { - inBytes = cnv->mode; /* restore # of bytes to consume */ - i = cnv->toULength; /* restore # of bytes consumed */ - cnv->toULength = 0; - - ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ - cnv->toUnicodeStatus = 0; - goto morebytes; - } - - - while (mySource < sourceLimit && myTarget < 
targetLimit) - { - ch = *(mySource++); - if (U8_IS_SINGLE(ch)) /* Simple case */ - { - *(myTarget++) = (UChar) ch; - } - else - { - /* store the first char */ - toUBytes[0] = (char)ch; - inBytes = U8_COUNT_BYTES_NON_ASCII(ch); /* lookup current sequence length */ - i = 1; - -morebytes: - while (i < inBytes) - { - if (mySource < sourceLimit) - { - toUBytes[i] = (char) (ch2 = *mySource); - if (!icu::UTF8::isValidTrail(ch, static_cast(ch2), i, inBytes) && - !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2))) - { - break; /* i < inBytes */ - } - ch = (ch << 6) + ch2; - ++mySource; - i++; - } - else - { - /* stores a partially calculated target*/ - cnv->toUnicodeStatus = ch; - cnv->mode = inBytes; - cnv->toULength = (int8_t) i; - goto donefornow; - } - } - - // In CESU-8, only surrogates, not supplementary code points, are encoded directly. - if (i == inBytes && (!isCESU8 || i <= 3)) - { - /* Remove the accumulated high bits */ - ch -= offsetsFromUTF8[inBytes]; - - /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= MAXIMUM_UCS2) - { - /* fits in 16 bits */ - *(myTarget++) = (UChar) ch; - } - else - { - /* write out the surrogates */ - *(myTarget++) = U16_LEAD(ch); - ch = U16_TRAIL(ch); - if (myTarget < targetLimit) - { - *(myTarget++) = (UChar)ch; - } - else - { - /* Put in overflow buffer (not handled here) */ - cnv->UCharErrorBuffer[0] = (UChar) ch; - cnv->UCharErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } - else - { - cnv->toULength = (int8_t)i; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - } - -donefornow: - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - /* End of target buffer */ - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = myTarget; - args->source = (const char *) mySource; -} - -static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args, - UErrorCode * err) -{ - UConverter *cnv = args->converter; - const 
unsigned char *mySource = (unsigned char *) args->source; - UChar *myTarget = args->target; - int32_t *myOffsets = args->offsets; - int32_t offsetNum = 0; - const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; - const UChar *targetLimit = args->targetLimit; - unsigned char *toUBytes = cnv->toUBytes; - UBool isCESU8 = hasCESU8Data(cnv); - uint32_t ch, ch2 = 0; - int32_t i, inBytes; - - /* Restore size of current sequence */ - if (cnv->toULength > 0 && myTarget < targetLimit) - { - inBytes = cnv->mode; /* restore # of bytes to consume */ - i = cnv->toULength; /* restore # of bytes consumed */ - cnv->toULength = 0; - - ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ - cnv->toUnicodeStatus = 0; - goto morebytes; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - ch = *(mySource++); - if (U8_IS_SINGLE(ch)) /* Simple case */ - { - *(myTarget++) = (UChar) ch; - *(myOffsets++) = offsetNum++; - } - else - { - toUBytes[0] = (char)ch; - inBytes = U8_COUNT_BYTES_NON_ASCII(ch); - i = 1; - -morebytes: - while (i < inBytes) - { - if (mySource < sourceLimit) - { - toUBytes[i] = (char) (ch2 = *mySource); - if (!icu::UTF8::isValidTrail(ch, static_cast(ch2), i, inBytes) && - !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2))) - { - break; /* i < inBytes */ - } - ch = (ch << 6) + ch2; - ++mySource; - i++; - } - else - { - cnv->toUnicodeStatus = ch; - cnv->mode = inBytes; - cnv->toULength = (int8_t)i; - goto donefornow; - } - } - - // In CESU-8, only surrogates, not supplementary code points, are encoded directly. 
- if (i == inBytes && (!isCESU8 || i <= 3)) - { - /* Remove the accumulated high bits */ - ch -= offsetsFromUTF8[inBytes]; - - /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ - if (ch <= MAXIMUM_UCS2) - { - /* fits in 16 bits */ - *(myTarget++) = (UChar) ch; - *(myOffsets++) = offsetNum; - } - else - { - /* write out the surrogates */ - *(myTarget++) = U16_LEAD(ch); - *(myOffsets++) = offsetNum; - ch = U16_TRAIL(ch); - if (myTarget < targetLimit) - { - *(myTarget++) = (UChar)ch; - *(myOffsets++) = offsetNum; - } - else - { - cnv->UCharErrorBuffer[0] = (UChar) ch; - cnv->UCharErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - offsetNum += i; - } - else - { - cnv->toULength = (int8_t)i; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - } - -donefornow: - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { /* End of target buffer */ - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = myTarget; - args->source = (const char *) mySource; - args->offsets = myOffsets; -} -U_CDECL_END - -U_CFUNC void U_CALLCONV ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args, - UErrorCode * err) -{ - UConverter *cnv = args->converter; - const UChar *mySource = args->source; - const UChar *sourceLimit = args->sourceLimit; - uint8_t *myTarget = (uint8_t *) args->target; - const uint8_t *targetLimit = (uint8_t *) args->targetLimit; - uint8_t *tempPtr; - UChar32 ch; - uint8_t tempBuf[4]; - int32_t indexToWrite; - UBool isNotCESU8 = !hasCESU8Data(cnv); - - if (cnv->fromUChar32 && myTarget < targetLimit) - { - ch = cnv->fromUChar32; - cnv->fromUChar32 = 0; - goto lowsurrogate; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - ch = *(mySource++); - - if (ch < 0x80) /* Single byte */ - { - *(myTarget++) = (uint8_t) ch; - } - else if (ch < 0x800) /* Double byte */ - { - *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0); - if (myTarget < targetLimit) - { - *(myTarget++) = (uint8_t) ((ch & 0x3f) | 
0x80); - } - else - { - cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); - cnv->charErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - else { - /* Check for surrogates */ - if(U16_IS_SURROGATE(ch) && isNotCESU8) { -lowsurrogate: - if (mySource < sourceLimit) { - /* test both code units */ - if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) { - /* convert and consume this supplementary code point */ - ch=U16_GET_SUPPLEMENTARY(ch, *mySource); - ++mySource; - /* exit this condition tree */ - } - else { - /* this is an unpaired trail or lead code unit */ - /* callback(illegal) */ - cnv->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - else { - /* no more input */ - cnv->fromUChar32 = ch; - break; - } - } - - /* Do we write the buffer directly for speed, - or do we have to be careful about target buffer space? */ - tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf); - - if (ch <= MAXIMUM_UCS2) { - indexToWrite = 2; - tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0); - } - else { - indexToWrite = 3; - tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0); - tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80); - } - tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80); - tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80); - - if (tempPtr == myTarget) { - /* There was enough space to write the codepoint directly. */ - myTarget += (indexToWrite + 1); - } - else { - /* We might run out of room soon. Write it slowly. 
*/ - for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) { - if (myTarget < targetLimit) { - *(myTarget++) = *tempPtr; - } - else { - cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - } - } - } - - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = (char *) myTarget; - args->source = mySource; -} - -U_CFUNC void U_CALLCONV ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, - UErrorCode * err) -{ - UConverter *cnv = args->converter; - const UChar *mySource = args->source; - int32_t *myOffsets = args->offsets; - const UChar *sourceLimit = args->sourceLimit; - uint8_t *myTarget = (uint8_t *) args->target; - const uint8_t *targetLimit = (uint8_t *) args->targetLimit; - uint8_t *tempPtr; - UChar32 ch; - int32_t offsetNum, nextSourceIndex; - int32_t indexToWrite; - uint8_t tempBuf[4]; - UBool isNotCESU8 = !hasCESU8Data(cnv); - - if (cnv->fromUChar32 && myTarget < targetLimit) - { - ch = cnv->fromUChar32; - cnv->fromUChar32 = 0; - offsetNum = -1; - nextSourceIndex = 0; - goto lowsurrogate; - } else { - offsetNum = 0; - } - - while (mySource < sourceLimit && myTarget < targetLimit) - { - ch = *(mySource++); - - if (ch < 0x80) /* Single byte */ - { - *(myOffsets++) = offsetNum++; - *(myTarget++) = (char) ch; - } - else if (ch < 0x800) /* Double byte */ - { - *(myOffsets++) = offsetNum; - *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0); - if (myTarget < targetLimit) - { - *(myOffsets++) = offsetNum++; - *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80); - } - else - { - cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); - cnv->charErrorBufferLength = 1; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - else - /* Check for surrogates */ - { - nextSourceIndex = offsetNum + 1; - - if(U16_IS_SURROGATE(ch) && isNotCESU8) { -lowsurrogate: - if (mySource < sourceLimit) { - /* test both code units */ - 
if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) { - /* convert and consume this supplementary code point */ - ch=U16_GET_SUPPLEMENTARY(ch, *mySource); - ++mySource; - ++nextSourceIndex; - /* exit this condition tree */ - } - else { - /* this is an unpaired trail or lead code unit */ - /* callback(illegal) */ - cnv->fromUChar32 = ch; - *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - else { - /* no more input */ - cnv->fromUChar32 = ch; - break; - } - } - - /* Do we write the buffer directly for speed, - or do we have to be careful about target buffer space? */ - tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf); - - if (ch <= MAXIMUM_UCS2) { - indexToWrite = 2; - tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0); - } - else { - indexToWrite = 3; - tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0); - tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80); - } - tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80); - tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80); - - if (tempPtr == myTarget) { - /* There was enough space to write the codepoint directly. */ - myTarget += (indexToWrite + 1); - myOffsets[0] = offsetNum; - myOffsets[1] = offsetNum; - myOffsets[2] = offsetNum; - if (indexToWrite >= 3) { - myOffsets[3] = offsetNum; - } - myOffsets += (indexToWrite + 1); - } - else { - /* We might run out of room soon. Write it slowly. 
*/ - for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) { - if (myTarget < targetLimit) - { - *(myOffsets++) = offsetNum; - *(myTarget++) = *tempPtr; - } - else - { - cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - } - offsetNum = nextSourceIndex; - } - } - - if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) - { - *err = U_BUFFER_OVERFLOW_ERROR; - } - - args->target = (char *) myTarget; - args->source = mySource; - args->offsets = myOffsets; -} - -U_CDECL_BEGIN -static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args, - UErrorCode *err) { - UConverter *cnv; - const uint8_t *sourceInitial; - const uint8_t *source; - uint8_t myByte; - UChar32 ch; - int8_t i; - - /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */ - - cnv = args->converter; - sourceInitial = source = (const uint8_t *)args->source; - if (source >= (const uint8_t *)args->sourceLimit) - { - /* no input */ - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; - } - - myByte = (uint8_t)*(source++); - if (U8_IS_SINGLE(myByte)) - { - args->source = (const char *)source; - return (UChar32)myByte; - } - - uint16_t countTrailBytes = U8_COUNT_TRAIL_BYTES(myByte); - if (countTrailBytes == 0) { - cnv->toUBytes[0] = myByte; - cnv->toULength = 1; - *err = U_ILLEGAL_CHAR_FOUND; - args->source = (const char *)source; - return 0xffff; - } - - /*The byte sequence is longer than the buffer area passed*/ - if (((const char *)source + countTrailBytes) > args->sourceLimit) - { - /* check if all of the remaining bytes are trail bytes */ - uint16_t extraBytesToWrite = countTrailBytes + 1; - cnv->toUBytes[0] = myByte; - i = 1; - *err = U_TRUNCATED_CHAR_FOUND; - while(source < (const uint8_t *)args->sourceLimit) { - uint8_t b = *source; - if(icu::UTF8::isValidTrail(myByte, b, i, extraBytesToWrite)) { - cnv->toUBytes[i++] = b; - ++source; - } else { - /* error even before we run out of input */ 
- *err = U_ILLEGAL_CHAR_FOUND; - break; - } - } - cnv->toULength = i; - args->source = (const char *)source; - return 0xffff; - } - - ch = myByte << 6; - if(countTrailBytes == 2) { - uint8_t t1 = *source, t2; - if(U8_IS_VALID_LEAD3_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source)) { - args->source = (const char *)(source + 1); - return (((ch + t1) << 6) + t2) - offsetsFromUTF8[3]; - } - } else if(countTrailBytes == 1) { - uint8_t t1 = *source; - if(U8_IS_TRAIL(t1)) { - args->source = (const char *)(source + 1); - return (ch + t1) - offsetsFromUTF8[2]; - } - } else { // countTrailBytes == 3 - uint8_t t1 = *source, t2, t3; - if(U8_IS_VALID_LEAD4_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source) && - U8_IS_TRAIL(t3 = *++source)) { - args->source = (const char *)(source + 1); - return (((((ch + t1) << 6) + t2) << 6) + t3) - offsetsFromUTF8[4]; - } - } - args->source = (const char *)source; - - for(i = 0; sourceInitial < source; ++i) { - cnv->toUBytes[i] = *sourceInitial++; - } - cnv->toULength = i; - *err = U_ILLEGAL_CHAR_FOUND; - return 0xffff; -} -U_CDECL_END - -/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */ - -U_CDECL_BEGIN -/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). 
*/ -static void U_CALLCONV -ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, - UConverterToUnicodeArgs *pToUArgs, - UErrorCode *pErrorCode) { - UConverter *utf8; - const uint8_t *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - int32_t count; - - int8_t oldToULength, toULength, toULimit; - - UChar32 c; - uint8_t b, t1, t2; - - /* set up the local pointers */ - utf8=pToUArgs->converter; - source=(uint8_t *)pToUArgs->source; - sourceLimit=(uint8_t *)pToUArgs->sourceLimit; - target=(uint8_t *)pFromUArgs->target; - targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); - - /* get the converter state from the UTF-8 UConverter */ - if(utf8->toULength > 0) { - toULength=oldToULength=utf8->toULength; - toULimit=(int8_t)utf8->mode; - c=(UChar32)utf8->toUnicodeStatus; - } else { - toULength=oldToULength=toULimit=0; - c = 0; - } - - count=(int32_t)(sourceLimit-source)+oldToULength; - if(counttargetCapacity) { - count=targetCapacity; - } - - // The conversion loop checks count>0 only once per character. - // If the buffer ends with a truncated sequence, - // then we reduce the count to stop before that, - // and collect the remaining bytes after the conversion loop. - - // Do not go back into the bytes that will be read for finishing a partial - // sequence from the previous buffer. - int32_t length=count-toULimit; - U8_TRUNCATE_IF_INCOMPLETE(source, 0, length); - count=toULimit+length; - } - - if(c!=0) { - utf8->toUnicodeStatus=0; - utf8->toULength=0; - goto moreBytes; - /* See note in ucnv_SBCSFromUTF8() about this goto. 
*/ - } - - /* conversion loop */ - while(count>0) { - b=*source++; - if(U8_IS_SINGLE(b)) { - /* convert ASCII */ - *target++=b; - --count; - continue; - } else { - if(b>=0xe0) { - if( /* handle U+0800..U+FFFF inline */ - b<0xf0 && - U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) && - U8_IS_TRAIL(t2=source[1]) - ) { - source+=2; - *target++=b; - *target++=t1; - *target++=t2; - count-=3; - continue; - } - } else { - if( /* handle U+0080..U+07FF inline */ - b>=0xc2 && - U8_IS_TRAIL(t1=*source) - ) { - ++source; - *target++=b; - *target++=t1; - count-=2; - continue; - } - } - - /* handle "complicated" and error cases, and continuing partial characters */ - oldToULength=0; - toULength=1; - toULimit=U8_COUNT_BYTES_NON_ASCII(b); - c=b; -moreBytes: - while(toULengthtoUBytes[oldToULength++]=*source++; - } - utf8->toUnicodeStatus=c; - utf8->toULength=toULength; - utf8->mode=toULimit; - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; - return; - } - } - - if(toULength!=toULimit) { - /* error handling: illegal UTF-8 byte sequence */ - source-=(toULength-oldToULength); - while(oldToULengthtoUBytes[oldToULength++]=*source++; - } - utf8->toULength=toULength; - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return; - } - - /* copy the legal byte sequence to the target */ - { - int8_t i; - - for(i=0; itoUBytes[i]; - } - source-=(toULength-oldToULength); - for(; i=0); - - if(U_SUCCESS(*pErrorCode) && sourcetargetLimit) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } else { - b=*source; - toULimit=U8_COUNT_BYTES(b); - if(toULimit>(sourceLimit-source)) { - /* collect a truncated byte sequence */ - toULength=0; - c=b; - for(;;) { - utf8->toUBytes[toULength++]=b; - if(++source==sourceLimit) { - /* partial byte sequence at end of source */ - utf8->toUnicodeStatus=c; - utf8->toULength=toULength; - utf8->mode=toULimit; - break; - } else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) { - 
utf8->toULength=toULength; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - c=(c<<6)+b; - } - } else { - /* partial-sequence target overflow: fall back to the pivoting implementation */ - *pErrorCode=U_USING_DEFAULT_WARNING; - } - } - } - - /* write back the updated pointers */ - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; -} - -U_CDECL_END - -/* UTF-8 converter data ----------------------------------------------------- */ - -static const UConverterImpl _UTF8Impl={ - UCNV_UTF8, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - ucnv_toUnicode_UTF8, - ucnv_toUnicode_UTF8_OFFSETS_LOGIC, - ucnv_fromUnicode_UTF8, - ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, - ucnv_getNextUChar_UTF8, - - NULL, - NULL, - NULL, - NULL, - ucnv_getNonSurrogateUnicodeSet, - - ucnv_UTF8FromUTF8, - ucnv_UTF8FromUTF8 -}; - -/* The 1208 CCSID refers to any version of Unicode of UTF-8 */ -static const UConverterStaticData _UTF8StaticData={ - sizeof(UConverterStaticData), - "UTF-8", - 1208, UCNV_IBM, UCNV_UTF8, - 1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */ - { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - - -const UConverterSharedData _UTF8Data= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF8StaticData, &_UTF8Impl); - -/* CESU-8 converter data ---------------------------------------------------- */ - -static const UConverterImpl _CESU8Impl={ - UCNV_CESU8, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - ucnv_toUnicode_UTF8, - ucnv_toUnicode_UTF8_OFFSETS_LOGIC, - ucnv_fromUnicode_UTF8, - ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, - NULL, - - NULL, - NULL, - NULL, - NULL, - ucnv_getCompleteUnicodeSet, - - NULL, - NULL -}; - -static const UConverterStaticData _CESU8StaticData={ - sizeof(UConverterStaticData), - "CESU-8", - 9400, /* CCSID for CESU-8 */ - UCNV_UNKNOWN, UCNV_CESU8, 1, 3, - { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved 
*/ -}; - - -const UConverterSharedData _CESU8Data= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CESU8StaticData, &_CESU8Impl); - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnvbocu.cpp b/deps/node/deps/icu-small/source/common/ucnvbocu.cpp deleted file mode 100644 index 5b66c505..00000000 --- a/deps/node/deps/icu-small/source/common/ucnvbocu.cpp +++ /dev/null @@ -1,1413 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ucnvbocu.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002mar27 -* created by: Markus W. Scherer -* -* This is an implementation of the Binary Ordered Compression for Unicode, -* in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/ -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/ucnv_cb.h" -#include "unicode/utf16.h" -#include "putilimp.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "uassert.h" - -/* BOCU-1 constants and macros ---------------------------------------------- */ - -/* - * BOCU-1 encodes the code points of a Unicode string as - * a sequence of byte-encoded differences (slope detection), - * preserving lexical order. - * - * Optimize the difference-taking for runs of Unicode text within - * small scripts: - * - * Most small scripts are allocated within aligned 128-blocks of Unicode - * code points. Lexical order is preserved if the "previous code point" state - * is always moved into the middle of such a block. 
- * - * Additionally, "prev" is moved from anywhere in the Unihan and Hangul - * areas into the middle of those areas. - * - * C0 control codes and space are encoded with their US-ASCII bytes. - * "prev" is reset for C0 controls but not for space. - */ - -/* initial value for "prev": middle of the ASCII range */ -#define BOCU1_ASCII_PREV 0x40 - -/* bounding byte values for differences */ -#define BOCU1_MIN 0x21 -#define BOCU1_MIDDLE 0x90 -#define BOCU1_MAX_LEAD 0xfe -#define BOCU1_MAX_TRAIL 0xff -#define BOCU1_RESET 0xff - -/* number of lead bytes */ -#define BOCU1_COUNT (BOCU1_MAX_LEAD-BOCU1_MIN+1) - -/* adjust trail byte counts for the use of some C0 control byte values */ -#define BOCU1_TRAIL_CONTROLS_COUNT 20 -#define BOCU1_TRAIL_BYTE_OFFSET (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT) - -/* number of trail bytes */ -#define BOCU1_TRAIL_COUNT ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT) - -/* - * number of positive and negative single-byte codes - * (counting 0==BOCU1_MIDDLE among the positive ones) - */ -#define BOCU1_SINGLE 64 - -/* number of lead bytes for positive and negative 2/3/4-byte sequences */ -#define BOCU1_LEAD_2 43 -#define BOCU1_LEAD_3 3 -#define BOCU1_LEAD_4 1 - -/* The difference value range for single-byters. */ -#define BOCU1_REACH_POS_1 (BOCU1_SINGLE-1) -#define BOCU1_REACH_NEG_1 (-BOCU1_SINGLE) - -/* The difference value range for double-byters. */ -#define BOCU1_REACH_POS_2 (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT) -#define BOCU1_REACH_NEG_2 (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT) - -/* The difference value range for 3-byters. */ -#define BOCU1_REACH_POS_3 \ - (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT) - -#define BOCU1_REACH_NEG_3 (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT) - -/* The lead byte start values. 
*/ -#define BOCU1_START_POS_2 (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1) -#define BOCU1_START_POS_3 (BOCU1_START_POS_2+BOCU1_LEAD_2) -#define BOCU1_START_POS_4 (BOCU1_START_POS_3+BOCU1_LEAD_3) - /* ==BOCU1_MAX_LEAD */ - -#define BOCU1_START_NEG_2 (BOCU1_MIDDLE+BOCU1_REACH_NEG_1) -#define BOCU1_START_NEG_3 (BOCU1_START_NEG_2-BOCU1_LEAD_2) -#define BOCU1_START_NEG_4 (BOCU1_START_NEG_3-BOCU1_LEAD_3) - /* ==BOCU1_MIN+1 */ - -/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */ -#define BOCU1_LENGTH_FROM_LEAD(lead) \ - ((BOCU1_START_NEG_2<=(lead) && (lead)>24 : 4) - -/* - * 12 commonly used C0 control codes (and space) are only used to encode - * themselves directly, - * which makes BOCU-1 MIME-usable and reasonably safe for - * ASCII-oriented software. - * - * These controls are - * 0 NUL - * - * 7 BEL - * 8 BS - * - * 9 TAB - * a LF - * b VT - * c FF - * d CR - * - * e SO - * f SI - * - * 1a SUB - * 1b ESC - * - * The other 20 C0 controls are also encoded directly (to preserve order) - * but are also used as trail bytes in difference encoding - * (for better compression). - */ -#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t]) - -/* - * Byte value map for control codes, - * from external byte values 0x00..0x20 - * to trail byte values 0..19 (0..0x13) as used in the difference calculation. - * External byte values that are illegal as trail bytes are mapped to -1. 
- */ -static const int8_t -bocu1ByteToTrail[BOCU1_MIN]={ -/* 0 1 2 3 4 5 6 7 */ - -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1, - -/* 8 9 a b c d e f */ - -1, -1, -1, -1, -1, -1, -1, -1, - -/* 10 11 12 13 14 15 16 17 */ - 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, - -/* 18 19 1a 1b 1c 1d 1e 1f */ - 0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13, - -/* 20 */ - -1 -}; - -/* - * Byte value map for control codes, - * from trail byte values 0..19 (0..0x13) as used in the difference calculation - * to external byte values 0x00..0x20. - */ -static const int8_t -bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={ -/* 0 1 2 3 4 5 6 7 */ - 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11, - -/* 8 9 a b c d e f */ - 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, - -/* 10 11 12 13 */ - 0x1c, 0x1d, 0x1e, 0x1f -}; - -/** - * Integer division and modulo with negative numerators - * yields negative modulo results and quotients that are one more than - * what we need here. - * This macro adjust the results so that the modulo-value m is always >=0. - * - * For positive n, the if() condition is always FALSE. - * - * @param n Number to be split into quotient and rest. - * Will be modified to contain the quotient. - * @param d Divisor. - * @param m Output variable for the rest (modulo result). - */ -#define NEGDIVMOD(n, d, m) { \ - (m)=(n)%(d); \ - (n)/=(d); \ - if((m)<0) { \ - --(n); \ - (m)+=(d); \ - } \ -} - -/* Faster versions of packDiff() for single-byte-encoded diff values. */ - -/** Is a diff value encodable in a single byte? */ -#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1) - -/** Encode a diff value in a single byte. */ -#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff)) - -/** Is a diff value encodable in two bytes? 
*/ -#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2) - -/* BOCU-1 implementation functions ------------------------------------------ */ - -#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV) - -/** - * Compute the next "previous" value for differencing - * from the current code point. - * - * @param c current code point, 0x3040..0xd7a3 (rest handled by macro below) - * @return "previous code point" state value - */ -static inline int32_t -bocu1Prev(int32_t c) { - /* compute new prev */ - if(/* 0x3040<=c && */ c<=0x309f) { - /* Hiragana is not 128-aligned */ - return 0x3070; - } else if(0x4e00<=c && c<=0x9fa5) { - /* CJK Unihan */ - return 0x4e00-BOCU1_REACH_NEG_2; - } else if(0xac00<=c /* && c<=0xd7a3 */) { - /* Korean Hangul */ - return (0xd7a3+0xac00)/2; - } else { - /* mostly small scripts */ - return BOCU1_SIMPLE_PREV(c); - } -} - -/** Fast version of bocu1Prev() for most scripts. */ -#define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c)) - -/* - * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c. - * The UConverter fields are used as follows: - * - * fromUnicodeStatus encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV) - * - * toUnicodeStatus decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV) - * mode decoder's incomplete (diff<<2)|count (ignored when toULength==0) - */ - -/* BOCU-1-from-Unicode conversion functions --------------------------------- */ - -/** - * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes - * and return a packed integer with them. - * - * The encoding favors small absolute differences with short encodings - * to compress runs of same-script characters. - * - * Optimized version with unrolled loops and fewer floating-point operations - * than the standard packDiff(). 
- * - * @param diff difference value -0x10ffff..0x10ffff - * @return - * 0x010000zz for 1-byte sequence zz - * 0x0200yyzz for 2-byte sequence yy zz - * 0x03xxyyzz for 3-byte sequence xx yy zz - * 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03) - */ -static int32_t -packDiff(int32_t diff) { - int32_t result, m; - - U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */ - if(diff>=BOCU1_REACH_NEG_1) { - /* mostly positive differences, and single-byte negative ones */ -#if 0 /* single-byte case handled in macros, see below */ - if(diff<=BOCU1_REACH_POS_1) { - /* single byte */ - return 0x01000000|(BOCU1_MIDDLE+diff); - } else -#endif - if(diff<=BOCU1_REACH_POS_2) { - /* two bytes */ - diff-=BOCU1_REACH_POS_1+1; - result=0x02000000; - - m=diff%BOCU1_TRAIL_COUNT; - diff/=BOCU1_TRAIL_COUNT; - result|=BOCU1_TRAIL_TO_BYTE(m); - - result|=(BOCU1_START_POS_2+diff)<<8; - } else if(diff<=BOCU1_REACH_POS_3) { - /* three bytes */ - diff-=BOCU1_REACH_POS_2+1; - result=0x03000000; - - m=diff%BOCU1_TRAIL_COUNT; - diff/=BOCU1_TRAIL_COUNT; - result|=BOCU1_TRAIL_TO_BYTE(m); - - m=diff%BOCU1_TRAIL_COUNT; - diff/=BOCU1_TRAIL_COUNT; - result|=BOCU1_TRAIL_TO_BYTE(m)<<8; - - result|=(BOCU1_START_POS_3+diff)<<16; - } else { - /* four bytes */ - diff-=BOCU1_REACH_POS_3+1; - - m=diff%BOCU1_TRAIL_COUNT; - diff/=BOCU1_TRAIL_COUNT; - result=BOCU1_TRAIL_TO_BYTE(m); - - m=diff%BOCU1_TRAIL_COUNT; - diff/=BOCU1_TRAIL_COUNT; - result|=BOCU1_TRAIL_TO_BYTE(m)<<8; - - /* - * We know that / and % would deliver quotient 0 and rest=diff. - * Avoid division and modulo for performance. 
- */ - result|=BOCU1_TRAIL_TO_BYTE(diff)<<16; - - result|=((uint32_t)BOCU1_START_POS_4)<<24; - } - } else { - /* two- to four-byte negative differences */ - if(diff>=BOCU1_REACH_NEG_2) { - /* two bytes */ - diff-=BOCU1_REACH_NEG_1; - result=0x02000000; - - NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); - result|=BOCU1_TRAIL_TO_BYTE(m); - - result|=(BOCU1_START_NEG_2+diff)<<8; - } else if(diff>=BOCU1_REACH_NEG_3) { - /* three bytes */ - diff-=BOCU1_REACH_NEG_2; - result=0x03000000; - - NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); - result|=BOCU1_TRAIL_TO_BYTE(m); - - NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); - result|=BOCU1_TRAIL_TO_BYTE(m)<<8; - - result|=(BOCU1_START_NEG_3+diff)<<16; - } else { - /* four bytes */ - diff-=BOCU1_REACH_NEG_3; - - NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); - result=BOCU1_TRAIL_TO_BYTE(m); - - NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); - result|=BOCU1_TRAIL_TO_BYTE(m)<<8; - - /* - * We know that NEGDIVMOD would deliver - * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT. - * Avoid division and modulo for performance. - */ - m=diff+BOCU1_TRAIL_COUNT; - result|=BOCU1_TRAIL_TO_BYTE(m)<<16; - - result|=BOCU1_MIN<<24; - } - } - return result; -} - - -static void U_CALLCONV -_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - int32_t *offsets; - - int32_t prev, c, diff; - - int32_t sourceIndex, nextSourceIndex; - - /* set up the local pointers */ - cnv=pArgs->converter; - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - /* get the converter state from UConverter */ - c=cnv->fromUChar32; - prev=(int32_t)cnv->fromUnicodeStatus; - if(prev==0) { - prev=BOCU1_ASCII_PREV; - } - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex= c==0 ? 
0 : -1; - nextSourceIndex=0; - - /* conversion loop */ - if(c!=0 && targetCapacity>0) { - goto getTrail; - } - -fastSingle: - /* fast loop for single-byte differences */ - /* use only one loop counter variable, targetCapacity, not also source */ - diff=(int32_t)(sourceLimit-source); - if(targetCapacity>diff) { - targetCapacity=diff; - } - while(targetCapacity>0 && (c=*source)<0x3000) { - if(c<=0x20) { - if(c!=0x20) { - prev=BOCU1_ASCII_PREV; - } - *target++=(uint8_t)c; - *offsets++=nextSourceIndex++; - ++source; - --targetCapacity; - } else { - diff=c-prev; - if(DIFF_IS_SINGLE(diff)) { - prev=BOCU1_SIMPLE_PREV(c); - *target++=(uint8_t)PACK_SINGLE_DIFF(diff); - *offsets++=nextSourceIndex++; - ++source; - --targetCapacity; - } else { - break; - } - } - } - /* restore real values */ - targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target); - sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */ - - /* regular loop for all cases */ - while(source0) { - c=*source++; - ++nextSourceIndex; - - if(c<=0x20) { - /* - * ISO C0 control & space: - * Encode directly for MIME compatibility, - * and reset state except for space, to not disrupt compression. 
- */ - if(c!=0x20) { - prev=BOCU1_ASCII_PREV; - } - *target++=(uint8_t)c; - *offsets++=sourceIndex; - --targetCapacity; - - sourceIndex=nextSourceIndex; - continue; - } - - if(U16_IS_LEAD(c)) { -getTrail: - if(source=0) { - diff-=BOCU1_REACH_POS_1+1; - m=diff%BOCU1_TRAIL_COUNT; - diff/=BOCU1_TRAIL_COUNT; - diff+=BOCU1_START_POS_2; - } else { - diff-=BOCU1_REACH_NEG_1; - NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); - diff+=BOCU1_START_NEG_2; - } - *target++=(uint8_t)diff; - *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m); - *offsets++=sourceIndex; - *offsets++=sourceIndex; - targetCapacity-=2; - sourceIndex=nextSourceIndex; - } else { - int32_t length; /* will be 2..4 */ - - diff=packDiff(diff); - length=BOCU1_LENGTH_FROM_PACKED(diff); - - /* write the output character bytes from diff and length */ - /* from the first if in the loop we know that targetCapacity>0 */ - if(length<=targetCapacity) { - switch(length) { - /* each branch falls through to the next one */ - case 4: - *target++=(uint8_t)(diff>>24); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 3: - *target++=(uint8_t)(diff>>16); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(diff>>8); - *offsets++=sourceIndex; - /* case 1: handled above */ - *target++=(uint8_t)diff; - *offsets++=sourceIndex; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - targetCapacity-=length; - sourceIndex=nextSourceIndex; - } else { - uint8_t *charErrorBuffer; - - /* - * We actually do this backwards here: - * In order to save an intermediate variable, we output - * first to the overflow buffer what does not fit into the - * regular target. 
- */ - /* we know that 1<=targetCapacitycharErrorBuffer; - switch(length) { - /* each branch falls through to the next one */ - case 3: - *charErrorBuffer++=(uint8_t)(diff>>16); - U_FALLTHROUGH; - case 2: - *charErrorBuffer++=(uint8_t)(diff>>8); - U_FALLTHROUGH; - case 1: - *charErrorBuffer=(uint8_t)diff; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - cnv->charErrorBufferLength=(int8_t)length; - - /* now output what fits into the regular target */ - diff>>=8*length; /* length was reduced by targetCapacity */ - switch(targetCapacity) { - /* each branch falls through to the next one */ - case 3: - *target++=(uint8_t)(diff>>16); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(diff>>8); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)diff; - *offsets++=sourceIndex; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - - /* target overflow */ - targetCapacity=0; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - /* set the converter state back into UConverter */ - cnv->fromUChar32= c<0 ? -c : 0; - cnv->fromUnicodeStatus=(uint32_t)prev; - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; -} - -/* - * Identical to _Bocu1FromUnicodeWithOffsets but without offset handling. - * If a change is made in the original function, then either - * change this function the same way or - * re-copy the original function and remove the variables - * offsets, sourceIndex, and nextSourceIndex. 
- */ -static void U_CALLCONV -_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - - int32_t prev, c, diff; - - /* set up the local pointers */ - cnv=pArgs->converter; - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - - /* get the converter state from UConverter */ - c=cnv->fromUChar32; - prev=(int32_t)cnv->fromUnicodeStatus; - if(prev==0) { - prev=BOCU1_ASCII_PREV; - } - - /* conversion loop */ - if(c!=0 && targetCapacity>0) { - goto getTrail; - } - -fastSingle: - /* fast loop for single-byte differences */ - /* use only one loop counter variable, targetCapacity, not also source */ - diff=(int32_t)(sourceLimit-source); - if(targetCapacity>diff) { - targetCapacity=diff; - } - while(targetCapacity>0 && (c=*source)<0x3000) { - if(c<=0x20) { - if(c!=0x20) { - prev=BOCU1_ASCII_PREV; - } - *target++=(uint8_t)c; - } else { - diff=c-prev; - if(DIFF_IS_SINGLE(diff)) { - prev=BOCU1_SIMPLE_PREV(c); - *target++=(uint8_t)PACK_SINGLE_DIFF(diff); - } else { - break; - } - } - ++source; - --targetCapacity; - } - /* restore real values */ - targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target); - - /* regular loop for all cases */ - while(source0) { - c=*source++; - - if(c<=0x20) { - /* - * ISO C0 control & space: - * Encode directly for MIME compatibility, - * and reset state except for space, to not disrupt compression. 
- */ - if(c!=0x20) { - prev=BOCU1_ASCII_PREV; - } - *target++=(uint8_t)c; - --targetCapacity; - continue; - } - - if(U16_IS_LEAD(c)) { -getTrail: - if(source=0) { - diff-=BOCU1_REACH_POS_1+1; - m=diff%BOCU1_TRAIL_COUNT; - diff/=BOCU1_TRAIL_COUNT; - diff+=BOCU1_START_POS_2; - } else { - diff-=BOCU1_REACH_NEG_1; - NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); - diff+=BOCU1_START_NEG_2; - } - *target++=(uint8_t)diff; - *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m); - targetCapacity-=2; - } else { - int32_t length; /* will be 2..4 */ - - diff=packDiff(diff); - length=BOCU1_LENGTH_FROM_PACKED(diff); - - /* write the output character bytes from diff and length */ - /* from the first if in the loop we know that targetCapacity>0 */ - if(length<=targetCapacity) { - switch(length) { - /* each branch falls through to the next one */ - case 4: - *target++=(uint8_t)(diff>>24); - U_FALLTHROUGH; - case 3: - *target++=(uint8_t)(diff>>16); - /* case 2: handled above */ - *target++=(uint8_t)(diff>>8); - /* case 1: handled above */ - *target++=(uint8_t)diff; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - targetCapacity-=length; - } else { - uint8_t *charErrorBuffer; - - /* - * We actually do this backwards here: - * In order to save an intermediate variable, we output - * first to the overflow buffer what does not fit into the - * regular target. 
- */ - /* we know that 1<=targetCapacitycharErrorBuffer; - switch(length) { - /* each branch falls through to the next one */ - case 3: - *charErrorBuffer++=(uint8_t)(diff>>16); - U_FALLTHROUGH; - case 2: - *charErrorBuffer++=(uint8_t)(diff>>8); - U_FALLTHROUGH; - case 1: - *charErrorBuffer=(uint8_t)diff; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - cnv->charErrorBufferLength=(int8_t)length; - - /* now output what fits into the regular target */ - diff>>=8*length; /* length was reduced by targetCapacity */ - switch(targetCapacity) { - /* each branch falls through to the next one */ - case 3: - *target++=(uint8_t)(diff>>16); - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(diff>>8); - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)diff; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - - /* target overflow */ - targetCapacity=0; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - /* set the converter state back into UConverter */ - cnv->fromUChar32= c<0 ? -c : 0; - cnv->fromUnicodeStatus=(uint32_t)prev; - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; -} - -/* BOCU-1-to-Unicode conversion functions ----------------------------------- */ - -/** - * Function for BOCU-1 decoder; handles multi-byte lead bytes. 
- * - * @param b lead byte; - * BOCU1_MIN<=b=BOCU1_START_NEG_2) { - /* positive difference */ - if(b=BOCU1_START_NEG_3) { - /* two bytes */ - diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; - count=1; - } else if(b>BOCU1_MIN) { - /* three bytes */ - diff=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2; - count=2; - } else { - /* four bytes */ - diff=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3; - count=3; - } - } - - /* return the state for decoding the trail byte(s) */ - return (diff<<2)|count; -} - -/** - * Function for BOCU-1 decoder; handles multi-byte trail bytes. - * - * @param count number of remaining trail bytes including this one - * @param b trail byte - * @return new delta for diff including b - <0 indicates an error - * - * @see decodeBocu1 - */ -static inline int32_t -decodeBocu1TrailByte(int32_t count, int32_t b) { - if(b<=0x20) { - /* skip some C0 controls and make the trail byte range contiguous */ - b=bocu1ByteToTrail[b]; - /* b<0 for an illegal trail byte value will result in return<0 below */ -#if BOCU1_MAX_TRAIL<0xff - } else if(b>BOCU1_MAX_TRAIL) { - return -99; -#endif - } else { - b-=BOCU1_TRAIL_BYTE_OFFSET; - } - - /* add trail byte into difference and decrement count */ - if(count==1) { - return b; - } else if(count==2) { - return b*BOCU1_TRAIL_COUNT; - } else /* count==3 */ { - return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT); - } -} - -static void U_CALLCONV -_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source, *sourceLimit; - UChar *target; - const UChar *targetLimit; - int32_t *offsets; - - int32_t prev, count, diff, c; - - int8_t byteIndex; - uint8_t *bytes; - - int32_t sourceIndex, nextSourceIndex; - - /* set up the local pointers */ - cnv=pArgs->converter; - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; 
- targetLimit=pArgs->targetLimit; - offsets=pArgs->offsets; - - /* get the converter state from UConverter */ - prev=(int32_t)cnv->toUnicodeStatus; - if(prev==0) { - prev=BOCU1_ASCII_PREV; - } - diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */ - count=diff&3; - diff>>=2; - - byteIndex=cnv->toULength; - bytes=cnv->toUBytes; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex=byteIndex==0 ? 0 : -1; - nextSourceIndex=0; - - /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */ - if(count>0 && byteIndex>0 && targettargetLimit-target); - if(count>diff) { - count=diff; - } - while(count>0) { - if(BOCU1_START_NEG_2<=(c=*source) && c=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - - ++nextSourceIndex; - c=*source++; - if(BOCU1_START_NEG_2<=c && c=BOCU1_MIDDLE) { - diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1; - } else { - diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; - } - - /* trail byte */ - ++nextSourceIndex; - c=decodeBocu1TrailByte(1, *source++); - if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) { - bytes[0]=source[-2]; - bytes[1]=source[-1]; - byteIndex=2; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - } else if(c==BOCU1_RESET) { - /* only reset the state, no code point */ - prev=BOCU1_ASCII_PREV; - sourceIndex=nextSourceIndex; - continue; - } else { - /* - * For multi-byte difference lead bytes, set the decoder state - * with the partial difference value from the lead byte and - * with the number of trail bytes. 
- */ - bytes[0]=(uint8_t)c; - byteIndex=1; - - diff=decodeBocu1LeadByte(c); - count=diff&3; - diff>>=2; -getTrail: - for(;;) { - if(source>=sourceLimit) { - goto endloop; - } - ++nextSourceIndex; - c=bytes[byteIndex++]=*source++; - - /* trail byte in any position */ - c=decodeBocu1TrailByte(count, c); - if(c<0) { - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - goto endloop; - } - - diff+=c; - if(--count==0) { - /* final trail byte, deliver a code point */ - byteIndex=0; - c=prev+diff; - if((uint32_t)c>0x10ffff) { - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - goto endloop; - } - break; - } - } - } - - /* calculate the next prev and output c */ - prev=BOCU1_PREV(c); - if(c<=0xffff) { - *target++=(UChar)c; - *offsets++=sourceIndex; - } else { - /* output surrogate pair */ - *target++=U16_LEAD(c); - if(targetUCharErrorBuffer[0]=U16_TRAIL(c); - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - sourceIndex=nextSourceIndex; - } -endloop: - - if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) { - /* set the converter state in UConverter to deal with the next character */ - cnv->toUnicodeStatus=BOCU1_ASCII_PREV; - cnv->mode=0; - } else { - /* set the converter state back into UConverter */ - cnv->toUnicodeStatus=(uint32_t)prev; - cnv->mode=(diff<<2)|count; - } - cnv->toULength=byteIndex; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - return; -} - -/* - * Identical to _Bocu1ToUnicodeWithOffsets but without offset handling. - * If a change is made in the original function, then either - * change this function the same way or - * re-copy the original function and remove the variables - * offsets, sourceIndex, and nextSourceIndex. 
- */ -static void U_CALLCONV -_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source, *sourceLimit; - UChar *target; - const UChar *targetLimit; - - int32_t prev, count, diff, c; - - int8_t byteIndex; - uint8_t *bytes; - - /* set up the local pointers */ - cnv=pArgs->converter; - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetLimit=pArgs->targetLimit; - - /* get the converter state from UConverter */ - prev=(int32_t)cnv->toUnicodeStatus; - if(prev==0) { - prev=BOCU1_ASCII_PREV; - } - diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */ - count=diff&3; - diff>>=2; - - byteIndex=cnv->toULength; - bytes=cnv->toUBytes; - - /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */ - if(count>0 && byteIndex>0 && targettargetLimit-target); - if(count>diff) { - count=diff; - } - while(count>0) { - if(BOCU1_START_NEG_2<=(c=*source) && c=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - - c=*source++; - if(BOCU1_START_NEG_2<=c && c=BOCU1_MIDDLE) { - diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1; - } else { - diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; - } - - /* trail byte */ - c=decodeBocu1TrailByte(1, *source++); - if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) { - bytes[0]=source[-2]; - bytes[1]=source[-1]; - byteIndex=2; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - } else if(c==BOCU1_RESET) { - /* only reset the state, no code point */ - prev=BOCU1_ASCII_PREV; - continue; - } else { - /* - * For multi-byte difference lead bytes, set the decoder state - * with the partial difference value from the lead byte and - * with the number of trail bytes. 
- */ - bytes[0]=(uint8_t)c; - byteIndex=1; - - diff=decodeBocu1LeadByte(c); - count=diff&3; - diff>>=2; -getTrail: - for(;;) { - if(source>=sourceLimit) { - goto endloop; - } - c=bytes[byteIndex++]=*source++; - - /* trail byte in any position */ - c=decodeBocu1TrailByte(count, c); - if(c<0) { - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - goto endloop; - } - - diff+=c; - if(--count==0) { - /* final trail byte, deliver a code point */ - byteIndex=0; - c=prev+diff; - if((uint32_t)c>0x10ffff) { - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - goto endloop; - } - break; - } - } - } - - /* calculate the next prev and output c */ - prev=BOCU1_PREV(c); - if(c<=0xffff) { - *target++=(UChar)c; - } else { - /* output surrogate pair */ - *target++=U16_LEAD(c); - if(targetUCharErrorBuffer[0]=U16_TRAIL(c); - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - } -endloop: - - if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) { - /* set the converter state in UConverter to deal with the next character */ - cnv->toUnicodeStatus=BOCU1_ASCII_PREV; - cnv->mode=0; - } else { - /* set the converter state back into UConverter */ - cnv->toUnicodeStatus=(uint32_t)prev; - cnv->mode=(diff<<2)|count; - } - cnv->toULength=byteIndex; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - return; -} - -/* miscellaneous ------------------------------------------------------------ */ - -static const UConverterImpl _Bocu1Impl={ - UCNV_BOCU1, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - _Bocu1ToUnicode, - _Bocu1ToUnicodeWithOffsets, - _Bocu1FromUnicode, - _Bocu1FromUnicodeWithOffsets, - NULL, - - NULL, - NULL, - NULL, - NULL, - ucnv_getCompleteUnicodeSet, - - NULL, - NULL -}; - -static const UConverterStaticData _Bocu1StaticData={ - sizeof(UConverterStaticData), - "BOCU-1", - 1214, /* CCSID for BOCU-1 */ - UCNV_IBM, UCNV_BOCU1, - 1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */ - { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never 
needs to write a subchar */ - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _Bocu1Data= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Bocu1StaticData, &_Bocu1Impl); - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnvdisp.cpp b/deps/node/deps/icu-small/source/common/ucnvdisp.cpp deleted file mode 100644 index ac86b985..00000000 --- a/deps/node/deps/icu-small/source/common/ucnvdisp.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1998-2004, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* ucnvdisp.c: -* Implements APIs for the ICU's codeset conversion library display names. -* -* Modification History: -* -* Date Name Description -* 04/04/99 helena Fixed internal header inclusion. -* 05/09/00 helena Added implementation to handle fallback mappings. -* 06/20/2000 helena OS/400 port changes; mostly typecast. 
-* 09/08/2004 grhoten split from ucnv.c -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ustring.h" -#include "unicode/ures.h" -#include "unicode/ucnv.h" -#include "cstring.h" -#include "ustr_imp.h" -#include "ucnv_imp.h" -#include "putilimp.h" - -U_CAPI int32_t U_EXPORT2 -ucnv_getDisplayName(const UConverter *cnv, - const char *displayLocale, - UChar *displayName, int32_t displayNameCapacity, - UErrorCode *pErrorCode) { - UResourceBundle *rb; - const UChar *name; - int32_t length; - UErrorCode localStatus = U_ZERO_ERROR; - - /* check arguments */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if(cnv==NULL || displayNameCapacity<0 || (displayNameCapacity>0 && displayName==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* open the resource bundle and get the display name string */ - rb=ures_open(NULL, displayLocale, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* use the internal name as the key */ - name=ures_getStringByKey(rb, cnv->sharedData->staticData->name, &length, &localStatus); - ures_close(rb); - - if(U_SUCCESS(localStatus)) { - /* copy the string */ - if (*pErrorCode == U_ZERO_ERROR) { - *pErrorCode = localStatus; - } - u_memcpy(displayName, name, uprv_min(length, displayNameCapacity)*U_SIZEOF_UCHAR); - } else { - /* convert the internal name into a Unicode string */ - length=(int32_t)uprv_strlen(cnv->sharedData->staticData->name); - u_charsToUChars(cnv->sharedData->staticData->name, displayName, uprv_min(length, displayNameCapacity)); - } - return u_terminateUChars(displayName, displayNameCapacity, length, pErrorCode); -} - -#endif - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/node/deps/icu-small/source/common/ucnvhz.cpp b/deps/node/deps/icu-small/source/common/ucnvhz.cpp deleted file mode 100644 index 31595374..00000000 --- 
a/deps/node/deps/icu-small/source/common/ucnvhz.cpp +++ /dev/null @@ -1,633 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2015, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnvhz.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000oct16 -* created by: Ram Viswanadha -* 10/31/2000 Ram Implemented offsets logic function -* -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "cmemory.h" -#include "unicode/ucnv.h" -#include "unicode/ucnv_cb.h" -#include "unicode/uset.h" -#include "unicode/utf16.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "ucnv_imp.h" - -#define UCNV_TILDE 0x7E /* ~ */ -#define UCNV_OPEN_BRACE 0x7B /* { */ -#define UCNV_CLOSE_BRACE 0x7D /* } */ -#define SB_ESCAPE "\x7E\x7D" -#define DB_ESCAPE "\x7E\x7B" -#define TILDE_ESCAPE "\x7E\x7E" -#define ESC_LEN 2 - - -#define CONCAT_ESCAPE_MACRO( args, targetIndex,targetLength,strToAppend, err, len,sourceIndex){ \ - while(len-->0){ \ - if(targetIndex < targetLength){ \ - args->target[targetIndex] = (unsigned char) *strToAppend; \ - if(args->offsets!=NULL){ \ - *(offsets++) = sourceIndex-1; \ - } \ - targetIndex++; \ - } \ - else{ \ - args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \ - *err =U_BUFFER_OVERFLOW_ERROR; \ - } \ - strToAppend++; \ - } \ -} - - -typedef struct{ - UConverter* gbConverter; - int32_t targetIndex; - int32_t sourceIndex; - UBool isEscapeAppended; - UBool isStateDBCS; - UBool isTargetUCharDBCS; - UBool isEmptySegment; -}UConverterDataHZ; - - -U_CDECL_BEGIN -static void U_CALLCONV -_HZOpen(UConverter 
*cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ - UConverter *gbConverter; - if(pArgs->onlyTestIsLoadable) { - ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */ - return; - } - gbConverter = ucnv_open("GBK", errorCode); - if(U_FAILURE(*errorCode)) { - return; - } - cnv->toUnicodeStatus = 0; - cnv->fromUnicodeStatus= 0; - cnv->mode=0; - cnv->fromUChar32=0x0000; - cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ)); - if(cnv->extraInfo != NULL){ - ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter; - } - else { - ucnv_close(gbConverter); - *errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } -} - -static void U_CALLCONV -_HZClose(UConverter *cnv){ - if(cnv->extraInfo != NULL) { - ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); - if(!cnv->isExtraLocal) { - uprv_free(cnv->extraInfo); - } - cnv->extraInfo = NULL; - } -} - -static void U_CALLCONV -_HZReset(UConverter *cnv, UConverterResetChoice choice){ - if(choice<=UCNV_RESET_TO_UNICODE) { - cnv->toUnicodeStatus = 0; - cnv->mode=0; - if(cnv->extraInfo != NULL){ - ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; - ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; - } - } - if(choice!=UCNV_RESET_TO_UNICODE) { - cnv->fromUnicodeStatus= 0; - cnv->fromUChar32=0x0000; - if(cnv->extraInfo != NULL){ - ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; - ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; - ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; - ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; - } - } -} - -/**************************************HZ Encoding************************************************* -* Rules for HZ encoding -* -* In ASCII mode, a byte is interpreted as an ASCII character, unless a -* '~' is encountered. The character '~' is an escape character. By -* convention, it must be immediately followed ONLY by '~', '{' or '\n' -* (), with the following special meaning. - -* 1. 
The escape sequence '~~' is interpreted as a '~'. -* 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB. -* 3. The escape sequence '~\n' is a line-continuation marker to be -* consumed with no output produced. -* In GB mode, characters are interpreted two bytes at a time as (pure) -* GB codes until the escape-from-GB code '~}' is read. This code -* switches the mode from GB back to ASCII. (Note that the escape- -* from-GB code '~}' ($7E7D) is outside the defined GB range.) -* -* Source: RFC 1842 -* -* Note that the formal syntax in RFC 1842 is invalid. I assume that the -* intended definition of single-byte-segment is as follows (pedberg): -* single-byte-segment = single-byte-seq 1*single-byte-char -*/ - - -static void U_CALLCONV -UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, - UErrorCode* err){ - char tempBuf[2]; - const char *mySource = ( char *) args->source; - UChar *myTarget = args->target; - const char *mySourceLimit = args->sourceLimit; - UChar32 targetUniChar = 0x0000; - int32_t mySourceChar = 0x0000; - UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); - tempBuf[0]=0; - tempBuf[1]=0; - - /* Calling code already handles this situation. 
*/ - /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){ - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - }*/ - - while(mySource< mySourceLimit){ - - if(myTarget < args->targetLimit){ - - mySourceChar= (unsigned char) *mySource++; - - if(args->converter->mode == UCNV_TILDE) { - /* second byte after ~ */ - args->converter->mode=0; - switch(mySourceChar) { - case 0x0A: - /* no output for ~\n (line-continuation marker) */ - continue; - case UCNV_TILDE: - if(args->offsets) { - args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); - } - *(myTarget++)=(UChar)mySourceChar; - myData->isEmptySegment = FALSE; - continue; - case UCNV_OPEN_BRACE: - case UCNV_CLOSE_BRACE: - myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); - if (myData->isEmptySegment) { - myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - args->converter->toUCallbackReason = UCNV_IRREGULAR; - args->converter->toUBytes[0] = UCNV_TILDE; - args->converter->toUBytes[1] = static_cast(mySourceChar); - args->converter->toULength = 2; - args->target = myTarget; - args->source = mySource; - return; - } - myData->isEmptySegment = TRUE; - continue; - default: - /* if the first byte is equal to TILDE and the trail byte - * is not a valid byte then it is an error condition - */ - /* - * Ticket 5691: consistent illegal sequences: - * - We include at least the first byte in the illegal sequence. - * - If any of the non-initial bytes could be the start of a character, - * we stop the illegal sequence before the first one of those. - */ - myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ - *err = U_ILLEGAL_ESCAPE_SEQUENCE; - args->converter->toUBytes[0] = UCNV_TILDE; - if( myData->isStateDBCS ? 
- (0x21 <= mySourceChar && mySourceChar <= 0x7e) : - mySourceChar <= 0x7f - ) { - /* The current byte could be the start of a character: Back it out. */ - args->converter->toULength = 1; - --mySource; - } else { - /* Include the current byte in the illegal sequence. */ - args->converter->toUBytes[1] = static_cast(mySourceChar); - args->converter->toULength = 2; - } - args->target = myTarget; - args->source = mySource; - return; - } - } else if(myData->isStateDBCS) { - if(args->converter->toUnicodeStatus == 0x00){ - /* lead byte */ - if(mySourceChar == UCNV_TILDE) { - args->converter->mode = UCNV_TILDE; - } else { - /* add another bit to distinguish a 0 byte from not having seen a lead byte */ - args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); - myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ - } - continue; - } - else{ - /* trail byte */ - int leadIsOk, trailIsOk; - uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; - targetUniChar = 0xffff; - /* - * Ticket 5691: consistent illegal sequences: - * - We include at least the first byte in the illegal sequence. - * - If any of the non-initial bytes could be the start of a character, - * we stop the illegal sequence before the first one of those. - * - * In HZ DBCS, if the second byte is in the 21..7e range, - * we report only the first byte as the illegal sequence. - * Otherwise we convert or report the pair of bytes. 
- */ - leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); - trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); - if (leadIsOk && trailIsOk) { - tempBuf[0] = (char) (leadByte+0x80) ; - tempBuf[1] = (char) (mySourceChar+0x80); - targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, - tempBuf, 2, args->converter->useFallback); - mySourceChar= (leadByte << 8) | mySourceChar; - } else if (trailIsOk) { - /* report a single illegal byte and continue with the following DBCS starter byte */ - --mySource; - mySourceChar = (int32_t)leadByte; - } else { - /* report a pair of illegal bytes if the second byte is not a DBCS starter */ - /* add another bit so that the code below writes 2 bytes in case of error */ - mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; - } - args->converter->toUnicodeStatus =0x00; - } - } - else{ - if(mySourceChar == UCNV_TILDE) { - args->converter->mode = UCNV_TILDE; - continue; - } else if(mySourceChar <= 0x7f) { - targetUniChar = (UChar)mySourceChar; /* ASCII */ - myData->isEmptySegment = FALSE; /* the segment has something valid */ - } else { - targetUniChar = 0xffff; - myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ - } - } - if(targetUniChar < 0xfffe){ - if(args->offsets) { - args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS)); - } - - *(myTarget++)=(UChar)targetUniChar; - } - else /* targetUniChar>=0xfffe */ { - if(targetUniChar == 0xfffe){ - *err = U_INVALID_CHAR_FOUND; - } - else{ - *err = U_ILLEGAL_CHAR_FOUND; - } - if(mySourceChar > 0xff){ - args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); - args->converter->toUBytes[1] = (uint8_t)mySourceChar; - args->converter->toULength=2; - } - else{ - args->converter->toUBytes[0] = (uint8_t)mySourceChar; - args->converter->toULength=1; - } - break; - } - } - else{ - *err =U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - args->target = myTarget; - 
args->source = mySource; -} - - -static void U_CALLCONV -UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, - UErrorCode * err){ - const UChar *mySource = args->source; - char *myTarget = args->target; - int32_t* offsets = args->offsets; - int32_t mySourceIndex = 0; - int32_t myTargetIndex = 0; - int32_t targetLength = (int32_t)(args->targetLimit - myTarget); - int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source); - uint32_t targetUniChar = 0x0000; - UChar32 mySourceChar = 0x0000; - UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo; - UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; - UBool oldIsTargetUCharDBCS; - int len =0; - const char* escSeq=NULL; - - /* Calling code already handles this situation. */ - /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){ - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - }*/ - if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) { - goto getTrail; - } - /*writing the char to the output stream */ - while (mySourceIndex < mySourceLength){ - targetUniChar = missingCharMarker; - if (myTargetIndex < targetLength){ - - mySourceChar = (UChar) mySource[mySourceIndex++]; - - - oldIsTargetUCharDBCS = isTargetUCharDBCS; - if(mySourceChar ==UCNV_TILDE){ - /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/ - len = ESC_LEN; - escSeq = TILDE_ESCAPE; - CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); - continue; - } else if(mySourceChar <= 0x7f) { - targetUniChar = mySourceChar; - } else { - int32_t length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, - mySourceChar,&targetUniChar,args->converter->useFallback); - /* we can only use lead bytes 21..7D and trail bytes 21..7E */ - if( length == 2 && - (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && - (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 
0xa1) - ) { - targetUniChar -= 0x8080; - } else { - targetUniChar = missingCharMarker; - } - } - if (targetUniChar != missingCharMarker){ - myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); - if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ - /*Shifting from a double byte to single byte mode*/ - if(!isTargetUCharDBCS){ - len =ESC_LEN; - escSeq = SB_ESCAPE; - CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); - myConverterData->isEscapeAppended = TRUE; - } - else{ /* Shifting from a single byte to double byte mode*/ - len =ESC_LEN; - escSeq = DB_ESCAPE; - CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); - myConverterData->isEscapeAppended = TRUE; - - } - } - - if(isTargetUCharDBCS){ - if( myTargetIndex > 8); - if(offsets){ - *(offsets++) = mySourceIndex-1; - } - if(myTargetIndex < targetLength){ - myTarget[myTargetIndex++] =(char) targetUniChar; - if(offsets){ - *(offsets++) = mySourceIndex-1; - } - }else{ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; - *err = U_BUFFER_OVERFLOW_ERROR; - } - }else{ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8); - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; - *err = U_BUFFER_OVERFLOW_ERROR; - } - - }else{ - if( myTargetIndex converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; - *err = U_BUFFER_OVERFLOW_ERROR; - } - } - - } - else{ - /* oops.. 
the code point is unassigned */ - /*Handle surrogates */ - /*check if the char is a First surrogate*/ - if(U16_IS_SURROGATE(mySourceChar)) { - if(U16_IS_SURROGATE_LEAD(mySourceChar)) { - args->converter->fromUChar32=mySourceChar; -getTrail: - /*look ahead to find the trail surrogate*/ - if(mySourceIndex < mySourceLength) { - /* test the following code unit */ - UChar trail=(UChar) args->source[mySourceIndex]; - if(U16_IS_TRAIL(trail)) { - ++mySourceIndex; - mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail); - args->converter->fromUChar32=0x00; - /* there are no surrogates in GB2312*/ - *err = U_INVALID_CHAR_FOUND; - /* exit this condition tree */ - } else { - /* this is an unmatched lead code unit (1st surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* no more input */ - *err = U_ZERO_ERROR; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* callback(unassigned) for a BMP code point */ - *err = U_INVALID_CHAR_FOUND; - } - - args->converter->fromUChar32=mySourceChar; - break; - } - } - else{ - *err = U_BUFFER_OVERFLOW_ERROR; - break; - } - targetUniChar=missingCharMarker; - } - - args->target += myTargetIndex; - args->source += mySourceIndex; - myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; -} - -static void U_CALLCONV -_HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { - UConverter *cnv = args->converter; - UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo; - char *p; - char buffer[4]; - p = buffer; - - if( convData->isTargetUCharDBCS){ - *p++= UCNV_TILDE; - *p++= UCNV_CLOSE_BRACE; - convData->isTargetUCharDBCS=FALSE; - } - *p++= (char)cnv->subChars[0]; - - ucnv_cbFromUWriteBytes(args, - buffer, (int32_t)(p - buffer), - offsetIndex, err); -} - -/* - * Structure for cloning an HZ converter into a single memory block. 
- * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct, - * and then ucnv_safeClone() of the sub-converter may additionally align - * subCnv inside the cloneHZStruct, for which we need the deadSpace after - * subCnv. This is because UAlignedMemory may be larger than the actually - * necessary alignment size for the platform. - * The other cloneHZStruct fields will not be moved around, - * and are aligned properly with cloneHZStruct's alignment. - */ -struct cloneHZStruct -{ - UConverter cnv; - UConverter subCnv; - UAlignedMemory deadSpace; - UConverterDataHZ mydata; -}; - - -static UConverter * U_CALLCONV -_HZ_SafeClone(const UConverter *cnv, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status) -{ - struct cloneHZStruct * localClone; - int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct); - - if (U_FAILURE(*status)){ - return 0; - } - - if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ - *pBufferSize = bufferSizeNeeded; - return 0; - } - - localClone = (struct cloneHZStruct *)stackBuffer; - /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ - - uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ)); - localClone->cnv.extraInfo = &localClone->mydata; - localClone->cnv.isExtraLocal = TRUE; - - /* deep-clone the sub-converter */ - size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ - ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter = - ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); - - return &localClone->cnv; -} - -static void U_CALLCONV -_HZ_GetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - /* HZ converts all of ASCII */ - sa->addRange(sa->set, 0, 0x7f); - - /* add all of the code points that the sub-converter handles */ - ucnv_MBCSGetFilteredUnicodeSetForUnicode( - 
((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, - sa, which, UCNV_SET_FILTER_HZ, - pErrorCode); -} -U_CDECL_END -static const UConverterImpl _HZImpl={ - - UCNV_HZ, - - NULL, - NULL, - - _HZOpen, - _HZClose, - _HZReset, - - UConverter_toUnicode_HZ_OFFSETS_LOGIC, - UConverter_toUnicode_HZ_OFFSETS_LOGIC, - UConverter_fromUnicode_HZ_OFFSETS_LOGIC, - UConverter_fromUnicode_HZ_OFFSETS_LOGIC, - NULL, - - NULL, - NULL, - _HZ_WriteSub, - _HZ_SafeClone, - _HZ_GetUnicodeSet, - NULL, - NULL -}; - -static const UConverterStaticData _HZStaticData={ - sizeof(UConverterStaticData), - "HZ", - 0, - UCNV_IBM, - UCNV_HZ, - 1, - 4, - { 0x1a, 0, 0, 0 }, - 1, - FALSE, - FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ - -}; - -const UConverterSharedData _HZData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_HZStaticData, &_HZImpl); - -#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */ diff --git a/deps/node/deps/icu-small/source/common/ucnvisci.cpp b/deps/node/deps/icu-small/source/common/ucnvisci.cpp deleted file mode 100644 index d0c07f2b..00000000 --- a/deps/node/deps/icu-small/source/common/ucnvisci.cpp +++ /dev/null @@ -1,1635 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2016, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* file name: ucnvisci.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001JUN26 -* created by: Ram Viswanadha -* -* Date Name Description -* 24/7/2001 Ram Added support for EXT character handling -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/ucnv_cb.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cstring.h" -#include "uassert.h" - -#define UCNV_OPTIONS_VERSION_MASK 0xf -#define NUKTA 0x093c -#define HALANT 0x094d -#define ZWNJ 0x200c /* Zero Width Non Joiner */ -#define ZWJ 0x200d /* Zero width Joiner */ -#define INVALID_CHAR 0xffff -#define ATR 0xEF /* Attribute code */ -#define EXT 0xF0 /* Extension code */ -#define DANDA 0x0964 -#define DOUBLE_DANDA 0x0965 -#define ISCII_NUKTA 0xE9 -#define ISCII_HALANT 0xE8 -#define ISCII_DANDA 0xEA -#define ISCII_INV 0xD9 -#define ISCII_VOWEL_SIGN_E 0xE0 -#define INDIC_BLOCK_BEGIN 0x0900 -#define INDIC_BLOCK_END 0x0D7F -#define INDIC_RANGE (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN) -#define VOCALLIC_RR 0x0931 -#define LF 0x0A -#define ASCII_END 0xA0 -#define NO_CHAR_MARKER 0xFFFE -#define TELUGU_DELTA DELTA * TELUGU -#define DEV_ABBR_SIGN 0x0970 -#define DEV_ANUDATTA 0x0952 -#define EXT_RANGE_BEGIN 0xA1 -#define EXT_RANGE_END 0xEE - -#define PNJ_DELTA 0x0100 -#define PNJ_BINDI 0x0A02 -#define PNJ_TIPPI 0x0A70 -#define PNJ_SIGN_VIRAMA 0x0A4D -#define PNJ_ADHAK 0x0A71 -#define PNJ_HA 0x0A39 -#define PNJ_RRA 0x0A5C - -typedef enum { - DEVANAGARI =0, - BENGALI, - GURMUKHI, - GUJARATI, - ORIYA, - TAMIL, - TELUGU, - KANNADA, - MALAYALAM, - DELTA=0x80 -}UniLang; - -/** - * Enumeration for switching code pages if + - * is encountered - */ -typedef enum { - DEF = 0x40, - RMN = 0x41, - DEV = 0x42, - BNG = 0x43, - TML = 0x44, - TLG = 0x45, - ASM 
= 0x46, - ORI = 0x47, - KND = 0x48, - MLM = 0x49, - GJR = 0x4A, - PNJ = 0x4B, - ARB = 0x71, - PES = 0x72, - URD = 0x73, - SND = 0x74, - KSM = 0x75, - PST = 0x76 -}ISCIILang; - -typedef enum { - DEV_MASK =0x80, - PNJ_MASK =0x40, - GJR_MASK =0x20, - ORI_MASK =0x10, - BNG_MASK =0x08, - KND_MASK =0x04, - MLM_MASK =0x02, - TML_MASK =0x01, - ZERO =0x00 -}MaskEnum; - -#define ISCII_CNV_PREFIX "ISCII,version=" - -typedef struct { - UChar contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */ - UChar contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */ - uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */ - uint16_t currentDeltaFromUnicode; /* current delta in Indic block */ - uint16_t currentDeltaToUnicode; /* current delta in Indic block */ - MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */ - MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */ - MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */ - UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */ - UBool resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered*/ - char name[sizeof(ISCII_CNV_PREFIX) + 1]; - UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. 
*/ -} UConverterDataISCII; - -typedef struct LookupDataStruct { - UniLang uniLang; - MaskEnum maskEnum; - ISCIILang isciiLang; -} LookupDataStruct; - -static const LookupDataStruct lookupInitialData[]={ - { DEVANAGARI, DEV_MASK, DEV }, - { BENGALI, BNG_MASK, BNG }, - { GURMUKHI, PNJ_MASK, PNJ }, - { GUJARATI, GJR_MASK, GJR }, - { ORIYA, ORI_MASK, ORI }, - { TAMIL, TML_MASK, TML }, - { TELUGU, KND_MASK, TLG }, - { KANNADA, KND_MASK, KND }, - { MALAYALAM, MLM_MASK, MLM } -}; - -/* - * For special handling of certain Gurmukhi characters. - * Bit 0 (value 1): PNJ consonant - * Bit 1 (value 2): PNJ Bindi Tippi - */ -static const uint8_t pnjMap[80] = { - /* 0A00..0A0F */ - 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0A10..0A1F */ - 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - /* 0A20..0A2F */ - 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, - /* 0A30..0A3F */ - 3, 0, 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 2, - /* 0A40..0A4F */ - 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static UBool -isPNJConsonant(UChar32 c) { - if (c < 0xa00 || 0xa50 <= c) { - return FALSE; - } else { - return (UBool)(pnjMap[c - 0xa00] & 1); - } -} - -static UBool -isPNJBindiTippi(UChar32 c) { - if (c < 0xa00 || 0xa50 <= c) { - return FALSE; - } else { - return (UBool)(pnjMap[c - 0xa00] >> 1); - } -} -U_CDECL_BEGIN -static void U_CALLCONV -_ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) { - if(pArgs->onlyTestIsLoadable) { - return; - } - - cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII)); - - if (cnv->extraInfo != NULL) { - int32_t len=0; - UConverterDataISCII *converterData= - (UConverterDataISCII *) cnv->extraInfo; - converterData->contextCharToUnicode=NO_CHAR_MARKER; - cnv->toUnicodeStatus = missingCharMarker; - converterData->contextCharFromUnicode=0x0000; - converterData->resetToDefaultToUnicode=FALSE; - /* check if the version requested is supported */ - if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) { - /* initialize state 
variables */ - converterData->currentDeltaFromUnicode - = converterData->currentDeltaToUnicode - = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA); - - converterData->currentMaskFromUnicode - = converterData->currentMaskToUnicode - = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum; - - converterData->isFirstBuffer=TRUE; - (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX); - len = (int32_t)uprv_strlen(converterData->name); - converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0'); - converterData->name[len+1]=0; - - converterData->prevToUnicodeStatus = 0x0000; - } else { - uprv_free(cnv->extraInfo); - cnv->extraInfo = NULL; - *errorCode = U_ILLEGAL_ARGUMENT_ERROR; - } - - } else { - *errorCode =U_MEMORY_ALLOCATION_ERROR; - } -} - -static void U_CALLCONV -_ISCIIClose(UConverter *cnv) { - if (cnv->extraInfo!=NULL) { - if (!cnv->isExtraLocal) { - uprv_free(cnv->extraInfo); - } - cnv->extraInfo=NULL; - } -} - -static const char* U_CALLCONV -_ISCIIgetName(const UConverter* cnv) { - if (cnv->extraInfo) { - UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo; - return myData->name; - } - return NULL; -} - -static void U_CALLCONV -_ISCIIReset(UConverter *cnv, UConverterResetChoice choice) { - UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo); - if (choice<=UCNV_RESET_TO_UNICODE) { - cnv->toUnicodeStatus = missingCharMarker; - cnv->mode=0; - data->currentDeltaToUnicode=data->defDeltaToUnicode; - data->currentMaskToUnicode = data->defMaskToUnicode; - data->contextCharToUnicode=NO_CHAR_MARKER; - data->prevToUnicodeStatus = 0x0000; - } - if (choice!=UCNV_RESET_TO_UNICODE) { - cnv->fromUChar32=0x0000; - data->contextCharFromUnicode=0x00; - data->currentMaskFromUnicode=data->defMaskToUnicode; - data->currentDeltaFromUnicode=data->defDeltaToUnicode; - data->isFirstBuffer=TRUE; - 
data->resetToDefaultToUnicode=FALSE; - } -} - -/** - * The values in validity table are indexed by the lower bits of Unicode - * range 0x0900 - 0x09ff. The values have a structure like: - * --------------------------------------------------------------- - * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML | - * | | | | | ASM | KND | | | - * --------------------------------------------------------------- - * If a code point is valid in a particular script - * then that bit is turned on - * - * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for - * to represent these languages - * - * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case - * and combine and use 1 bit to represent these languages. - * - * TODO: It is probably easier to understand and maintain to change this - * to use uint16_t and give each of the 9 Unicode/script blocks its own bit. - */ - -static const uint8_t validityTable[128] = { -/* This state table is tool generated please do not edit unless you know exactly what you are doing */ -/* Note: This table was edited to mirror the Windows XP implementation */ -/*ISCII:Valid:Unicode */ -/*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , -/*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + 
TML_MASK , -/*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK 
+ BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK , -/*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + 
MLM_MASK + ZERO , -/*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , -/*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , -/*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , 
-/*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO , -/*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 
0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO , -/*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO , -/*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , -/*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + 
GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/* - * The length of the array is 128 to provide values for 0x900..0x97f. - * The last 15 entries for 0x971..0x97f of the validity table are all zero - * because no Indic script uses such Unicode code points. 
- */ -/*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO -}; - -static const uint16_t fromUnicodeTable[128]={ - 0x00a0 ,/* 0x0900 */ - 0x00a1 ,/* 0x0901 */ - 0x00a2 ,/* 0x0902 */ - 0x00a3 ,/* 0x0903 */ - 0xa4e0 ,/* 0x0904 */ - 0x00a4 ,/* 0x0905 */ - 0x00a5 ,/* 0x0906 */ - 0x00a6 ,/* 0x0907 */ - 0x00a7 ,/* 0x0908 */ - 0x00a8 ,/* 0x0909 */ - 0x00a9 ,/* 0x090a */ - 0x00aa ,/* 0x090b */ - 0xA6E9 ,/* 0x090c */ - 0x00ae ,/* 0x090d */ - 0x00ab ,/* 0x090e */ - 0x00ac ,/* 0x090f */ - 0x00ad ,/* 0x0910 */ - 0x00b2 ,/* 0x0911 */ - 0x00af ,/* 0x0912 */ - 0x00b0 ,/* 0x0913 */ - 0x00b1 ,/* 0x0914 */ - 0x00b3 ,/* 0x0915 */ - 0x00b4 ,/* 0x0916 */ - 0x00b5 ,/* 0x0917 */ - 0x00b6 ,/* 0x0918 */ - 0x00b7 ,/* 0x0919 */ - 0x00b8 ,/* 0x091a */ - 0x00b9 ,/* 0x091b */ - 0x00ba ,/* 0x091c */ - 0x00bb ,/* 0x091d */ - 0x00bc ,/* 0x091e */ - 0x00bd ,/* 0x091f */ - 0x00be ,/* 0x0920 */ - 0x00bf ,/* 0x0921 */ - 0x00c0 ,/* 0x0922 */ - 0x00c1 ,/* 0x0923 */ - 0x00c2 ,/* 0x0924 */ - 0x00c3 ,/* 0x0925 */ - 0x00c4 ,/* 0x0926 */ - 0x00c5 ,/* 0x0927 */ - 0x00c6 ,/* 0x0928 */ - 0x00c7 ,/* 0x0929 */ - 0x00c8 ,/* 0x092a */ - 0x00c9 ,/* 0x092b */ - 0x00ca ,/* 0x092c */ - 0x00cb ,/* 0x092d */ - 0x00cc ,/* 0x092e */ - 0x00cd ,/* 0x092f */ - 0x00cf ,/* 0x0930 */ - 0x00d0 ,/* 0x0931 */ - 0x00d1 ,/* 0x0932 */ - 0x00d2 ,/* 0x0933 */ - 0x00d3 ,/* 0x0934 */ - 0x00d4 ,/* 0x0935 */ - 0x00d5 ,/* 0x0936 */ - 0x00d6 ,/* 0x0937 */ - 0x00d7 ,/* 0x0938 */ - 0x00d8 ,/* 0x0939 */ - 0xFFFF ,/* 0x093A */ - 0xFFFF ,/* 0x093B */ - 0x00e9 ,/* 0x093c */ - 0xEAE9 ,/* 0x093d */ - 0x00da ,/* 0x093e */ - 0x00db ,/* 0x093f */ - 0x00dc ,/* 0x0940 */ - 0x00dd ,/* 0x0941 */ - 0x00de ,/* 0x0942 */ - 0x00df ,/* 0x0943 */ - 0xDFE9 ,/* 0x0944 */ - 0x00e3 ,/* 0x0945 */ - 0x00e0 ,/* 0x0946 */ - 0x00e1 ,/* 0x0947 */ - 0x00e2 ,/* 0x0948 */ - 0x00e7 ,/* 0x0949 */ - 0x00e4 ,/* 0x094a */ - 0x00e5 ,/* 0x094b */ - 0x00e6 ,/* 0x094c */ - 0x00e8 ,/* 0x094d */ - 0x00ec ,/* 0x094e */ - 0x00ed ,/* 0x094f */ - 0xA1E9 ,/* 0x0950 */ 
/* OM Symbol */ - 0xFFFF ,/* 0x0951 */ - 0xF0B8 ,/* 0x0952 */ - 0xFFFF ,/* 0x0953 */ - 0xFFFF ,/* 0x0954 */ - 0xFFFF ,/* 0x0955 */ - 0xFFFF ,/* 0x0956 */ - 0xFFFF ,/* 0x0957 */ - 0xb3e9 ,/* 0x0958 */ - 0xb4e9 ,/* 0x0959 */ - 0xb5e9 ,/* 0x095a */ - 0xbae9 ,/* 0x095b */ - 0xbfe9 ,/* 0x095c */ - 0xC0E9 ,/* 0x095d */ - 0xc9e9 ,/* 0x095e */ - 0x00ce ,/* 0x095f */ - 0xAAe9 ,/* 0x0960 */ - 0xA7E9 ,/* 0x0961 */ - 0xDBE9 ,/* 0x0962 */ - 0xDCE9 ,/* 0x0963 */ - 0x00ea ,/* 0x0964 */ - 0xeaea ,/* 0x0965 */ - 0x00f1 ,/* 0x0966 */ - 0x00f2 ,/* 0x0967 */ - 0x00f3 ,/* 0x0968 */ - 0x00f4 ,/* 0x0969 */ - 0x00f5 ,/* 0x096a */ - 0x00f6 ,/* 0x096b */ - 0x00f7 ,/* 0x096c */ - 0x00f8 ,/* 0x096d */ - 0x00f9 ,/* 0x096e */ - 0x00fa ,/* 0x096f */ - 0xF0BF ,/* 0x0970 */ - 0xFFFF ,/* 0x0971 */ - 0xFFFF ,/* 0x0972 */ - 0xFFFF ,/* 0x0973 */ - 0xFFFF ,/* 0x0974 */ - 0xFFFF ,/* 0x0975 */ - 0xFFFF ,/* 0x0976 */ - 0xFFFF ,/* 0x0977 */ - 0xFFFF ,/* 0x0978 */ - 0xFFFF ,/* 0x0979 */ - 0xFFFF ,/* 0x097a */ - 0xFFFF ,/* 0x097b */ - 0xFFFF ,/* 0x097c */ - 0xFFFF ,/* 0x097d */ - 0xFFFF ,/* 0x097e */ - 0xFFFF ,/* 0x097f */ -}; -static const uint16_t toUnicodeTable[256]={ - 0x0000,/* 0x00 */ - 0x0001,/* 0x01 */ - 0x0002,/* 0x02 */ - 0x0003,/* 0x03 */ - 0x0004,/* 0x04 */ - 0x0005,/* 0x05 */ - 0x0006,/* 0x06 */ - 0x0007,/* 0x07 */ - 0x0008,/* 0x08 */ - 0x0009,/* 0x09 */ - 0x000a,/* 0x0a */ - 0x000b,/* 0x0b */ - 0x000c,/* 0x0c */ - 0x000d,/* 0x0d */ - 0x000e,/* 0x0e */ - 0x000f,/* 0x0f */ - 0x0010,/* 0x10 */ - 0x0011,/* 0x11 */ - 0x0012,/* 0x12 */ - 0x0013,/* 0x13 */ - 0x0014,/* 0x14 */ - 0x0015,/* 0x15 */ - 0x0016,/* 0x16 */ - 0x0017,/* 0x17 */ - 0x0018,/* 0x18 */ - 0x0019,/* 0x19 */ - 0x001a,/* 0x1a */ - 0x001b,/* 0x1b */ - 0x001c,/* 0x1c */ - 0x001d,/* 0x1d */ - 0x001e,/* 0x1e */ - 0x001f,/* 0x1f */ - 0x0020,/* 0x20 */ - 0x0021,/* 0x21 */ - 0x0022,/* 0x22 */ - 0x0023,/* 0x23 */ - 0x0024,/* 0x24 */ - 0x0025,/* 0x25 */ - 0x0026,/* 0x26 */ - 0x0027,/* 0x27 */ - 0x0028,/* 0x28 */ - 0x0029,/* 0x29 */ - 0x002a,/* 
0x2a */ - 0x002b,/* 0x2b */ - 0x002c,/* 0x2c */ - 0x002d,/* 0x2d */ - 0x002e,/* 0x2e */ - 0x002f,/* 0x2f */ - 0x0030,/* 0x30 */ - 0x0031,/* 0x31 */ - 0x0032,/* 0x32 */ - 0x0033,/* 0x33 */ - 0x0034,/* 0x34 */ - 0x0035,/* 0x35 */ - 0x0036,/* 0x36 */ - 0x0037,/* 0x37 */ - 0x0038,/* 0x38 */ - 0x0039,/* 0x39 */ - 0x003A,/* 0x3A */ - 0x003B,/* 0x3B */ - 0x003c,/* 0x3c */ - 0x003d,/* 0x3d */ - 0x003e,/* 0x3e */ - 0x003f,/* 0x3f */ - 0x0040,/* 0x40 */ - 0x0041,/* 0x41 */ - 0x0042,/* 0x42 */ - 0x0043,/* 0x43 */ - 0x0044,/* 0x44 */ - 0x0045,/* 0x45 */ - 0x0046,/* 0x46 */ - 0x0047,/* 0x47 */ - 0x0048,/* 0x48 */ - 0x0049,/* 0x49 */ - 0x004a,/* 0x4a */ - 0x004b,/* 0x4b */ - 0x004c,/* 0x4c */ - 0x004d,/* 0x4d */ - 0x004e,/* 0x4e */ - 0x004f,/* 0x4f */ - 0x0050,/* 0x50 */ - 0x0051,/* 0x51 */ - 0x0052,/* 0x52 */ - 0x0053,/* 0x53 */ - 0x0054,/* 0x54 */ - 0x0055,/* 0x55 */ - 0x0056,/* 0x56 */ - 0x0057,/* 0x57 */ - 0x0058,/* 0x58 */ - 0x0059,/* 0x59 */ - 0x005a,/* 0x5a */ - 0x005b,/* 0x5b */ - 0x005c,/* 0x5c */ - 0x005d,/* 0x5d */ - 0x005e,/* 0x5e */ - 0x005f,/* 0x5f */ - 0x0060,/* 0x60 */ - 0x0061,/* 0x61 */ - 0x0062,/* 0x62 */ - 0x0063,/* 0x63 */ - 0x0064,/* 0x64 */ - 0x0065,/* 0x65 */ - 0x0066,/* 0x66 */ - 0x0067,/* 0x67 */ - 0x0068,/* 0x68 */ - 0x0069,/* 0x69 */ - 0x006a,/* 0x6a */ - 0x006b,/* 0x6b */ - 0x006c,/* 0x6c */ - 0x006d,/* 0x6d */ - 0x006e,/* 0x6e */ - 0x006f,/* 0x6f */ - 0x0070,/* 0x70 */ - 0x0071,/* 0x71 */ - 0x0072,/* 0x72 */ - 0x0073,/* 0x73 */ - 0x0074,/* 0x74 */ - 0x0075,/* 0x75 */ - 0x0076,/* 0x76 */ - 0x0077,/* 0x77 */ - 0x0078,/* 0x78 */ - 0x0079,/* 0x79 */ - 0x007a,/* 0x7a */ - 0x007b,/* 0x7b */ - 0x007c,/* 0x7c */ - 0x007d,/* 0x7d */ - 0x007e,/* 0x7e */ - 0x007f,/* 0x7f */ - 0x0080,/* 0x80 */ - 0x0081,/* 0x81 */ - 0x0082,/* 0x82 */ - 0x0083,/* 0x83 */ - 0x0084,/* 0x84 */ - 0x0085,/* 0x85 */ - 0x0086,/* 0x86 */ - 0x0087,/* 0x87 */ - 0x0088,/* 0x88 */ - 0x0089,/* 0x89 */ - 0x008a,/* 0x8a */ - 0x008b,/* 0x8b */ - 0x008c,/* 0x8c */ - 0x008d,/* 0x8d */ - 0x008e,/* 
0x8e */ - 0x008f,/* 0x8f */ - 0x0090,/* 0x90 */ - 0x0091,/* 0x91 */ - 0x0092,/* 0x92 */ - 0x0093,/* 0x93 */ - 0x0094,/* 0x94 */ - 0x0095,/* 0x95 */ - 0x0096,/* 0x96 */ - 0x0097,/* 0x97 */ - 0x0098,/* 0x98 */ - 0x0099,/* 0x99 */ - 0x009a,/* 0x9a */ - 0x009b,/* 0x9b */ - 0x009c,/* 0x9c */ - 0x009d,/* 0x9d */ - 0x009e,/* 0x9e */ - 0x009f,/* 0x9f */ - 0x00A0,/* 0xa0 */ - 0x0901,/* 0xa1 */ - 0x0902,/* 0xa2 */ - 0x0903,/* 0xa3 */ - 0x0905,/* 0xa4 */ - 0x0906,/* 0xa5 */ - 0x0907,/* 0xa6 */ - 0x0908,/* 0xa7 */ - 0x0909,/* 0xa8 */ - 0x090a,/* 0xa9 */ - 0x090b,/* 0xaa */ - 0x090e,/* 0xab */ - 0x090f,/* 0xac */ - 0x0910,/* 0xad */ - 0x090d,/* 0xae */ - 0x0912,/* 0xaf */ - 0x0913,/* 0xb0 */ - 0x0914,/* 0xb1 */ - 0x0911,/* 0xb2 */ - 0x0915,/* 0xb3 */ - 0x0916,/* 0xb4 */ - 0x0917,/* 0xb5 */ - 0x0918,/* 0xb6 */ - 0x0919,/* 0xb7 */ - 0x091a,/* 0xb8 */ - 0x091b,/* 0xb9 */ - 0x091c,/* 0xba */ - 0x091d,/* 0xbb */ - 0x091e,/* 0xbc */ - 0x091f,/* 0xbd */ - 0x0920,/* 0xbe */ - 0x0921,/* 0xbf */ - 0x0922,/* 0xc0 */ - 0x0923,/* 0xc1 */ - 0x0924,/* 0xc2 */ - 0x0925,/* 0xc3 */ - 0x0926,/* 0xc4 */ - 0x0927,/* 0xc5 */ - 0x0928,/* 0xc6 */ - 0x0929,/* 0xc7 */ - 0x092a,/* 0xc8 */ - 0x092b,/* 0xc9 */ - 0x092c,/* 0xca */ - 0x092d,/* 0xcb */ - 0x092e,/* 0xcc */ - 0x092f,/* 0xcd */ - 0x095f,/* 0xce */ - 0x0930,/* 0xcf */ - 0x0931,/* 0xd0 */ - 0x0932,/* 0xd1 */ - 0x0933,/* 0xd2 */ - 0x0934,/* 0xd3 */ - 0x0935,/* 0xd4 */ - 0x0936,/* 0xd5 */ - 0x0937,/* 0xd6 */ - 0x0938,/* 0xd7 */ - 0x0939,/* 0xd8 */ - 0x200D,/* 0xd9 */ - 0x093e,/* 0xda */ - 0x093f,/* 0xdb */ - 0x0940,/* 0xdc */ - 0x0941,/* 0xdd */ - 0x0942,/* 0xde */ - 0x0943,/* 0xdf */ - 0x0946,/* 0xe0 */ - 0x0947,/* 0xe1 */ - 0x0948,/* 0xe2 */ - 0x0945,/* 0xe3 */ - 0x094a,/* 0xe4 */ - 0x094b,/* 0xe5 */ - 0x094c,/* 0xe6 */ - 0x0949,/* 0xe7 */ - 0x094d,/* 0xe8 */ - 0x093c,/* 0xe9 */ - 0x0964,/* 0xea */ - 0xFFFF,/* 0xeb */ - 0xFFFF,/* 0xec */ - 0xFFFF,/* 0xed */ - 0xFFFF,/* 0xee */ - 0xFFFF,/* 0xef */ - 0xFFFF,/* 0xf0 */ - 0x0966,/* 0xf1 */ - 0x0967,/* 
0xf2 */ - 0x0968,/* 0xf3 */ - 0x0969,/* 0xf4 */ - 0x096a,/* 0xf5 */ - 0x096b,/* 0xf6 */ - 0x096c,/* 0xf7 */ - 0x096d,/* 0xf8 */ - 0x096e,/* 0xf9 */ - 0x096f,/* 0xfa */ - 0xFFFF,/* 0xfb */ - 0xFFFF,/* 0xfc */ - 0xFFFF,/* 0xfd */ - 0xFFFF,/* 0xfe */ - 0xFFFF /* 0xff */ -}; - -static const uint16_t vowelSignESpecialCases[][2]={ - { 2 /*length of array*/ , 0 }, - { 0xA4 , 0x0904 }, -}; - -static const uint16_t nuktaSpecialCases[][2]={ - { 16 /*length of array*/ , 0 }, - { 0xA6 , 0x090c }, - { 0xEA , 0x093D }, - { 0xDF , 0x0944 }, - { 0xA1 , 0x0950 }, - { 0xb3 , 0x0958 }, - { 0xb4 , 0x0959 }, - { 0xb5 , 0x095a }, - { 0xba , 0x095b }, - { 0xbf , 0x095c }, - { 0xC0 , 0x095d }, - { 0xc9 , 0x095e }, - { 0xAA , 0x0960 }, - { 0xA7 , 0x0961 }, - { 0xDB , 0x0962 }, - { 0xDC , 0x0963 }, -}; - - -#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){ \ - int32_t offset = (int32_t)(source - args->source-1); \ - /* write the targetUniChar to target */ \ - if(target < targetLimit){ \ - if(targetByteUnit <= 0xFF){ \ - *(target)++ = (uint8_t)(targetByteUnit); \ - if(offsets){ \ - *(offsets++) = offset; \ - } \ - }else{ \ - if (targetByteUnit > 0xFFFF) { \ - *(target)++ = (uint8_t)(targetByteUnit>>16); \ - if (offsets) { \ - --offset; \ - *(offsets++) = offset; \ - } \ - } \ - if (!(target < targetLimit)) { \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t)(targetByteUnit >> 8); \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t)targetByteUnit; \ - *err = U_BUFFER_OVERFLOW_ERROR; \ - } else { \ - *(target)++ = (uint8_t)(targetByteUnit>>8); \ - if(offsets){ \ - *(offsets++) = offset; \ - } \ - if(target < targetLimit){ \ - *(target)++ = (uint8_t) targetByteUnit; \ - if(offsets){ \ - *(offsets++) = offset ; \ - } \ - }else{ \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\ - (uint8_t) (targetByteUnit); \ - *err = 
U_BUFFER_OVERFLOW_ERROR; \ - } \ - } \ - } \ - }else{ \ - if (targetByteUnit & 0xFF0000) { \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t) (targetByteUnit >>16); \ - } \ - if(targetByteUnit & 0xFF00){ \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t) (targetByteUnit >>8); \ - } \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t) (targetByteUnit); \ - *err = U_BUFFER_OVERFLOW_ERROR; \ - } \ -} - -/* Rules: - * Explicit Halant : - * + - * Soft Halant : - * + - */ -static void U_CALLCONV -UConverter_fromUnicode_ISCII_OFFSETS_LOGIC( - UConverterFromUnicodeArgs * args, UErrorCode * err) { - const UChar *source = args->source; - const UChar *sourceLimit = args->sourceLimit; - unsigned char *target = (unsigned char *) args->target; - unsigned char *targetLimit = (unsigned char *) args->targetLimit; - int32_t* offsets = args->offsets; - uint32_t targetByteUnit = 0x0000; - UChar32 sourceChar = 0x0000; - UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */ - UConverterDataISCII *converterData; - uint16_t newDelta=0; - uint16_t range = 0; - UBool deltaChanged = FALSE; - - if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - /* initialize data */ - converterData=(UConverterDataISCII*)args->converter->extraInfo; - newDelta=converterData->currentDeltaFromUnicode; - range = (uint16_t)(newDelta/DELTA); - - if ((sourceChar = args->converter->fromUChar32)!=0) { - goto getTrail; - } - - /*writing the char to the output stream */ - while (source < sourceLimit) { - /* Write the language code following LF only if LF is not the last character. 
*/ - if (args->converter->fromUnicodeStatus == LF) { - targetByteUnit = ATR<<8; - targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang; - args->converter->fromUnicodeStatus = 0x0000; - /* now append ATR and language code */ - WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); - if (U_FAILURE(*err)) { - break; - } - } - - sourceChar = *source++; - tempContextFromUnicode = converterData->contextCharFromUnicode; - - targetByteUnit = missingCharMarker; - - /*check if input is in ASCII and C0 control codes range*/ - if (sourceChar <= ASCII_END) { - args->converter->fromUnicodeStatus = sourceChar; - WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err); - if (U_FAILURE(*err)) { - break; - } - continue; - } - switch (sourceChar) { - case ZWNJ: - /* contextChar has HALANT */ - if (converterData->contextCharFromUnicode) { - converterData->contextCharFromUnicode = 0x00; - targetByteUnit = ISCII_HALANT; - } else { - /* consume ZWNJ and continue */ - converterData->contextCharFromUnicode = 0x00; - continue; - } - break; - case ZWJ: - /* contextChar has HALANT */ - if (converterData->contextCharFromUnicode) { - targetByteUnit = ISCII_NUKTA; - } else { - targetByteUnit =ISCII_INV; - } - converterData->contextCharFromUnicode = 0x00; - break; - default: - /* is the sourceChar in the INDIC_RANGE? */ - if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) { - /* Danda and Double Danda are valid in Northern scripts.. since Unicode - * does not include these codepoints in all Northern scrips we need to - * filter them out - */ - if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) { - /* find out to which block the souceChar belongs*/ - range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA); - newDelta =(uint16_t)(range*DELTA); - - /* Now are we in the same block as the previous? 
*/ - if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) { - converterData->currentDeltaFromUnicode = newDelta; - converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum; - deltaChanged =TRUE; - converterData->isFirstBuffer=FALSE; - } - - if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { - if (sourceChar == PNJ_TIPPI) { - /* Make sure Tippi is converterd to Bindi. */ - sourceChar = PNJ_BINDI; - } else if (sourceChar == PNJ_ADHAK) { - /* This is for consonant cluster handling. */ - converterData->contextCharFromUnicode = PNJ_ADHAK; - } - - } - /* Normalize all Indic codepoints to Devanagari and map them to ISCII */ - /* now subtract the new delta from sourceChar*/ - sourceChar -= converterData->currentDeltaFromUnicode; - } - - /* get the target byte unit */ - targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar]; - - /* is the code point valid in current script? */ - if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) { - /* Vocallic RR is assigned in ISCII Telugu and Unicode */ - if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) { - targetByteUnit=missingCharMarker; - } - } - - if (deltaChanged) { - /* we are in a script block which is different than - * previous sourceChar's script block write ATR and language codes - */ - uint32_t temp=0; - temp =(uint16_t)(ATR<<8); - temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang); - /* reset */ - deltaChanged=FALSE; - /* now append ATR and language code */ - WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err); - if (U_FAILURE(*err)) { - break; - } - } - - if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) { - continue; - } - } - /* reset context char */ - converterData->contextCharFromUnicode = 0x00; - break; - } - if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && 
isPNJConsonant((sourceChar + PNJ_DELTA))) { - /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */ - /* reset context char */ - converterData->contextCharFromUnicode = 0x0000; - targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit; - /* write targetByteUnit to target */ - WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err); - if (U_FAILURE(*err)) { - break; - } - } else if (targetByteUnit != missingCharMarker) { - if (targetByteUnit==ISCII_HALANT) { - converterData->contextCharFromUnicode = (UChar)targetByteUnit; - } - /* write targetByteUnit to target*/ - WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); - if (U_FAILURE(*err)) { - break; - } - } else { - /* oops.. the code point is unassigned */ - /*check if the char is a First surrogate*/ - if (U16_IS_SURROGATE(sourceChar)) { - if (U16_IS_SURROGATE_LEAD(sourceChar)) { -getTrail: - /*look ahead to find the trail surrogate*/ - if (source < sourceLimit) { - /* test the following code unit */ - UChar trail= (*source); - if (U16_IS_TRAIL(trail)) { - source++; - sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); - *err =U_INVALID_CHAR_FOUND; - /* convert this surrogate code point */ - /* exit this condition tree */ - } else { - /* this is an unmatched lead code unit (1st surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* no more input */ - *err = U_ZERO_ERROR; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* callback(unassigned) for a BMP code point */ - *err = U_INVALID_CHAR_FOUND; - } - - args->converter->fromUChar32=sourceChar; - break; - } - }/* end while(mySourceIndexsource = source; - args->target = (char*)target; -} - -static const uint16_t lookupTable[][2]={ - { ZERO, ZERO }, /*DEFALT*/ - { ZERO, ZERO }, 
/*ROMAN*/ - { DEVANAGARI, DEV_MASK }, - { BENGALI, BNG_MASK }, - { TAMIL, TML_MASK }, - { TELUGU, KND_MASK }, - { BENGALI, BNG_MASK }, - { ORIYA, ORI_MASK }, - { KANNADA, KND_MASK }, - { MALAYALAM, MLM_MASK }, - { GUJARATI, GJR_MASK }, - { GURMUKHI, PNJ_MASK } -}; - -#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\ - /* add offset to current Indic Block */ \ - if(targetUniChar>ASCII_END && \ - targetUniChar != ZWJ && \ - targetUniChar != ZWNJ && \ - targetUniChar != DANDA && \ - targetUniChar != DOUBLE_DANDA){ \ - \ - targetUniChar+=(uint16_t)(delta); \ - } \ - /* now write the targetUniChar */ \ - if(targettargetLimit){ \ - *(target)++ = (UChar)targetUniChar; \ - if(offsets){ \ - *(offsets)++ = (int32_t)(offset); \ - } \ - }else{ \ - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \ - (UChar)targetUniChar; \ - *err = U_BUFFER_OVERFLOW_ERROR; \ - } \ -} - -#define GET_MAPPING(sourceChar,targetUniChar,data){ \ - targetUniChar = toUnicodeTable[(sourceChar)] ; \ - /* is the code point valid in current script? */ \ - if(sourceChar> ASCII_END && \ - (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \ - /* Vocallic RR is assigne in ISCII Telugu and Unicode */ \ - if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \ - targetUniChar!=VOCALLIC_RR){ \ - targetUniChar=missingCharMarker; \ - } \ - } \ -} - -/*********** - * Rules for ISCII to Unicode converter - * ISCII is stateful encoding. To convert ISCII bytes to Unicode, - * which has both precomposed and decomposed forms characters - * pre-context and post-context need to be considered. - * - * Post context - * i) ATR : Attribute code is used to declare the font and script switching. 
- * Currently we only switch scripts and font codes consumed without generating an error - * ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure, - * obsolete characters - * Pre context - * i) Halant: if preceeded by a halant then it is a explicit halant - * ii) Nukta : - * a) if preceeded by a halant then it is a soft halant - * b) if preceeded by specific consonants and the ligatures have pre-composed - * characters in Unicode then convert to pre-composed characters - * iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda - * - */ - -static void U_CALLCONV -UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) { - const char *source = ( char *) args->source; - UChar *target = args->target; - const char *sourceLimit = args->sourceLimit; - const UChar* targetLimit = args->targetLimit; - uint32_t targetUniChar = 0x0000; - uint8_t sourceChar = 0x0000; - UConverterDataISCII* data; - UChar32* toUnicodeStatus=NULL; - UChar32 tempTargetUniChar = 0x0000; - UChar* contextCharToUnicode= NULL; - UBool found; - int i; - int offset = 0; - - if ((args->converter == NULL) || (target < args->target) || (source < args->source)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - data = (UConverterDataISCII*)(args->converter->extraInfo); - contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */ - toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/ - - while (U_SUCCESS(*err) && sourcecurrentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA); - data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1]; - } else if (sourceChar==DEF) { - /* switch back to default */ - data->currentDeltaToUnicode = data->defDeltaToUnicode; - data->currentMaskToUnicode = data->defMaskToUnicode; - } else { - if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) { - /* these are 
display codes consume and continue */ - } else { - *err =U_ILLEGAL_CHAR_FOUND; - /* reset */ - *contextCharToUnicode=NO_CHAR_MARKER; - goto CALLBACK; - } - } - - /* reset */ - *contextCharToUnicode=NO_CHAR_MARKER; - - continue; - - } else if (*contextCharToUnicode==EXT) { - /* check if sourceChar is in 0xA1-0xEE range */ - if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) { - /* We currently support only Anudatta and Devanagari abbreviation sign */ - if (sourceChar==0xBF || sourceChar == 0xB8) { - targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA; - - /* find out if the mapping is valid in this state */ - if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { - *contextCharToUnicode= NO_CHAR_MARKER; - - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - /* write to target */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); - - continue; - } - } - /* byte unit is unassigned */ - targetUniChar = missingCharMarker; - *err= U_INVALID_CHAR_FOUND; - } else { - /* only 0xA1 - 0xEE are legal after EXT char */ - *contextCharToUnicode= NO_CHAR_MARKER; - *err = U_ILLEGAL_CHAR_FOUND; - } - goto CALLBACK; - } else if (*contextCharToUnicode==ISCII_INV) { - if (sourceChar==ISCII_HALANT) { - targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */ - } else { - targetUniChar = ZWJ; - } - - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. 
*/ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - /* write to target */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); - /* reset */ - *contextCharToUnicode=NO_CHAR_MARKER; - } - - /* look at the pre-context and perform special processing */ - switch (sourceChar) { - case ISCII_INV: - case EXT: - case ATR: - *contextCharToUnicode = (UChar)sourceChar; - - if (*toUnicodeStatus != missingCharMarker) { - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); - *toUnicodeStatus = missingCharMarker; - } - continue; - case ISCII_DANDA: - /* handle double danda*/ - if (*contextCharToUnicode== ISCII_DANDA) { - targetUniChar = DOUBLE_DANDA; - /* clear the context */ - *contextCharToUnicode = NO_CHAR_MARKER; - *toUnicodeStatus = missingCharMarker; - } else { - GET_MAPPING(sourceChar,targetUniChar,data); - *contextCharToUnicode = sourceChar; - } - break; - case ISCII_HALANT: - /* handle explicit halant */ - if (*contextCharToUnicode == ISCII_HALANT) { - targetUniChar = ZWNJ; - /* clear the context */ - *contextCharToUnicode = NO_CHAR_MARKER; - } else { - GET_MAPPING(sourceChar,targetUniChar,data); - *contextCharToUnicode = sourceChar; - } - break; - case 0x0A: - case 0x0D: - data->resetToDefaultToUnicode = TRUE; - GET_MAPPING(sourceChar,targetUniChar,data) - ; - *contextCharToUnicode = sourceChar; - break; - - case ISCII_VOWEL_SIGN_E: - i=1; - found=FALSE; - for (; 
icurrentMaskToUnicode) { - /*targetUniChar += data->currentDeltaToUnicode ;*/ - *contextCharToUnicode= NO_CHAR_MARKER; - *toUnicodeStatus = missingCharMarker; - break; - } - } - GET_MAPPING(sourceChar,targetUniChar,data); - *contextCharToUnicode = sourceChar; - break; - - case ISCII_NUKTA: - /* handle soft halant */ - if (*contextCharToUnicode == ISCII_HALANT) { - targetUniChar = ZWJ; - /* clear the context */ - *contextCharToUnicode = NO_CHAR_MARKER; - break; - } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) { - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi. - * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39). 
- */ - targetUniChar = PNJ_RRA; - WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); - if (U_SUCCESS(*err)) { - targetUniChar = PNJ_SIGN_VIRAMA; - WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); - if (U_SUCCESS(*err)) { - targetUniChar = PNJ_HA; - WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); - } else { - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; - } - } else { - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA; - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; - } - *toUnicodeStatus = missingCharMarker; - data->contextCharToUnicode = NO_CHAR_MARKER; - continue; - } else { - /* try to handle + ISCII_NUKTA special mappings */ - i=1; - found =FALSE; - for (; icurrentMaskToUnicode) { - /*targetUniChar += data->currentDeltaToUnicode ;*/ - *contextCharToUnicode= NO_CHAR_MARKER; - *toUnicodeStatus = missingCharMarker; - if (data->currentDeltaToUnicode == PNJ_DELTA) { - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); - continue; - } - break; - } - /* else fall through to default */ - } - /* else fall through to default */ - U_FALLTHROUGH; - } - default:GET_MAPPING(sourceChar,targetUniChar,data) - ; - *contextCharToUnicode = sourceChar; - break; - } - - if (*toUnicodeStatus != missingCharMarker) { - /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. 
*/ - if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) && - (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && ((UChar32)(targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus)) { - /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */ - offset = (int)(source-args->source - 3); - tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err); - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */ - *toUnicodeStatus = missingCharMarker; - continue; - } else { - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. - * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi. - */ - if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) { - targetUniChar = PNJ_TIPPI - PNJ_DELTA; - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err); - } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) { - /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. 
*/ - data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA; - } else { - /* write the previously mapped codepoint */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); - } - } - *toUnicodeStatus = missingCharMarker; - } - - if (targetUniChar != missingCharMarker) { - /* now save the targetUniChar for delayed write */ - *toUnicodeStatus = (UChar) targetUniChar; - if (data->resetToDefaultToUnicode==TRUE) { - data->currentDeltaToUnicode = data->defDeltaToUnicode; - data->currentMaskToUnicode = data->defMaskToUnicode; - data->resetToDefaultToUnicode=FALSE; - } - } else { - - /* we reach here only if targetUniChar == missingCharMarker - * so assign codes to reason and err - */ - *err = U_INVALID_CHAR_FOUND; -CALLBACK: - args->converter->toUBytes[0] = (uint8_t) sourceChar; - args->converter->toULength = 1; - break; - } - - } else { - *err =U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - if (U_SUCCESS(*err) && args->flush && source == sourceLimit) { - /* end of the input stream */ - UConverter *cnv = args->converter; - - if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) { - /* set toUBytes[] */ - cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode; - cnv->toULength = 1; - - /* avoid looping on truncated sequences */ - *contextCharToUnicode = NO_CHAR_MARKER; - } else { - cnv->toULength = 0; - } - - if (*toUnicodeStatus != missingCharMarker) { - /* output a remaining target character */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err); - *toUnicodeStatus = missingCharMarker; - } - } - - args->target = target; - args->source = source; -} - -/* structure for SafeClone calculations */ -struct cloneISCIIStruct { - UConverter cnv; - UConverterDataISCII mydata; -}; - -static UConverter * U_CALLCONV -_ISCII_SafeClone(const UConverter *cnv, - void *stackBuffer, - int32_t 
*pBufferSize, - UErrorCode *status) -{ - struct cloneISCIIStruct * localClone; - int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct); - - if (U_FAILURE(*status)) { - return 0; - } - - if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ - *pBufferSize = bufferSizeNeeded; - return 0; - } - - localClone = (struct cloneISCIIStruct *)stackBuffer; - /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ - - uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII)); - localClone->cnv.extraInfo = &localClone->mydata; - localClone->cnv.isExtraLocal = TRUE; - - return &localClone->cnv; -} - -static void U_CALLCONV -_ISCIIGetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) -{ - (void)cnv; - (void)which; - (void)pErrorCode; - int32_t idx, script; - uint8_t mask; - - /* Since all ISCII versions allow switching to other ISCII - scripts, we add all roundtrippable characters to this set. 
*/ - sa->addRange(sa->set, 0, ASCII_END); - for (script = DEVANAGARI; script <= MALAYALAM; script++) { - mask = (uint8_t)(lookupInitialData[script].maskEnum); - for (idx = 0; idx < DELTA; idx++) { - /* added check for TELUGU character */ - if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) { - sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); - } - } - } - sa->add(sa->set, DANDA); - sa->add(sa->set, DOUBLE_DANDA); - sa->add(sa->set, ZWNJ); - sa->add(sa->set, ZWJ); -} -U_CDECL_END -static const UConverterImpl _ISCIIImpl={ - - UCNV_ISCII, - - NULL, - NULL, - - _ISCIIOpen, - _ISCIIClose, - _ISCIIReset, - - UConverter_toUnicode_ISCII_OFFSETS_LOGIC, - UConverter_toUnicode_ISCII_OFFSETS_LOGIC, - UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, - UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, - NULL, - - NULL, - _ISCIIgetName, - NULL, - _ISCII_SafeClone, - _ISCIIGetUnicodeSet, - NULL, - NULL -}; - -static const UConverterStaticData _ISCIIStaticData={ - sizeof(UConverterStaticData), - "ISCII", - 0, - UCNV_IBM, - UCNV_ISCII, - 1, - 4, - { 0x1a, 0, 0, 0 }, - 0x1, - FALSE, - FALSE, - 0x0, - 0x0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ - -}; - -const UConverterSharedData _ISCIIData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl); - -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/deps/node/deps/icu-small/source/common/ucnvlat1.cpp b/deps/node/deps/icu-small/source/common/ucnvlat1.cpp deleted file mode 100644 index 15eeb5c5..00000000 --- a/deps/node/deps/icu-small/source/common/ucnvlat1.cpp +++ /dev/null @@ -1,756 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2015, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* file name: ucnvlat1.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000feb07 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/uset.h" -#include "unicode/utf8.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "ustr_imp.h" - -/* control optimizations according to the platform */ -#define LATIN1_UNROLL_FROM_UNICODE 1 - -/* ISO 8859-1 --------------------------------------------------------------- */ - -/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */ -U_CDECL_BEGIN -static void U_CALLCONV -_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - const uint8_t *source; - UChar *target; - int32_t targetCapacity, length; - int32_t *offsets; - - int32_t sourceIndex; - - /* set up the local pointers */ - source=(const uint8_t *)pArgs->source; - target=pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - sourceIndex=0; - - /* - * since the conversion here is 1:1 UChar:uint8_t, we need only one counter - * for the minimum of the sourceLength and targetCapacity - */ - length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); - if(length<=targetCapacity) { - targetCapacity=length; - } else { - /* target will be full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - length=targetCapacity; - } - - if(targetCapacity>=8) { - /* This loop is unrolled for speed and improved pipelining. 
*/ - int32_t count, loops; - - loops=count=targetCapacity>>3; - length=targetCapacity&=0x7; - do { - target[0]=source[0]; - target[1]=source[1]; - target[2]=source[2]; - target[3]=source[3]; - target[4]=source[4]; - target[5]=source[5]; - target[6]=source[6]; - target[7]=source[7]; - target+=8; - source+=8; - } while(--count>0); - - if(offsets!=NULL) { - do { - offsets[0]=sourceIndex++; - offsets[1]=sourceIndex++; - offsets[2]=sourceIndex++; - offsets[3]=sourceIndex++; - offsets[4]=sourceIndex++; - offsets[5]=sourceIndex++; - offsets[6]=sourceIndex++; - offsets[7]=sourceIndex++; - offsets+=8; - } while(--loops>0); - } - } - - /* conversion loop */ - while(targetCapacity>0) { - *target++=*source++; - --targetCapacity; - } - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - - /* set offsets */ - if(offsets!=NULL) { - while(length>0) { - *offsets++=sourceIndex++; - --length; - } - pArgs->offsets=offsets; - } -} - -/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */ -static UChar32 U_CALLCONV -_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - const uint8_t *source=(const uint8_t *)pArgs->source; - if(source<(const uint8_t *)pArgs->sourceLimit) { - pArgs->source=(const char *)(source+1); - return *source; - } - - /* no output because of empty input */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; -} - -/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). 
*/ -static void U_CALLCONV -_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source, *sourceLimit; - uint8_t *target, *oldTarget; - int32_t targetCapacity, length; - int32_t *offsets; - - UChar32 cp; - UChar c, max; - - int32_t sourceIndex; - - /* set up the local pointers */ - cnv=pArgs->converter; - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=oldTarget=(uint8_t *)pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - if(cnv->sharedData==&_Latin1Data) { - max=0xff; /* Latin-1 */ - } else { - max=0x7f; /* US-ASCII */ - } - - /* get the converter state from UConverter */ - cp=cnv->fromUChar32; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex= cp==0 ? 0 : -1; - - /* - * since the conversion here is 1:1 UChar:uint8_t, we need only one counter - * for the minimum of the sourceLength and targetCapacity - */ - length=(int32_t)(sourceLimit-source); - if(length0) { - goto getTrail; - } - -#if LATIN1_UNROLL_FROM_UNICODE - /* unroll the loop with the most common case */ - if(targetCapacity>=16) { - int32_t count, loops; - UChar u, oredChars; - - loops=count=targetCapacity>>4; - do { - oredChars=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - 
oredChars|=u=*source++; - *target++=(uint8_t)u; - oredChars|=u=*source++; - *target++=(uint8_t)u; - - /* were all 16 entries really valid? */ - if(oredChars>max) { - /* no, return to the first of these 16 */ - source-=16; - target-=16; - break; - } - } while(--count>0); - count=loops-count; - targetCapacity-=16*count; - - if(offsets!=NULL) { - oldTarget+=16*count; - while(count>0) { - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - --count; - } - } - } -#endif - - /* conversion loop */ - c=0; - while(targetCapacity>0 && (c=*source++)<=max) { - /* convert the Unicode code point */ - *target++=(uint8_t)c; - --targetCapacity; - } - - if(c>max) { - cp=c; - if(!U_IS_SURROGATE(cp)) { - /* callback(unassigned) */ - } else if(U_IS_SURROGATE_LEAD(cp)) { -getTrail: - if(sourcefromUChar32=cp; - goto noMoreInput; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - } - - *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND; - cnv->fromUChar32=cp; - } -noMoreInput: - - /* set offsets since the start */ - if(offsets!=NULL) { - size_t count=target-oldTarget; - while(count>0) { - *offsets++=sourceIndex++; - --count; - } - } - - if(U_SUCCESS(*pErrorCode) && source=(uint8_t *)pArgs->targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; -} - -/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). 
*/ -static void U_CALLCONV -ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, - UConverterToUnicodeArgs *pToUArgs, - UErrorCode *pErrorCode) { - UConverter *utf8; - const uint8_t *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - - UChar32 c; - uint8_t b, t1; - - /* set up the local pointers */ - utf8=pToUArgs->converter; - source=(uint8_t *)pToUArgs->source; - sourceLimit=(uint8_t *)pToUArgs->sourceLimit; - target=(uint8_t *)pFromUArgs->target; - targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); - - /* get the converter state from the UTF-8 UConverter */ - if (utf8->toULength > 0) { - c=(UChar32)utf8->toUnicodeStatus; - } else { - c = 0; - } - if(c!=0 && source=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) { - ++source; - *target++=(uint8_t)(((c&3)<<6)|t1); - --targetCapacity; - - utf8->toUnicodeStatus=0; - utf8->toULength=0; - } else { - /* complicated, illegal or unmappable input: fall back to the pivoting implementation */ - *pErrorCode=U_USING_DEFAULT_WARNING; - return; - } - } - - /* - * Make sure that the last byte sequence before sourceLimit is complete - * or runs into a lead byte. - * In the conversion loop compare source with sourceLimit only once - * per multi-byte character. - * For Latin-1, adjust sourceLimit only for 1 trail byte because - * the conversion loop handles at most 2-byte sequences. 
- */ - if(source0) { - b=*source++; - if(U8_IS_SINGLE(b)) { - /* convert ASCII */ - *target++=(uint8_t)b; - --targetCapacity; - } else if( /* handle U+0080..U+00FF inline */ - b>=0xc2 && b<=0xc3 && - (t1=(uint8_t)(*source-0x80)) <= 0x3f - ) { - ++source; - *target++=(uint8_t)(((b&3)<<6)|t1); - --targetCapacity; - } else { - /* complicated, illegal or unmappable input: fall back to the pivoting implementation */ - pToUArgs->source=(char *)(source-1); - pFromUArgs->target=(char *)target; - *pErrorCode=U_USING_DEFAULT_WARNING; - return; - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - /* - * The sourceLimit may have been adjusted before the conversion loop - * to stop before a truncated sequence. - * If so, then collect the truncated sequence now. - * For Latin-1, there is at most exactly one lead byte because of the - * smaller sourceLimit adjustment logic. - */ - if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { - utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++; - utf8->toULength=1; - utf8->mode=U8_COUNT_BYTES(b); - } - - /* write back the updated pointers */ - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; -} - -static void U_CALLCONV -_Latin1GetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - (void)cnv; - (void)which; - (void)pErrorCode; - sa->addRange(sa->set, 0, 0xff); -} -U_CDECL_END - - -static const UConverterImpl _Latin1Impl={ - UCNV_LATIN_1, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - _Latin1ToUnicodeWithOffsets, - _Latin1ToUnicodeWithOffsets, - _Latin1FromUnicodeWithOffsets, - _Latin1FromUnicodeWithOffsets, - _Latin1GetNextUChar, - - NULL, - NULL, - NULL, - NULL, - _Latin1GetUnicodeSet, - - NULL, - ucnv_Latin1FromUTF8 -}; - -static const UConverterStaticData _Latin1StaticData={ - sizeof(UConverterStaticData), - "ISO-8859-1", - 819, UCNV_IBM, UCNV_LATIN_1, 1, 1, - { 0x1a, 0, 
0, 0 }, 1, FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _Latin1Data= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl); - -/* US-ASCII ----------------------------------------------------------------- */ - -U_CDECL_BEGIN -/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */ -static void U_CALLCONV -_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - const uint8_t *source, *sourceLimit; - UChar *target, *oldTarget; - int32_t targetCapacity, length; - int32_t *offsets; - - int32_t sourceIndex; - - uint8_t c; - - /* set up the local pointers */ - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=oldTarget=pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex=0; - - /* - * since the conversion here is 1:1 UChar:uint8_t, we need only one counter - * for the minimum of the sourceLength and targetCapacity - */ - length=(int32_t)(sourceLimit-source); - if(length=8) { - /* This loop is unrolled for speed and improved pipelining. */ - int32_t count, loops; - UChar oredChars; - - loops=count=targetCapacity>>3; - do { - oredChars=target[0]=source[0]; - oredChars|=target[1]=source[1]; - oredChars|=target[2]=source[2]; - oredChars|=target[3]=source[3]; - oredChars|=target[4]=source[4]; - oredChars|=target[5]=source[5]; - oredChars|=target[6]=source[6]; - oredChars|=target[7]=source[7]; - - /* were all 16 entries really valid? 
*/ - if(oredChars>0x7f) { - /* no, return to the first of these 16 */ - break; - } - source+=8; - target+=8; - } while(--count>0); - count=loops-count; - targetCapacity-=count*8; - - if(offsets!=NULL) { - oldTarget+=count*8; - while(count>0) { - offsets[0]=sourceIndex++; - offsets[1]=sourceIndex++; - offsets[2]=sourceIndex++; - offsets[3]=sourceIndex++; - offsets[4]=sourceIndex++; - offsets[5]=sourceIndex++; - offsets[6]=sourceIndex++; - offsets[7]=sourceIndex++; - offsets+=8; - --count; - } - } - } - - /* conversion loop */ - c=0; - while(targetCapacity>0 && (c=*source++)<=0x7f) { - *target++=c; - --targetCapacity; - } - - if(c>0x7f) { - /* callback(illegal); copy the current bytes to toUBytes[] */ - UConverter *cnv=pArgs->converter; - cnv->toUBytes[0]=c; - cnv->toULength=1; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } else if(source=pArgs->targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* set offsets since the start */ - if(offsets!=NULL) { - size_t count=target-oldTarget; - while(count>0) { - *offsets++=sourceIndex++; - --count; - } - } - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; -} - -/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */ -static UChar32 U_CALLCONV -_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - const uint8_t *source; - uint8_t b; - - source=(const uint8_t *)pArgs->source; - if(source<(const uint8_t *)pArgs->sourceLimit) { - b=*source++; - pArgs->source=(const char *)source; - if(b<=0x7f) { - return b; - } else { - UConverter *cnv=pArgs->converter; - cnv->toUBytes[0]=b; - cnv->toULength=1; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return 0xffff; - } - } - - /* no output because of empty input */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; -} - -/* "Convert" UTF-8 to US-ASCII: Validate and copy. 
*/ -static void U_CALLCONV -ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, - UConverterToUnicodeArgs *pToUArgs, - UErrorCode *pErrorCode) { - const uint8_t *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity, length; - - uint8_t c; - - if(pToUArgs->converter->toULength > 0) { - /* no handling of partial UTF-8 characters here, fall back to pivoting */ - *pErrorCode=U_USING_DEFAULT_WARNING; - return; - } - - /* set up the local pointers */ - source=(const uint8_t *)pToUArgs->source; - sourceLimit=(const uint8_t *)pToUArgs->sourceLimit; - target=(uint8_t *)pFromUArgs->target; - targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); - - /* - * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter - * for the minimum of the sourceLength and targetCapacity - */ - length=(int32_t)(sourceLimit-source); - if(length=16) { - int32_t count, loops; - uint8_t oredChars; - - loops=count=targetCapacity>>4; - do { - oredChars=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - oredChars|=*target++=*source++; - - /* were all 16 entries really valid? 
*/ - if(oredChars>0x7f) { - /* no, return to the first of these 16 */ - source-=16; - target-=16; - break; - } - } while(--count>0); - count=loops-count; - targetCapacity-=16*count; - } - - /* conversion loop */ - c=0; - while(targetCapacity>0 && (c=*source)<=0x7f) { - ++source; - *target++=c; - --targetCapacity; - } - - if(c>0x7f) { - /* non-ASCII character, handle in standard converter */ - *pErrorCode=U_USING_DEFAULT_WARNING; - } else if(source=(const uint8_t *)pFromUArgs->targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* write back the updated pointers */ - pToUArgs->source=(const char *)source; - pFromUArgs->target=(char *)target; -} - -static void U_CALLCONV -_ASCIIGetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - (void)cnv; - (void)which; - (void)pErrorCode; - sa->addRange(sa->set, 0, 0x7f); -} -U_CDECL_END - -static const UConverterImpl _ASCIIImpl={ - UCNV_US_ASCII, - - NULL, - NULL, - - NULL, - NULL, - NULL, - - _ASCIIToUnicodeWithOffsets, - _ASCIIToUnicodeWithOffsets, - _Latin1FromUnicodeWithOffsets, - _Latin1FromUnicodeWithOffsets, - _ASCIIGetNextUChar, - - NULL, - NULL, - NULL, - NULL, - _ASCIIGetUnicodeSet, - - NULL, - ucnv_ASCIIFromUTF8 -}; - -static const UConverterStaticData _ASCIIStaticData={ - sizeof(UConverterStaticData), - "US-ASCII", - 367, UCNV_IBM, UCNV_US_ASCII, 1, 1, - { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _ASCIIData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl); - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnvmbcs.cpp b/deps/node/deps/icu-small/source/common/ucnvmbcs.cpp deleted file mode 100644 index e1248a7b..00000000 --- a/deps/node/deps/icu-small/source/common/ucnvmbcs.cpp +++ /dev/null @@ -1,5723 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2000-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ucnvmbcs.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000jul03 -* created by: Markus W. Scherer -* -* The current code in this file replaces the previous implementation -* of conversion code from multi-byte codepages to Unicode and back. -* This implementation supports the following: -* - legacy variable-length codepages with up to 4 bytes per character -* - all Unicode code points (up to 0x10ffff) -* - efficient distinction of unassigned vs. illegal byte sequences -* - it is possible in fromUnicode() to directly deal with simple -* stateful encodings (used for EBCDIC_STATEFUL) -* - it is possible to convert Unicode code points -* to a single zero byte (but not as a fallback except for SBCS) -* -* Remaining limitations in fromUnicode: -* - byte sequences must not have leading zero bytes -* - except for SBCS codepages: no fallback mapping from Unicode to a zero byte -* - limitation to up to 4 bytes per character -* -* ICU 2.8 (late 2003) adds a secondary data structure which lifts some of these -* limitations and adds m:n character mappings and other features. -* See ucnv_ext.h for details. 
-* -* Change history: -* -* 5/6/2001 Ram Moved MBCS_SINGLE_RESULT_FROM_U,MBCS_STAGE_2_FROM_U, -* MBCS_VALUE_2_FROM_STAGE_2, MBCS_VALUE_4_FROM_STAGE_2 -* macros to ucnvmbcs.h file -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/ucnv_cb.h" -#include "unicode/udata.h" -#include "unicode/uset.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" -#include "ucnv_bld.h" -#include "ucnvmbcs.h" -#include "ucnv_ext.h" -#include "ucnv_cnv.h" -#include "cmemory.h" -#include "cstring.h" -#include "umutex.h" -#include "ustr_imp.h" - -/* control optimizations according to the platform */ -#define MBCS_UNROLL_SINGLE_TO_BMP 1 -#define MBCS_UNROLL_SINGLE_FROM_BMP 0 - -/* - * _MBCSHeader versions 5.3 & 4.3 - * (Note that the _MBCSHeader version is in addition to the converter formatVersion.) - * - * This version is optional. Version 5 is used for incompatible data format changes. - * makeconv will continue to generate version 4 files if possible. - * - * Changes from version 4: - * - * The main difference is an additional _MBCSHeader field with - * - the length (number of uint32_t) of the _MBCSHeader - * - flags for further incompatible data format changes - * - flags for further, backward compatible data format changes - * - * The MBCS_OPT_FROM_U flag indicates that most of the fromUnicode data is omitted from - * the file and needs to be reconstituted at load time. - * This requires a utf8Friendly format with an additional mbcsIndex table for fast - * (and UTF-8-friendly) fromUnicode conversion for Unicode code points up to maxFastUChar. - * (For details about these structures see below, and see ucnvmbcs.h.) - * - * utf8Friendly also implies that the fromUnicode mappings are stored in ascending order - * of the Unicode code points. (This requires that the .ucm file has the |0 etc. - * precision markers for all mappings.) 
- * - * All fallbacks have been moved to the extension table, leaving only roundtrips in the - * omitted data that can be reconstituted from the toUnicode data. - * - * Of the stage 2 table, the part corresponding to maxFastUChar and below is omitted. - * With only roundtrip mappings in the base fromUnicode data, this part is fully - * redundant with the mbcsIndex and will be reconstituted from that (also using the - * stage 1 table which contains the information about how stage 2 was compacted). - * - * The rest of the stage 2 table, the part for code points above maxFastUChar, - * is stored in the file and will be appended to the reconstituted part. - * - * The entire fromUBytes array is omitted from the file and will be reconstitued. - * This is done by enumerating all toUnicode roundtrip mappings, performing - * each mapping (using the stage 1 and reconstituted stage 2 tables) and - * writing instead of reading the byte values. - * - * _MBCSHeader version 4.3 - * - * Change from version 4.2: - * - Optional utf8Friendly data structures, with 64-entry stage 3 block - * allocation for parts of the BMP, and an additional mbcsIndex in non-SBCS - * files which can be used instead of stages 1 & 2. - * Faster lookups for roundtrips from most commonly used characters, - * and lookups from UTF-8 byte sequences with a natural bit distribution. - * See ucnvmbcs.h for more details. - * - * Change from version 4.1: - * - Added an optional extension table structure at the end of the .cnv file. - * It is present if the upper bits of the header flags field contains a non-zero - * byte offset to it. - * Files that contain only a conversion table and no base table - * use the special outputType MBCS_OUTPUT_EXT_ONLY. - * These contain the base table name between the MBCS header and the extension - * data. - * - * Change from version 4.0: - * - Replace header.reserved with header.fromUBytesLength so that all - * fields in the data have length. 
- * - * Changes from version 3 (for performance improvements): - * - new bit distribution for state table entries - * - reordered action codes - * - new data structure for single-byte fromUnicode - * + stage 2 only contains indexes - * + stage 3 stores 16 bits per character with classification bits 15..8 - * - no multiplier for stage 1 entries - * - stage 2 for non-single-byte codepages contains the index and the flags in - * one 32-bit value - * - 2-byte and 4-byte fromUnicode results are stored directly as 16/32-bit integers - * - * For more details about old versions of the MBCS data structure, see - * the corresponding versions of this file. - * - * Converting stateless codepage data ---------------------------------------*** - * (or codepage data with simple states) to Unicode. - * - * Data structure and algorithm for converting from complex legacy codepages - * to Unicode. (Designed before 2000-may-22.) - * - * The basic idea is that the structure of legacy codepages can be described - * with state tables. - * When reading a byte stream, each input byte causes a state transition. - * Some transitions result in the output of a code point, some result in - * "unassigned" or "illegal" output. - * This is used here for character conversion. - * - * The data structure begins with a state table consisting of a row - * per state, with 256 entries (columns) per row for each possible input - * byte value. - * Each entry is 32 bits wide, with two formats distinguished by - * the sign bit (bit 31): - * - * One format for transitional entries (bit 31 not set) for non-final bytes, and - * one format for final entries (bit 31 set). - * Both formats contain the number of the next state in the same bit - * positions. - * State 0 is the initial state. - * - * Most of the time, the offset values of subsequent states are added - * up to a scalar value. This value will eventually be the index of - * the Unicode code point in a table that follows the state table. 
- * The effect is that the code points for final state table rows - * are contiguous. The code points of final state rows follow each other - * in the order of the references to those final states by previous - * states, etc. - * - * For some terminal states, the offset is itself the output Unicode - * code point (16 bits for a BMP code point or 20 bits for a supplementary - * code point (stored as code point minus 0x10000 so that 20 bits are enough). - * For others, the code point in the Unicode table is stored with either - * one or two code units: one for BMP code points, two for a pair of - * surrogates. - * All code points for a final state entry take up the same number of code - * units, regardless of whether they all actually _use_ the same number - * of code units. This is necessary for simple array access. - * - * An additional feature comes in with what in ICU is called "fallback" - * mappings: - * - * In addition to round-trippable, precise, 1:1 mappings, there are often - * mappings defined between similar, though not the same, characters. - * Typically, such mappings occur only in fromUnicode mapping tables because - * Unicode has a superset repertoire of most other codepages. However, it - * is possible to provide such mappings in the toUnicode tables, too. - * In this case, the fallback mappings are partly integrated into the - * general state tables because the structure of the encoding includes their - * byte sequences. - * For final entries in an initial state, fallback mappings are stored in - * the entry itself like with roundtrip mappings. - * For other final entries, they are stored in the code units table if - * the entry is for a pair of code units. - * For single-unit results in the code units table, there is no space to - * alternatively hold a fallback mapping; in this case, the code unit - * is stored as U+fffe (unassigned), and the fallback mapping needs to - * be looked up by the scalar offset value in a separate table. 
- * - * "Unassigned" state entries really mean "structurally unassigned", - * i.e., such a byte sequence will never have a mapping result. - * - * The interpretation of the bits in each entry is as follows: - * - * Bit 31 not set, not a terminal entry ("transitional"): - * 30..24 next state - * 23..0 offset delta, to be added up - * - * Bit 31 set, terminal ("final") entry: - * 30..24 next state (regardless of action code) - * 23..20 action code: - * action codes 0 and 1 result in precise-mapping Unicode code points - * 0 valid byte sequence - * 19..16 not used, 0 - * 15..0 16-bit Unicode BMP code point - * never U+fffe or U+ffff - * 1 valid byte sequence - * 19..0 20-bit Unicode supplementary code point - * never U+fffe or U+ffff - * - * action codes 2 and 3 result in fallback (unidirectional-mapping) Unicode code points - * 2 valid byte sequence (fallback) - * 19..16 not used, 0 - * 15..0 16-bit Unicode BMP code point as fallback result - * 3 valid byte sequence (fallback) - * 19..0 20-bit Unicode supplementary code point as fallback result - * - * action codes 4 and 5 may result in roundtrip/fallback/unassigned/illegal results - * depending on the code units they result in - * 4 valid byte sequence - * 19..9 not used, 0 - * 8..0 final offset delta - * pointing to one 16-bit code unit which may be - * fffe unassigned -- look for a fallback for this offset - * ffff illegal - * 5 valid byte sequence - * 19..9 not used, 0 - * 8..0 final offset delta - * pointing to two 16-bit code units - * (typically UTF-16 surrogates) - * the result depends on the first code unit as follows: - * 0000..d7ff roundtrip BMP code point (1st alone) - * d800..dbff roundtrip surrogate pair (1st, 2nd) - * dc00..dfff fallback surrogate pair (1st-400, 2nd) - * e000 roundtrip BMP code point (2nd alone) - * e001 fallback BMP code point (2nd alone) - * fffe unassigned - * ffff illegal - * (the final offset deltas are at most 255 * 2, - * times 2 because of storing code unit pairs) - * - * 6 
unassigned byte sequence - * 19..16 not used, 0 - * 15..0 16-bit Unicode BMP code point U+fffe (new with version 2) - * this does not contain a final offset delta because the main - * purpose of this action code is to save scalar offset values; - * therefore, fallback values cannot be assigned to byte - * sequences that result in this action code - * 7 illegal byte sequence - * 19..16 not used, 0 - * 15..0 16-bit Unicode BMP code point U+ffff (new with version 2) - * 8 state change only - * 19..0 not used, 0 - * useful for state changes in simple stateful encodings, - * at Shift-In/Shift-Out codes - * - * - * 9..15 reserved for future use - * current implementations will only perform a state change - * and ignore bits 19..0 - * - * An encoding with contiguous ranges of unassigned byte sequences, like - * Shift-JIS and especially EUC-TW, can be stored efficiently by having - * at least two states for the trail bytes: - * One trail byte state that results in code points, and one that only - * has "unassigned" and "illegal" terminal states. - * - * Note: partly by accident, this data structure supports simple stateful - * encodings without any additional logic. - * Currently, only simple Shift-In/Shift-Out schemes are handled with - * appropriate state tables (especially EBCDIC_STATEFUL!). - * - * MBCS version 2 added: - * unassigned and illegal action codes have U+fffe and U+ffff - * instead of unused bits; this is useful for _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP() - * - * Converting from Unicode to codepage bytes --------------------------------*** - * - * The conversion data structure for fromUnicode is designed for the known - * structure of Unicode. It maps from 21-bit code points (0..0x10ffff) to - * a sequence of 1..4 bytes, in addition to a flag that indicates if there is - * a roundtrip mapping. - * - * The lookup is done with a 3-stage trie, using 11/6/4 bits for stage 1/2/3 - * like in the character properties table. 
- * The beginning of the trie is at offsetFromUTable, the beginning of stage 3 - * with the resulting bytes is at offsetFromUBytes. - * - * Beginning with version 4, single-byte codepages have a significantly different - * trie compared to other codepages. - * In all cases, the entry in stage 1 is directly the index of the block of - * 64 entries in stage 2. - * - * Single-byte lookup: - * - * Stage 2 only contains 16-bit indexes directly to the 16-blocks in stage 3. - * Stage 3 contains one 16-bit word per result: - * Bits 15..8 indicate the kind of result: - * f roundtrip result - * c fallback result from private-use code point - * 8 fallback result from other code points - * 0 unassigned - * Bits 7..0 contain the codepage byte. A zero byte is always possible. - * - * In version 4.3, the runtime code can build an sbcsIndex for a utf8Friendly - * file. For 2-byte UTF-8 byte sequences and some 3-byte sequences the lookup - * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3. - * ASCII code points can be looked up with a linear array access into stage 3. - * See maxFastUChar and other details in ucnvmbcs.h. - * - * Multi-byte lookup: - * - * Stage 2 contains a 32-bit word for each 16-block in stage 3: - * Bits 31..16 contain flags for which stage 3 entries contain roundtrip results - * test: MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) - * If this test is false, then a non-zero result will be interpreted as - * a fallback mapping. - * Bits 15..0 contain the index to stage 3, which must be multiplied by 16*(bytes per char) - * - * Stage 3 contains 2, 3, or 4 bytes per result. - * 2 or 4 bytes are stored as uint16_t/uint32_t in platform endianness, - * while 3 bytes are stored as bytes in big-endian order. - * Leading zero bytes are ignored, and the number of bytes is counted. - * A zero byte mapping result is possible as a roundtrip result. - * For some output types, the actual result is processed from this; - * see ucnv_MBCSFromUnicodeWithOffsets(). 
- * - * Note that stage 1 always contains 0x440=1088 entries (0x440==0x110000>>10), - * or (version 3 and up) for BMP-only codepages, it contains 64 entries. - * - * In version 4.3, a utf8Friendly file contains an mbcsIndex table. - * For 2-byte UTF-8 byte sequences and most 3-byte sequences the lookup - * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3. - * ASCII code points can be looked up with a linear array access into stage 3. - * See maxFastUChar, mbcsIndex and other details in ucnvmbcs.h. - * - * In version 3, stage 2 blocks may overlap by multiples of the multiplier - * for compaction. - * In version 4, stage 2 blocks (and for single-byte codepages, stage 3 blocks) - * may overlap by any number of entries. - * - * MBCS version 2 added: - * the converter checks for known output types, which allows - * adding new ones without crashing an unaware converter - */ - -/** - * Callback from ucnv_MBCSEnumToUnicode(), takes 32 mappings from - * consecutive sequences of bytes, starting from the one encoded in value, - * to Unicode code points. (Multiple mappings to reduce per-function call overhead.) - * Does not currently support m:n mappings or reverse fallbacks. - * This function will not be called for sequences of bytes with leading zeros. 
- * - * @param context an opaque pointer, as passed into ucnv_MBCSEnumToUnicode() - * @param value contains 1..4 bytes of the first byte sequence, right-aligned - * @param codePoints resulting Unicode code points, or negative if a byte sequence does - * not map to anything - * @return TRUE to continue enumeration, FALSE to stop - */ -typedef UBool U_CALLCONV -UConverterEnumToUCallback(const void *context, uint32_t value, UChar32 codePoints[32]); - -static void U_CALLCONV -ucnv_MBCSLoad(UConverterSharedData *sharedData, - UConverterLoadArgs *pArgs, - const uint8_t *raw, - UErrorCode *pErrorCode); - -static void U_CALLCONV -ucnv_MBCSUnload(UConverterSharedData *sharedData); - -static void U_CALLCONV -ucnv_MBCSOpen(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode); - -static UChar32 U_CALLCONV -ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode); - -static void U_CALLCONV -ucnv_MBCSGetStarters(const UConverter* cnv, - UBool starters[256], - UErrorCode *pErrorCode); - -U_CDECL_BEGIN -static const char* U_CALLCONV -ucnv_MBCSGetName(const UConverter *cnv); -U_CDECL_END - -static void U_CALLCONV -ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs, - int32_t offsetIndex, - UErrorCode *pErrorCode); - -static UChar32 U_CALLCONV -ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode); - -static void U_CALLCONV -ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, - UConverterToUnicodeArgs *pToUArgs, - UErrorCode *pErrorCode); - -static void U_CALLCONV -ucnv_MBCSGetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode); - -static void U_CALLCONV -ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, - UConverterToUnicodeArgs *pToUArgs, - UErrorCode *pErrorCode); - -static const UConverterImpl _SBCSUTF8Impl={ - UCNV_MBCS, - - ucnv_MBCSLoad, - ucnv_MBCSUnload, - - ucnv_MBCSOpen, - NULL, - NULL, - - ucnv_MBCSToUnicodeWithOffsets, - 
ucnv_MBCSToUnicodeWithOffsets, - ucnv_MBCSFromUnicodeWithOffsets, - ucnv_MBCSFromUnicodeWithOffsets, - ucnv_MBCSGetNextUChar, - - ucnv_MBCSGetStarters, - ucnv_MBCSGetName, - ucnv_MBCSWriteSub, - NULL, - ucnv_MBCSGetUnicodeSet, - - NULL, - ucnv_SBCSFromUTF8 -}; - -static const UConverterImpl _DBCSUTF8Impl={ - UCNV_MBCS, - - ucnv_MBCSLoad, - ucnv_MBCSUnload, - - ucnv_MBCSOpen, - NULL, - NULL, - - ucnv_MBCSToUnicodeWithOffsets, - ucnv_MBCSToUnicodeWithOffsets, - ucnv_MBCSFromUnicodeWithOffsets, - ucnv_MBCSFromUnicodeWithOffsets, - ucnv_MBCSGetNextUChar, - - ucnv_MBCSGetStarters, - ucnv_MBCSGetName, - ucnv_MBCSWriteSub, - NULL, - ucnv_MBCSGetUnicodeSet, - - NULL, - ucnv_DBCSFromUTF8 -}; - -static const UConverterImpl _MBCSImpl={ - UCNV_MBCS, - - ucnv_MBCSLoad, - ucnv_MBCSUnload, - - ucnv_MBCSOpen, - NULL, - NULL, - - ucnv_MBCSToUnicodeWithOffsets, - ucnv_MBCSToUnicodeWithOffsets, - ucnv_MBCSFromUnicodeWithOffsets, - ucnv_MBCSFromUnicodeWithOffsets, - ucnv_MBCSGetNextUChar, - - ucnv_MBCSGetStarters, - ucnv_MBCSGetName, - ucnv_MBCSWriteSub, - NULL, - ucnv_MBCSGetUnicodeSet, - NULL, - NULL -}; - -/* Static data is in tools/makeconv/ucnvstat.c for data-based - * converters. Be sure to update it as well. - */ - -const UConverterSharedData _MBCSData={ - sizeof(UConverterSharedData), 1, - NULL, NULL, FALSE, TRUE, &_MBCSImpl, - 0, UCNV_MBCS_TABLE_INITIALIZER -}; - - -/* GB 18030 data ------------------------------------------------------------ */ - -/* helper macros for linear values for GB 18030 four-byte sequences */ -#define LINEAR_18030(a, b, c, d) ((((a)*10+(b))*126L+(c))*10L+(d)) - -#define LINEAR_18030_BASE LINEAR_18030(0x81, 0x30, 0x81, 0x30) - -#define LINEAR(x) LINEAR_18030(x>>24, (x>>16)&0xff, (x>>8)&0xff, x&0xff) - -/* - * Some ranges of GB 18030 where both the Unicode code points and the - * GB four-byte sequences are contiguous and are handled algorithmically by - * the special callback functions below. - * The values are start & end of Unicode & GB codes. 
- * - * Note that single surrogates are not mapped by GB 18030 - * as of the re-released mapping tables from 2000-nov-30. - */ -static const uint32_t -gb18030Ranges[14][4]={ - {0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)}, - {0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)}, - {0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)}, - {0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)}, - {0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)}, - {0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)}, - {0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)}, - {0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32)}, - {0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237)}, - {0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733)}, - {0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837)}, - {0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638)}, - {0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931)}, - {0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439)} -}; - -/* bit flag for UConverter.options indicating GB 18030 special handling */ -#define _MBCS_OPTION_GB18030 0x8000 - -/* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */ -#define _MBCS_OPTION_KEIS 0x01000 -#define _MBCS_OPTION_JEF 0x02000 -#define _MBCS_OPTION_JIPS 0x04000 - -#define KEIS_SO_CHAR_1 0x0A -#define KEIS_SO_CHAR_2 0x42 -#define KEIS_SI_CHAR_1 0x0A -#define KEIS_SI_CHAR_2 0x41 - -#define JEF_SO_CHAR 0x28 -#define JEF_SI_CHAR 0x29 - -#define JIPS_SO_CHAR_1 0x1A -#define JIPS_SO_CHAR_2 0x70 -#define JIPS_SI_CHAR_1 0x1A -#define JIPS_SI_CHAR_2 0x71 - -enum SISO_Option { - SI, - SO -}; -typedef enum SISO_Option SISO_Option; - -static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) { - int32_t SISOLength = 0; - - switch (option) { - case SI: - if ((cnvOption&_MBCS_OPTION_KEIS)!=0) { - value[0] = KEIS_SI_CHAR_1; - value[1] = KEIS_SI_CHAR_2; - SISOLength = 2; - } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) 
{ - value[0] = JEF_SI_CHAR; - SISOLength = 1; - } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) { - value[0] = JIPS_SI_CHAR_1; - value[1] = JIPS_SI_CHAR_2; - SISOLength = 2; - } else { - value[0] = UCNV_SI; - SISOLength = 1; - } - break; - case SO: - if ((cnvOption&_MBCS_OPTION_KEIS)!=0) { - value[0] = KEIS_SO_CHAR_1; - value[1] = KEIS_SO_CHAR_2; - SISOLength = 2; - } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) { - value[0] = JEF_SO_CHAR; - SISOLength = 1; - } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) { - value[0] = JIPS_SO_CHAR_1; - value[1] = JIPS_SO_CHAR_2; - SISOLength = 2; - } else { - value[0] = UCNV_SO; - SISOLength = 1; - } - break; - default: - /* Should never happen. */ - break; - } - - return SISOLength; -} - -/* Miscellaneous ------------------------------------------------------------ */ - -/* similar to ucnv_MBCSGetNextUChar() but recursive */ -static UBool -enumToU(UConverterMBCSTable *mbcsTable, int8_t stateProps[], - int32_t state, uint32_t offset, - uint32_t value, - UConverterEnumToUCallback *callback, const void *context, - UErrorCode *pErrorCode) { - UChar32 codePoints[32]; - const int32_t *row; - const uint16_t *unicodeCodeUnits; - UChar32 anyCodePoints; - int32_t b, limit; - - row=mbcsTable->stateTable[state]; - unicodeCodeUnits=mbcsTable->unicodeCodeUnits; - - value<<=8; - anyCodePoints=-1; /* becomes non-negative if there is a mapping */ - - b=(stateProps[state]&0x38)<<2; - if(b==0 && stateProps[state]>=0x40) { - /* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */ - codePoints[0]=U_SENTINEL; - b=1; - } - limit=((stateProps[state]&7)+1)<<5; - while(b=0) { - /* recurse to a state with non-ignorable actions */ - if(!enumToU( - mbcsTable, stateProps, nextState, - offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), - value|(uint32_t)b, - callback, context, - pErrorCode)) { - return FALSE; - } - } - codePoints[b&0x1f]=U_SENTINEL; - } else { - UChar32 c; - int32_t action; - - /* - * An if-else-if chain provides 
more reliable performance for - * the most common cases compared to a switch. - */ - action=MBCS_ENTRY_FINAL_ACTION(entry); - if(action==MBCS_STATE_VALID_DIRECT_16) { - /* output BMP code point */ - c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - } else if(action==MBCS_STATE_VALID_16) { - int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); - c=unicodeCodeUnits[finalOffset]; - if(c<0xfffe) { - /* output BMP code point */ - } else { - c=U_SENTINEL; - } - } else if(action==MBCS_STATE_VALID_16_PAIR) { - int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); - c=unicodeCodeUnits[finalOffset++]; - if(c<0xd800) { - /* output BMP code point below 0xd800 */ - } else if(c<=0xdbff) { - /* output roundtrip or fallback supplementary code point */ - c=((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00); - } else if(c==0xe000) { - /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ - c=unicodeCodeUnits[finalOffset]; - } else { - c=U_SENTINEL; - } - } else if(action==MBCS_STATE_VALID_DIRECT_20) { - /* output supplementary code point */ - c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000); - } else { - c=U_SENTINEL; - } - - codePoints[b&0x1f]=c; - anyCodePoints&=c; - } - if(((++b)&0x1f)==0) { - if(anyCodePoints>=0) { - if(!callback(context, value|(uint32_t)(b-0x20), codePoints)) { - return FALSE; - } - anyCodePoints=-1; - } - } - } - return TRUE; -} - -/* - * Only called if stateProps[state]==-1. - * A recursive call may do stateProps[state]|=0x40 if this state is the target of an - * MBCS_STATE_CHANGE_ONLY. 
- */ -static int8_t -getStateProp(const int32_t (*stateTable)[256], int8_t stateProps[], int state) { - const int32_t *row; - int32_t min, max, entry, nextState; - - row=stateTable[state]; - stateProps[state]=0; - - /* find first non-ignorable state */ - for(min=0;; ++min) { - entry=row[min]; - nextState=MBCS_ENTRY_STATE(entry); - if(stateProps[nextState]==-1) { - getStateProp(stateTable, stateProps, nextState); - } - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - if(stateProps[nextState]>=0) { - break; - } - } else if(MBCS_ENTRY_FINAL_ACTION(entry)>5)<<3); - - /* find last non-ignorable state */ - for(max=0xff; min=0) { - break; - } - } else if(MBCS_ENTRY_FINAL_ACTION(entry)>5); - - /* recurse further and collect direct-state information */ - while(min<=max) { - entry=row[min]; - nextState=MBCS_ENTRY_STATE(entry); - if(stateProps[nextState]==-1) { - getStateProp(stateTable, stateProps, nextState); - } - if(MBCS_ENTRY_IS_FINAL(entry)) { - stateProps[nextState]|=0x40; - if(MBCS_ENTRY_FINAL_ACTION(entry)<=MBCS_STATE_FALLBACK_DIRECT_20) { - stateProps[state]|=0x40; - } - } - ++min; - } - return stateProps[state]; -} - -/* - * Internal function enumerating the toUnicode data of an MBCS converter. - * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U - * table, but could also be used for a future ucnv_getUnicodeSet() option - * that includes reverse fallbacks (after updating this function's implementation). - * Currently only handles roundtrip mappings. - * Does not currently handle extensions. - */ -static void -ucnv_MBCSEnumToUnicode(UConverterMBCSTable *mbcsTable, - UConverterEnumToUCallback *callback, const void *context, - UErrorCode *pErrorCode) { - /* - * Properties for each state, to speed up the enumeration. - * Ignorable actions are unassigned/illegal/state-change-only: - * They do not lead to mappings. 
- * - * Bits 7..6: - * 1 direct/initial state (stateful converters have multiple) - * 0 non-initial state with transitions or with non-ignorable result actions - * -1 final state with only ignorable actions - * - * Bits 5..3: - * The lowest byte value with non-ignorable actions is - * value<<5 (rounded down). - * - * Bits 2..0: - * The highest byte value with non-ignorable actions is - * (value<<5)&0x1f (rounded up). - */ - int8_t stateProps[MBCS_MAX_STATE_COUNT]; - int32_t state; - - uprv_memset(stateProps, -1, sizeof(stateProps)); - - /* recurse from state 0 and set all stateProps */ - getStateProp(mbcsTable->stateTable, stateProps, 0); - - for(state=0; statecountStates; ++state) { - /*if(stateProps[state]==-1) { - printf("unused/unreachable %d\n", state); - }*/ - if(stateProps[state]>=0x40) { - /* start from each direct state */ - enumToU( - mbcsTable, stateProps, state, 0, 0, - callback, context, - pErrorCode); - } - } -} - -U_CFUNC void -ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData, - const USetAdder *sa, - UConverterUnicodeSet which, - UConverterSetFilter filter, - UErrorCode *pErrorCode) { - const UConverterMBCSTable *mbcsTable; - const uint16_t *table; - - uint32_t st3; - uint16_t st1, maxStage1, st2; - - UChar32 c; - - /* enumerate the from-Unicode trie table */ - mbcsTable=&sharedData->mbcs; - table=mbcsTable->fromUnicodeTable; - if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { - maxStage1=0x440; - } else { - maxStage1=0x40; - } - - c=0; /* keep track of the current code point while enumerating */ - - if(mbcsTable->outputType==MBCS_OUTPUT_1) { - const uint16_t *stage2, *stage3, *results; - uint16_t minValue; - - results=(const uint16_t *)mbcsTable->fromUnicodeBytes; - - /* - * Set a threshold variable for selecting which mappings to use. - * See ucnv_MBCSSingleFromBMPWithOffsets() and - * MBCS_SINGLE_RESULT_FROM_U() for details. 
- */ - if(which==UCNV_ROUNDTRIP_SET) { - /* use only roundtrips */ - minValue=0xf00; - } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ { - /* use all roundtrip and fallback results */ - minValue=0x800; - } - - for(st1=0; st1maxStage1) { - stage2=table+st2; - for(st2=0; st2<64; ++st2) { - if((st3=stage2[st2])!=0) { - /* read the stage 3 block */ - stage3=results+st3; - - do { - if(*stage3++>=minValue) { - sa->add(sa->set, c); - } - } while((++c&0xf)!=0); - } else { - c+=16; /* empty stage 3 block */ - } - } - } else { - c+=1024; /* empty stage 2 block */ - } - } - } else { - const uint32_t *stage2; - const uint8_t *stage3, *bytes; - uint32_t st3Multiplier; - uint32_t value; - UBool useFallback; - - bytes=mbcsTable->fromUnicodeBytes; - - useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET); - - switch(mbcsTable->outputType) { - case MBCS_OUTPUT_3: - case MBCS_OUTPUT_4_EUC: - st3Multiplier=3; - break; - case MBCS_OUTPUT_4: - st3Multiplier=4; - break; - default: - st3Multiplier=2; - break; - } - - for(st1=0; st1(maxStage1>>1)) { - stage2=(const uint32_t *)table+st2; - for(st2=0; st2<64; ++st2) { - if((st3=stage2[st2])!=0) { - /* read the stage 3 block */ - stage3=bytes+st3Multiplier*16*(uint32_t)(uint16_t)st3; - - /* get the roundtrip flags for the stage 3 block */ - st3>>=16; - - /* - * Add code points for which the roundtrip flag is set, - * or which map to non-zero bytes if we use fallbacks. - * See ucnv_MBCSFromUnicodeWithOffsets() for details. 
- */ - switch(filter) { - case UCNV_SET_FILTER_NONE: - do { - if(st3&1) { - sa->add(sa->set, c); - stage3+=st3Multiplier; - } else if(useFallback) { - uint8_t b=0; - switch(st3Multiplier) { - case 4: - b|=*stage3++; - U_FALLTHROUGH; - case 3: - b|=*stage3++; - U_FALLTHROUGH; - case 2: - b|=stage3[0]|stage3[1]; - stage3+=2; - U_FALLTHROUGH; - default: - break; - } - if(b!=0) { - sa->add(sa->set, c); - } - } - st3>>=1; - } while((++c&0xf)!=0); - break; - case UCNV_SET_FILTER_DBCS_ONLY: - /* Ignore single-byte results (<0x100). */ - do { - if(((st3&1)!=0 || useFallback) && *((const uint16_t *)stage3)>=0x100) { - sa->add(sa->set, c); - } - st3>>=1; - stage3+=2; /* +=st3Multiplier */ - } while((++c&0xf)!=0); - break; - case UCNV_SET_FILTER_2022_CN: - /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */ - do { - if(((st3&1)!=0 || useFallback) && ((value=*stage3)==0x81 || value==0x82)) { - sa->add(sa->set, c); - } - st3>>=1; - stage3+=3; /* +=st3Multiplier */ - } while((++c&0xf)!=0); - break; - case UCNV_SET_FILTER_SJIS: - /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */ - do { - if(((st3&1)!=0 || useFallback) && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) { - sa->add(sa->set, c); - } - st3>>=1; - stage3+=2; /* +=st3Multiplier */ - } while((++c&0xf)!=0); - break; - case UCNV_SET_FILTER_GR94DBCS: - /* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */ - do { - if( ((st3&1)!=0 || useFallback) && - (uint16_t)((value=*((const uint16_t *)stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) && - (uint8_t)(value-0xa1)<=(0xfe - 0xa1) - ) { - sa->add(sa->set, c); - } - st3>>=1; - stage3+=2; /* +=st3Multiplier */ - } while((++c&0xf)!=0); - break; - case UCNV_SET_FILTER_HZ: - /* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). 
*/ - do { - if( ((st3&1)!=0 || useFallback) && - (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) && - (uint8_t)(value-0xa1)<=(0xfe - 0xa1) - ) { - sa->add(sa->set, c); - } - st3>>=1; - stage3+=2; /* +=st3Multiplier */ - } while((++c&0xf)!=0); - break; - default: - *pErrorCode=U_INTERNAL_PROGRAM_ERROR; - return; - } - } else { - c+=16; /* empty stage 3 block */ - } - } - } else { - c+=1024; /* empty stage 2 block */ - } - } - } - - ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode); -} - -U_CFUNC void -ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - ucnv_MBCSGetFilteredUnicodeSetForUnicode( - sharedData, sa, which, - sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? - UCNV_SET_FILTER_DBCS_ONLY : - UCNV_SET_FILTER_NONE, - pErrorCode); -} - -static void U_CALLCONV -ucnv_MBCSGetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) { - if(cnv->options&_MBCS_OPTION_GB18030) { - sa->addRange(sa->set, 0, 0xd7ff); - sa->addRange(sa->set, 0xe000, 0x10ffff); - } else { - ucnv_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode); - } -} - -/* conversion extensions for input not in the main table -------------------- */ - -/* - * Hardcoded extension handling for GB 18030. - * Definition of LINEAR macros and gb18030Ranges see near the beginning of the file. - * - * In the future, conversion extensions may handle m:n mappings and delta tables, - * see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/conversion_extensions.html - * - * If an input character cannot be mapped, then these functions set an error - * code. The framework will then call the callback function. 
- */ - -/* - * @return if(U_FAILURE) return the code point for cnv->fromUChar32 - * else return 0 after output has been written to the target - */ -static UChar32 -_extFromU(UConverter *cnv, const UConverterSharedData *sharedData, - UChar32 cp, - const UChar **source, const UChar *sourceLimit, - uint8_t **target, const uint8_t *targetLimit, - int32_t **offsets, int32_t sourceIndex, - UBool flush, - UErrorCode *pErrorCode) { - const int32_t *cx; - - cnv->useSubChar1=FALSE; - - if( (cx=sharedData->mbcs.extIndexes)!=NULL && - ucnv_extInitialMatchFromU( - cnv, cx, - cp, source, sourceLimit, - (char **)target, (char *)targetLimit, - offsets, sourceIndex, - flush, - pErrorCode) - ) { - return 0; /* an extension mapping handled the input */ - } - - /* GB 18030 */ - if((cnv->options&_MBCS_OPTION_GB18030)!=0) { - const uint32_t *range; - int32_t i; - - range=gb18030Ranges[0]; - for(i=0; itoUBytes[0..length[ - * @return if(U_FAILURE) return the length (toULength, byteIndex) for the input - * else return 0 after output has been written to the target - */ -static int8_t -_extToU(UConverter *cnv, const UConverterSharedData *sharedData, - int8_t length, - const uint8_t **source, const uint8_t *sourceLimit, - UChar **target, const UChar *targetLimit, - int32_t **offsets, int32_t sourceIndex, - UBool flush, - UErrorCode *pErrorCode) { - const int32_t *cx; - - if( (cx=sharedData->mbcs.extIndexes)!=NULL && - ucnv_extInitialMatchToU( - cnv, cx, - length, (const char **)source, (const char *)sourceLimit, - target, targetLimit, - offsets, sourceIndex, - flush, - pErrorCode) - ) { - return 0; /* an extension mapping handled the input */ - } - - /* GB 18030 */ - if(length==4 && (cnv->options&_MBCS_OPTION_GB18030)!=0) { - const uint32_t *range; - uint32_t linear; - int32_t i; - - linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3]); - range=gb18030Ranges[0]; - for(i=0; iNL ------------------------------------------------------ */ - -/* - * This code 
modifies a standard EBCDIC<->Unicode mapping table for - * OS/390 (z/OS) Unix System Services (Open Edition). - * The difference is in the mapping of Line Feed and New Line control codes: - * Standard EBCDIC maps - * - * \x25 |0 - * \x15 |0 - * - * but OS/390 USS EBCDIC swaps the control codes for LF and NL, - * mapping - * - * \x15 |0 - * \x25 |0 - * - * This code modifies a loaded standard EBCDIC<->Unicode mapping table - * by copying it into allocated memory and swapping the LF and NL values. - * It allows to support the same EBCDIC charset in both versions without - * duplicating the entire installed table. - */ - -/* standard EBCDIC codes */ -#define EBCDIC_LF 0x25 -#define EBCDIC_NL 0x15 - -/* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */ -#define EBCDIC_RT_LF 0xf25 -#define EBCDIC_RT_NL 0xf15 - -/* Unicode code points */ -#define U_LF 0x0a -#define U_NL 0x85 - -static UBool -_EBCDICSwapLFNL(UConverterSharedData *sharedData, UErrorCode *pErrorCode) { - UConverterMBCSTable *mbcsTable; - - const uint16_t *table, *results; - const uint8_t *bytes; - - int32_t (*newStateTable)[256]; - uint16_t *newResults; - uint8_t *p; - char *name; - - uint32_t stage2Entry; - uint32_t size, sizeofFromUBytes; - - mbcsTable=&sharedData->mbcs; - - table=mbcsTable->fromUnicodeTable; - bytes=mbcsTable->fromUnicodeBytes; - results=(const uint16_t *)bytes; - - /* - * Check that this is an EBCDIC table with SBCS portion - - * SBCS or EBCDIC_STATEFUL with standard EBCDIC LF and NL mappings. - * - * If not, ignore the option. Options are always ignored if they do not apply. 
- */ - if(!( - (mbcsTable->outputType==MBCS_OUTPUT_1 || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) && - mbcsTable->stateTable[0][EBCDIC_LF]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) && - mbcsTable->stateTable[0][EBCDIC_NL]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL) - )) { - return FALSE; - } - - if(mbcsTable->outputType==MBCS_OUTPUT_1) { - if(!( - EBCDIC_RT_LF==MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) && - EBCDIC_RT_NL==MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL) - )) { - return FALSE; - } - } else /* MBCS_OUTPUT_2_SISO */ { - stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF); - if(!( - MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF)!=0 && - EBCDIC_LF==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF) - )) { - return FALSE; - } - - stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL); - if(!( - MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL)!=0 && - EBCDIC_NL==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL) - )) { - return FALSE; - } - } - - if(mbcsTable->fromUBytesLength>0) { - /* - * We _know_ the number of bytes in the fromUnicodeBytes array - * starting with header.version 4.1. - */ - sizeofFromUBytes=mbcsTable->fromUBytesLength; - } else { - /* - * Otherwise: - * There used to be code to enumerate the fromUnicode - * trie and find the highest entry, but it was removed in ICU 3.2 - * because it was not tested and caused a low code coverage number. - * See Jitterbug 3674. - * This affects only some .cnv file formats with a header.version - * below 4.1, and only when swaplfnl is requested. - * - * ucnvmbcs.c revision 1.99 is the last one with the - * ucnv_MBCSSizeofFromUBytes() function. - */ - *pErrorCode=U_INVALID_FORMAT_ERROR; - return FALSE; - } - - /* - * The table has an appropriate format. 
- * Allocate and build - * - a modified to-Unicode state table - * - a modified from-Unicode output array - * - a converter name string with the swap option appended - */ - size= - mbcsTable->countStates*1024+ - sizeofFromUBytes+ - UCNV_MAX_CONVERTER_NAME_LENGTH+20; - p=(uint8_t *)uprv_malloc(size); - if(p==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - - /* copy and modify the to-Unicode state table */ - newStateTable=(int32_t (*)[256])p; - uprv_memcpy(newStateTable, mbcsTable->stateTable, mbcsTable->countStates*1024); - - newStateTable[0][EBCDIC_LF]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL); - newStateTable[0][EBCDIC_NL]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF); - - /* copy and modify the from-Unicode result table */ - newResults=(uint16_t *)newStateTable[mbcsTable->countStates]; - uprv_memcpy(newResults, bytes, sizeofFromUBytes); - - /* conveniently, the table access macros work on the left side of expressions */ - if(mbcsTable->outputType==MBCS_OUTPUT_1) { - MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_LF)=EBCDIC_RT_NL; - MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_NL)=EBCDIC_RT_LF; - } else /* MBCS_OUTPUT_2_SISO */ { - stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF); - MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_LF)=EBCDIC_NL; - - stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL); - MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_NL)=EBCDIC_LF; - } - - /* set the canonical converter name */ - name=(char *)newResults+sizeofFromUBytes; - uprv_strcpy(name, sharedData->staticData->name); - uprv_strcat(name, UCNV_SWAP_LFNL_OPTION_STRING); - - /* set the pointers */ - umtx_lock(NULL); - if(mbcsTable->swapLFNLStateTable==NULL) { - mbcsTable->swapLFNLStateTable=newStateTable; - mbcsTable->swapLFNLFromUnicodeBytes=(uint8_t *)newResults; - mbcsTable->swapLFNLName=name; - - newStateTable=NULL; - } - umtx_unlock(NULL); - - /* release the allocated memory if another thread beat us to it */ - 
if(newStateTable!=NULL) { - uprv_free(newStateTable); - } - return TRUE; -} - -/* reconstitute omitted fromUnicode data ------------------------------------ */ - -/* for details, compare with genmbcs.c MBCSAddFromUnicode() and transformEUC() */ -static UBool U_CALLCONV -writeStage3Roundtrip(const void *context, uint32_t value, UChar32 codePoints[32]) { - UConverterMBCSTable *mbcsTable=(UConverterMBCSTable *)context; - const uint16_t *table; - uint32_t *stage2; - uint8_t *bytes, *p; - UChar32 c; - int32_t i, st3; - - table=mbcsTable->fromUnicodeTable; - bytes=(uint8_t *)mbcsTable->fromUnicodeBytes; - - /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */ - switch(mbcsTable->outputType) { - case MBCS_OUTPUT_3_EUC: - if(value<=0xffff) { - /* short sequences are stored directly */ - /* code set 0 or 1 */ - } else if(value<=0x8effff) { - /* code set 2 */ - value&=0x7fff; - } else /* first byte is 0x8f */ { - /* code set 3 */ - value&=0xff7f; - } - break; - case MBCS_OUTPUT_4_EUC: - if(value<=0xffffff) { - /* short sequences are stored directly */ - /* code set 0 or 1 */ - } else if(value<=0x8effffff) { - /* code set 2 */ - value&=0x7fffff; - } else /* first byte is 0x8f */ { - /* code set 3 */ - value&=0xff7fff; - } - break; - default: - break; - } - - for(i=0; i<=0x1f; ++value, ++i) { - c=codePoints[i]; - if(c<0) { - continue; - } - - /* locate the stage 2 & 3 data */ - stage2=((uint32_t *)table)+table[c>>10]+((c>>4)&0x3f); - p=bytes; - st3=(int32_t)(uint16_t)*stage2*16+(c&0xf); - - /* write the codepage bytes into stage 3 */ - switch(mbcsTable->outputType) { - case MBCS_OUTPUT_3: - case MBCS_OUTPUT_4_EUC: - p+=st3*3; - p[0]=(uint8_t)(value>>16); - p[1]=(uint8_t)(value>>8); - p[2]=(uint8_t)value; - break; - case MBCS_OUTPUT_4: - ((uint32_t *)p)[st3]=value; - break; - default: - /* 2 bytes per character */ - ((uint16_t *)p)[st3]=(uint16_t)value; - break; - } - - /* set the roundtrip flag */ - *stage2|=(1UL<<(16+(c&0xf))); - } - return TRUE; - } - 
-static void -reconstituteData(UConverterMBCSTable *mbcsTable, - uint32_t stage1Length, uint32_t stage2Length, - uint32_t fullStage2Length, /* lengths are numbers of units, not bytes */ - UErrorCode *pErrorCode) { - uint16_t *stage1; - uint32_t *stage2; - uint32_t dataLength=stage1Length*2+fullStage2Length*4+mbcsTable->fromUBytesLength; - mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength); - if(mbcsTable->reconstitutedData==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - uprv_memset(mbcsTable->reconstitutedData, 0, dataLength); - - /* copy existing data and reroute the pointers */ - stage1=(uint16_t *)mbcsTable->reconstitutedData; - uprv_memcpy(stage1, mbcsTable->fromUnicodeTable, stage1Length*2); - - stage2=(uint32_t *)(stage1+stage1Length); - uprv_memcpy(stage2+(fullStage2Length-stage2Length), - mbcsTable->fromUnicodeTable+stage1Length, - stage2Length*4); - - mbcsTable->fromUnicodeTable=stage1; - mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length); - - /* indexes into stage 2 count from the bottom of the fromUnicodeTable */ - stage2=(uint32_t *)stage1; - - /* reconstitute the initial part of stage 2 from the mbcsIndex */ - { - int32_t stageUTF8Length=((int32_t)mbcsTable->maxFastUChar+1)>>6; - int32_t stageUTF8Index=0; - int32_t st1, st2, st3, i; - - for(st1=0; stageUTF8IndexmbcsIndex[stageUTF8Index++]; - if(st3!=0) { - /* an stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */ - st3>>=4; - /* - * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are - * allocated together as a single 64-block for access from the mbcsIndex - */ - stage2[st2++]=st3++; - stage2[st2++]=st3++; - stage2[st2++]=st3++; - stage2[st2++]=st3; - } else { - /* no stage 3 block, skip */ - st2+=4; - } - } - } else { - /* no stage 2 block, skip */ - stageUTF8Index+=16; - } - } - } - - /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */ - ucnv_MBCSEnumToUnicode(mbcsTable, writeStage3Roundtrip, 
mbcsTable, pErrorCode); -} - -/* MBCS setup functions ----------------------------------------------------- */ - -static void U_CALLCONV -ucnv_MBCSLoad(UConverterSharedData *sharedData, - UConverterLoadArgs *pArgs, - const uint8_t *raw, - UErrorCode *pErrorCode) { - UDataInfo info; - UConverterMBCSTable *mbcsTable=&sharedData->mbcs; - _MBCSHeader *header=(_MBCSHeader *)raw; - uint32_t offset; - uint32_t headerLength; - UBool noFromU=FALSE; - - if(header->version[0]==4) { - headerLength=MBCS_HEADER_V4_LENGTH; - } else if(header->version[0]==5 && header->version[1]>=3 && - (header->options&MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0) { - headerLength=header->options&MBCS_OPT_LENGTH_MASK; - noFromU=(UBool)((header->options&MBCS_OPT_NO_FROM_U)!=0); - } else { - *pErrorCode=U_INVALID_TABLE_FORMAT; - return; - } - - mbcsTable->outputType=(uint8_t)header->flags; - if(noFromU && mbcsTable->outputType==MBCS_OUTPUT_1) { - *pErrorCode=U_INVALID_TABLE_FORMAT; - return; - } - - /* extension data, header version 4.2 and higher */ - offset=header->flags>>8; - if(offset!=0) { - mbcsTable->extIndexes=(const int32_t *)(raw+offset); - } - - if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) { - UConverterLoadArgs args=UCNV_LOAD_ARGS_INITIALIZER; - UConverterSharedData *baseSharedData; - const int32_t *extIndexes; - const char *baseName; - - /* extension-only file, load the base table and set values appropriately */ - if((extIndexes=mbcsTable->extIndexes)==NULL) { - /* extension-only file without extension */ - *pErrorCode=U_INVALID_TABLE_FORMAT; - return; - } - - if(pArgs->nestedLoads!=1) { - /* an extension table must not be loaded as a base table */ - *pErrorCode=U_INVALID_TABLE_FILE; - return; - } - - /* load the base table */ - baseName=(const char *)header+headerLength*4; - if(0==uprv_strcmp(baseName, sharedData->staticData->name)) { - /* forbid loading this same extension-only file */ - *pErrorCode=U_INVALID_TABLE_FORMAT; - return; - } - - /* TODO parse package name out of the prefix 
of the base name in the extension .cnv file? */ - args.size=sizeof(UConverterLoadArgs); - args.nestedLoads=2; - args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable; - args.reserved=pArgs->reserved; - args.options=pArgs->options; - args.pkg=pArgs->pkg; - args.name=baseName; - baseSharedData=ucnv_load(&args, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - if( baseSharedData->staticData->conversionType!=UCNV_MBCS || - baseSharedData->mbcs.baseSharedData!=NULL - ) { - ucnv_unload(baseSharedData); - *pErrorCode=U_INVALID_TABLE_FORMAT; - return; - } - if(pArgs->onlyTestIsLoadable) { - /* - * Exit as soon as we know that we can load the converter - * and the format is valid and supported. - * The worst that can happen in the following code is a memory - * allocation error. - */ - ucnv_unload(baseSharedData); - return; - } - - /* copy the base table data */ - uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable)); - - /* overwrite values with relevant ones for the extension converter */ - mbcsTable->baseSharedData=baseSharedData; - mbcsTable->extIndexes=extIndexes; - - /* - * It would be possible to share the swapLFNL data with a base converter, - * but the generated name would have to be different, and the memory - * would have to be free'd only once. - * It is easier to just create the data for the extension converter - * separately when it is requested. - */ - mbcsTable->swapLFNLStateTable=NULL; - mbcsTable->swapLFNLFromUnicodeBytes=NULL; - mbcsTable->swapLFNLName=NULL; - - /* - * The reconstitutedData must be deleted only when the base converter - * is unloaded. - */ - mbcsTable->reconstitutedData=NULL; - - /* - * Set a special, runtime-only outputType if the extension converter - * is a DBCS version of a base converter that also maps single bytes. 
- */ - if( sharedData->staticData->conversionType==UCNV_DBCS || - (sharedData->staticData->conversionType==UCNV_MBCS && - sharedData->staticData->minBytesPerChar>=2) - ) { - if(baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) { - /* the base converter is SI/SO-stateful */ - int32_t entry; - - /* get the dbcs state from the state table entry for SO=0x0e */ - entry=mbcsTable->stateTable[0][0xe]; - if( MBCS_ENTRY_IS_FINAL(entry) && - MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY && - MBCS_ENTRY_FINAL_STATE(entry)!=0 - ) { - mbcsTable->dbcsOnlyState=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); - - mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY; - } - } else if( - baseSharedData->staticData->conversionType==UCNV_MBCS && - baseSharedData->staticData->minBytesPerChar==1 && - baseSharedData->staticData->maxBytesPerChar==2 && - mbcsTable->countStates<=127 - ) { - /* non-stateful base converter, need to modify the state table */ - int32_t (*newStateTable)[256]; - int32_t *state; - int32_t i, count; - - /* allocate a new state table and copy the base state table contents */ - count=mbcsTable->countStates; - newStateTable=(int32_t (*)[256])uprv_malloc((count+1)*1024); - if(newStateTable==NULL) { - ucnv_unload(baseSharedData); - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - - uprv_memcpy(newStateTable, mbcsTable->stateTable, count*1024); - - /* change all final single-byte entries to go to a new all-illegal state */ - state=newStateTable[0]; - for(i=0; i<256; ++i) { - if(MBCS_ENTRY_IS_FINAL(state[i])) { - state[i]=MBCS_ENTRY_TRANSITION(count, 0); - } - } - - /* build the new all-illegal state */ - state=newStateTable[count]; - for(i=0; i<256; ++i) { - state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0); - } - mbcsTable->stateTable=(const int32_t (*)[256])newStateTable; - mbcsTable->countStates=(uint8_t)(count+1); - mbcsTable->stateTableOwned=TRUE; - - mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY; - } - } - - /* - * unlike below for files with base tables, do not 
get the unicodeMask - * from the sharedData; instead, use the base table's unicodeMask, - * which we copied in the memcpy above; - * this is necessary because the static data unicodeMask, especially - * the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data - */ - } else { - /* conversion file with a base table; an additional extension table is optional */ - /* make sure that the output type is known */ - switch(mbcsTable->outputType) { - case MBCS_OUTPUT_1: - case MBCS_OUTPUT_2: - case MBCS_OUTPUT_3: - case MBCS_OUTPUT_4: - case MBCS_OUTPUT_3_EUC: - case MBCS_OUTPUT_4_EUC: - case MBCS_OUTPUT_2_SISO: - /* OK */ - break; - default: - *pErrorCode=U_INVALID_TABLE_FORMAT; - return; - } - if(pArgs->onlyTestIsLoadable) { - /* - * Exit as soon as we know that we can load the converter - * and the format is valid and supported. - * The worst that can happen in the following code is a memory - * allocation error. - */ - return; - } - - mbcsTable->countStates=(uint8_t)header->countStates; - mbcsTable->countToUFallbacks=header->countToUFallbacks; - mbcsTable->stateTable=(const int32_t (*)[256])(raw+headerLength*4); - mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates); - mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits); - - mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable); - mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes); - mbcsTable->fromUBytesLength=header->fromUBytesLength; - - /* - * converter versions 6.1 and up contain a unicodeMask that is - * used here to select the most efficient function implementations - */ - info.size=sizeof(UDataInfo); - udata_getInfo((UDataMemory *)sharedData->dataMemory, &info); - if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) { - /* mask off possible future extensions to be safe */ - mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3); - } else { - 
/* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */ - mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES; - } - - /* - * _MBCSHeader.version 4.3 adds utf8Friendly data structures. - * Check for the header version, SBCS vs. MBCS, and for whether the - * data structures are optimized for code points as high as what the - * runtime code is designed for. - * The implementation does not handle mapping tables with entries for - * unpaired surrogates. - */ - if( header->version[1]>=3 && - (mbcsTable->unicodeMask&UCNV_HAS_SURROGATES)==0 && - (mbcsTable->countStates==1 ? - (header->version[2]>=(SBCS_FAST_MAX>>8)) : - (header->version[2]>=(MBCS_FAST_MAX>>8)) - ) - ) { - mbcsTable->utf8Friendly=TRUE; - - if(mbcsTable->countStates==1) { - /* - * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher. - * Build a table with indexes to each block, to be used instead of - * the regular stage 1/2 table. - */ - int32_t i; - for(i=0; i<(SBCS_FAST_LIMIT>>6); ++i) { - mbcsTable->sbcsIndex[i]=mbcsTable->fromUnicodeTable[mbcsTable->fromUnicodeTable[i>>4]+((i<<2)&0x3c)]; - } - /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header->version[2]>(SBCS_FAST_MAX>>8) */ - mbcsTable->maxFastUChar=SBCS_FAST_MAX; - } else { - /* - * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher. - * The .cnv file is prebuilt with an additional stage table with indexes - * to each block. - */ - mbcsTable->mbcsIndex=(const uint16_t *) - (mbcsTable->fromUnicodeBytes+ - (noFromU ? 
0 : mbcsTable->fromUBytesLength)); - mbcsTable->maxFastUChar=(((UChar)header->version[2])<<8)|0xff; - } - } - - /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */ - { - uint32_t asciiRoundtrips=0xffffffff; - int32_t i; - - for(i=0; i<0x80; ++i) { - if(mbcsTable->stateTable[0][i]!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) { - asciiRoundtrips&=~((uint32_t)1<<(i>>2)); - } - } - mbcsTable->asciiRoundtrips=asciiRoundtrips; - } - - if(noFromU) { - uint32_t stage1Length= - mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY ? - 0x440 : 0x40; - uint32_t stage2Length= - (header->offsetFromUBytes-header->offsetFromUTable)/4- - stage1Length/2; - reconstituteData(mbcsTable, stage1Length, stage2Length, header->fullStage2Length, pErrorCode); - } - } - - /* Set the impl pointer here so that it is set for both extension-only and base tables. */ - if(mbcsTable->utf8Friendly) { - if(mbcsTable->countStates==1) { - sharedData->impl=&_SBCSUTF8Impl; - } else { - if(mbcsTable->outputType==MBCS_OUTPUT_2) { - sharedData->impl=&_DBCSUTF8Impl; - } - } - } - - if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) { - /* - * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip. - * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly. 
- */ - mbcsTable->asciiRoundtrips=0; - } -} - -static void U_CALLCONV -ucnv_MBCSUnload(UConverterSharedData *sharedData) { - UConverterMBCSTable *mbcsTable=&sharedData->mbcs; - - if(mbcsTable->swapLFNLStateTable!=NULL) { - uprv_free(mbcsTable->swapLFNLStateTable); - } - if(mbcsTable->stateTableOwned) { - uprv_free((void *)mbcsTable->stateTable); - } - if(mbcsTable->baseSharedData!=NULL) { - ucnv_unload(mbcsTable->baseSharedData); - } - if(mbcsTable->reconstitutedData!=NULL) { - uprv_free(mbcsTable->reconstitutedData); - } -} - -static void U_CALLCONV -ucnv_MBCSOpen(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - UConverterMBCSTable *mbcsTable; - const int32_t *extIndexes; - uint8_t outputType; - int8_t maxBytesPerUChar; - - if(pArgs->onlyTestIsLoadable) { - return; - } - - mbcsTable=&cnv->sharedData->mbcs; - outputType=mbcsTable->outputType; - - if(outputType==MBCS_OUTPUT_DBCS_ONLY) { - /* the swaplfnl option does not apply, remove it */ - cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL; - } - - if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) { - /* do this because double-checked locking is broken */ - UBool isCached; - - umtx_lock(NULL); - isCached=mbcsTable->swapLFNLStateTable!=NULL; - umtx_unlock(NULL); - - if(!isCached) { - if(!_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) { - if(U_FAILURE(*pErrorCode)) { - return; /* something went wrong */ - } - - /* the option does not apply, remove it */ - cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL; - } - } - } - - if(uprv_strstr(pArgs->name, "18030")!=NULL) { - if(uprv_strstr(pArgs->name, "gb18030")!=NULL || uprv_strstr(pArgs->name, "GB18030")!=NULL) { - /* set a flag for GB 18030 mode, which changes the callback behavior */ - cnv->options|=_MBCS_OPTION_GB18030; - } - } else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) || (uprv_strstr(pArgs->name, "keis")!=NULL)) { - /* set a flag for KEIS converter, which changes the SI/SO character sequence */ - cnv->options|=_MBCS_OPTION_KEIS; - } 
else if((uprv_strstr(pArgs->name, "JEF")!=NULL) || (uprv_strstr(pArgs->name, "jef")!=NULL)) { - /* set a flag for JEF converter, which changes the SI/SO character sequence */ - cnv->options|=_MBCS_OPTION_JEF; - } else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) || (uprv_strstr(pArgs->name, "jips")!=NULL)) { - /* set a flag for JIPS converter, which changes the SI/SO character sequence */ - cnv->options|=_MBCS_OPTION_JIPS; - } - - /* fix maxBytesPerUChar depending on outputType and options etc. */ - if(outputType==MBCS_OUTPUT_2_SISO) { - cnv->maxBytesPerUChar=3; /* SO+DBCS */ - } - - extIndexes=mbcsTable->extIndexes; - if(extIndexes!=NULL) { - maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes); - if(outputType==MBCS_OUTPUT_2_SISO) { - ++maxBytesPerUChar; /* SO + multiple DBCS */ - } - - if(maxBytesPerUChar>cnv->maxBytesPerUChar) { - cnv->maxBytesPerUChar=maxBytesPerUChar; - } - } - -#if 0 - /* - * documentation of UConverter fields used for status - * all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset() - */ - - /* toUnicode */ - cnv->toUnicodeStatus=0; /* offset */ - cnv->mode=0; /* state */ - cnv->toULength=0; /* byteIndex */ - - /* fromUnicode */ - cnv->fromUChar32=0; - cnv->fromUnicodeStatus=1; /* prevLength */ -#endif -} - -U_CDECL_BEGIN - -static const char* U_CALLCONV -ucnv_MBCSGetName(const UConverter *cnv) { - if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->mbcs.swapLFNLName!=NULL) { - return cnv->sharedData->mbcs.swapLFNLName; - } else { - return cnv->sharedData->staticData->name; - } -} -U_CDECL_END - - -/* MBCS-to-Unicode conversion functions ------------------------------------- */ - -static UChar32 U_CALLCONV -ucnv_MBCSGetFallback(UConverterMBCSTable *mbcsTable, uint32_t offset) { - const _MBCSToUFallback *toUFallbacks; - uint32_t i, start, limit; - - limit=mbcsTable->countToUFallbacks; - if(limit>0) { - /* do a binary search for the fallback mapping */ - toUFallbacks=mbcsTable->toUFallbacks; - start=0; - 
while(startconverter; - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetLimit=pArgs->targetLimit; - offsets=pArgs->offsets; - - if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { - stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; - } else { - stateTable=cnv->sharedData->mbcs.stateTable; - } - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex=0; - - /* conversion loop */ - while(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - - entry=stateTable[0][*source++]; - /* MBCS_ENTRY_IS_FINAL(entry) */ - - /* test the most common case first */ - if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { - /* output BMP code point */ - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - - /* normal end of action codes: prepare for a new character */ - ++sourceIndex; - continue; - } - - /* - * An if-else-if chain provides more reliable performance for - * the most common cases compared to a switch. 
- */ - action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); - if(action==MBCS_STATE_VALID_DIRECT_20 || - (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) - ) { - entry=MBCS_ENTRY_FINAL_VALUE(entry); - /* output surrogate pair */ - *target++=(UChar)(0xd800|(UChar)(entry>>10)); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - c=(UChar)(0xdc00|(UChar)(entry&0x3ff)); - if(targetUCharErrorBuffer[0]=c; - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - - ++sourceIndex; - continue; - } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { - if(UCNV_TO_U_USE_FALLBACK(cnv)) { - /* output BMP code point */ - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - - ++sourceIndex; - continue; - } - } else if(action==MBCS_STATE_UNASSIGNED) { - /* just fall through */ - } else if(action==MBCS_STATE_ILLEGAL) { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } else { - /* reserved, must never occur */ - ++sourceIndex; - continue; - } - - if(U_FAILURE(*pErrorCode)) { - /* callback(illegal) */ - break; - } else /* unassigned sequences indicated with byteIndex>0 */ { - /* try an extension mapping */ - pArgs->source=(const char *)source; - cnv->toUBytes[0]=*(source-1); - cnv->toULength=_extToU(cnv, cnv->sharedData, - 1, &source, sourceLimit, - &target, targetLimit, - &offsets, sourceIndex, - pArgs->flush, - pErrorCode); - sourceIndex+=1+(int32_t)(source-(const uint8_t *)pArgs->source); - - if(U_FAILURE(*pErrorCode)) { - /* not mappable or buffer overflow */ - break; - } - } - } - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; -} - -/* - * This version of ucnv_MBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages - * that only map to and from the BMP. - * In addition to single-byte optimizations, the offset calculations - * become much easier. 
- */ -static void -ucnv_MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source, *sourceLimit, *lastSource; - UChar *target; - int32_t targetCapacity, length; - int32_t *offsets; - - const int32_t (*stateTable)[256]; - - int32_t sourceIndex; - - int32_t entry; - uint8_t action; - - /* set up the local pointers */ - cnv=pArgs->converter; - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { - stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; - } else { - stateTable=cnv->sharedData->mbcs.stateTable; - } - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex=0; - lastSource=source; - - /* - * since the conversion here is 1:1 UChar:uint8_t, we need only one counter - * for the minimum of the sourceLength and targetCapacity - */ - length=(int32_t)(sourceLimit-source); - if(length=16) { - int32_t count, loops, oredEntries; - - loops=count=targetCapacity>>4; - do { - oredEntries=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - 
*target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - oredEntries|=entry=stateTable[0][*source++]; - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - - /* were all 16 entries really valid? */ - if(!MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(oredEntries)) { - /* no, return to the first of these 16 */ - source-=16; - target-=16; - break; - } - } while(--count>0); - count=loops-count; - targetCapacity-=16*count; - - if(offsets!=NULL) { - lastSource+=16*count; - while(count>0) { - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - --count; - } - } - } -#endif - - /* conversion loop */ - while(targetCapacity > 0 && source < sourceLimit) { - entry=stateTable[0][*source++]; - /* MBCS_ENTRY_IS_FINAL(entry) */ - - /* test the most common case first */ - if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { - /* output BMP code point */ - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - --targetCapacity; 
- continue; - } - - /* - * An if-else-if chain provides more reliable performance for - * the most common cases compared to a switch. - */ - action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); - if(action==MBCS_STATE_FALLBACK_DIRECT_16) { - if(UCNV_TO_U_USE_FALLBACK(cnv)) { - /* output BMP code point */ - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - --targetCapacity; - continue; - } - } else if(action==MBCS_STATE_UNASSIGNED) { - /* just fall through */ - } else if(action==MBCS_STATE_ILLEGAL) { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } else { - /* reserved, must never occur */ - continue; - } - - /* set offsets since the start or the last extension */ - if(offsets!=NULL) { - int32_t count=(int32_t)(source-lastSource); - - /* predecrement: do not set the offset for the callback-causing character */ - while(--count>0) { - *offsets++=sourceIndex++; - } - /* offset and sourceIndex are now set for the current character */ - } - - if(U_FAILURE(*pErrorCode)) { - /* callback(illegal) */ - break; - } else /* unassigned sequences indicated with byteIndex>0 */ { - /* try an extension mapping */ - lastSource=source; - cnv->toUBytes[0]=*(source-1); - cnv->toULength=_extToU(cnv, cnv->sharedData, - 1, &source, sourceLimit, - &target, pArgs->targetLimit, - &offsets, sourceIndex, - pArgs->flush, - pErrorCode); - sourceIndex+=1+(int32_t)(source-lastSource); - - if(U_FAILURE(*pErrorCode)) { - /* not mappable or buffer overflow */ - break; - } - - /* recalculate the targetCapacity after an extension mapping */ - targetCapacity=(int32_t)(pArgs->targetLimit-target); - length=(int32_t)(sourceLimit-source); - if(length=pArgs->targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* set offsets since the start or the last callback */ - if(offsets!=NULL) { - size_t count=source-lastSource; - while(count>0) { - *offsets++=sourceIndex++; - --count; - } - } - - /* write back the updated pointers */ - pArgs->source=(const char 
*)source; - pArgs->target=target; - pArgs->offsets=offsets; -} - -static UBool -hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) { - const int32_t *row=stateTable[state]; - int32_t b, entry; - /* First test for final entries in this state for some commonly valid byte values. */ - entry=row[0xa1]; - if( !MBCS_ENTRY_IS_TRANSITION(entry) && - MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL - ) { - return TRUE; - } - entry=row[0x41]; - if( !MBCS_ENTRY_IS_TRANSITION(entry) && - MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL - ) { - return TRUE; - } - /* Then test for final entries in this state. */ - for(b=0; b<=0xff; ++b) { - entry=row[b]; - if( !MBCS_ENTRY_IS_TRANSITION(entry) && - MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL - ) { - return TRUE; - } - } - /* Then recurse for transition entries. */ - for(b=0; b<=0xff; ++b) { - entry=row[b]; - if( MBCS_ENTRY_IS_TRANSITION(entry) && - hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)) - ) { - return TRUE; - } - } - return FALSE; -} - -/* - * Is byte b a single/lead byte in this state? - * Recurse for transition states, because here we don't want to say that - * b is a lead byte if all byte sequences that start with b are illegal. 
- */ -static UBool -isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) { - const int32_t *row=stateTable[state]; - int32_t entry=row[b]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */ - return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)); - } else { - uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); - if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) { - return FALSE; /* SI/SO are illegal for DBCS-only conversion */ - } else { - return action!=MBCS_STATE_ILLEGAL; - } - } -} - -U_CFUNC void -ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source, *sourceLimit; - UChar *target; - const UChar *targetLimit; - int32_t *offsets; - - const int32_t (*stateTable)[256]; - const uint16_t *unicodeCodeUnits; - - uint32_t offset; - uint8_t state; - int8_t byteIndex; - uint8_t *bytes; - - int32_t sourceIndex, nextSourceIndex; - - int32_t entry; - UChar c; - uint8_t action; - - /* use optimized function if possible */ - cnv=pArgs->converter; - - if(cnv->preToULength>0) { - /* - * pass sourceIndex=-1 because we continue from an earlier buffer - * in the future, this may change with continuous offsets - */ - ucnv_extContinueMatchToU(cnv, pArgs, -1, pErrorCode); - - if(U_FAILURE(*pErrorCode) || cnv->preToULength<0) { - return; - } - } - - if(cnv->sharedData->mbcs.countStates==1) { - if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { - ucnv_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode); - } else { - ucnv_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode); - } - return; - } - - /* set up the local pointers */ - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetLimit=pArgs->targetLimit; - offsets=pArgs->offsets; - - if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { - stateTable=(const int32_t 
(*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; - } else { - stateTable=cnv->sharedData->mbcs.stateTable; - } - unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits; - - /* get the converter state from UConverter */ - offset=cnv->toUnicodeStatus; - byteIndex=cnv->toULength; - bytes=cnv->toUBytes; - - /* - * if we are in the SBCS state for a DBCS-only converter, - * then load the DBCS state from the MBCS data - * (dbcsOnlyState==0 if it is not a DBCS-only converter) - */ - if((state=(uint8_t)(cnv->mode))==0) { - state=cnv->sharedData->mbcs.dbcsOnlyState; - } - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex=byteIndex==0 ? 0 : -1; - nextSourceIndex=0; - - /* conversion loop */ - while(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - - if(byteIndex==0) { - /* optimized loop for 1/2-byte input and BMP output */ - if(offsets==NULL) { - do { - entry=stateTable[state][*source]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); - offset=MBCS_ENTRY_TRANSITION_OFFSET(entry); - - ++source; - if( source=sourceLimit) { - break; - } - if(target>=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - - ++nextSourceIndex; - bytes[byteIndex++]=*source++; - } else /* byteIndex>0 */ { - ++nextSourceIndex; - entry=stateTable[state][bytes[byteIndex++]=*source++]; - } - - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); - offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); - continue; - } - - /* save the previous state for proper extension mapping with SI/SO-stateful converters */ - cnv->mode=state; - - /* set the next state early so that we can reuse the entry variable */ - state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ - - /* - * An if-else-if chain provides more reliable performance for - * the most common cases compared to a switch. 
- */ - action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); - if(action==MBCS_STATE_VALID_16) { - offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); - c=unicodeCodeUnits[offset]; - if(c<0xfffe) { - /* output BMP code point */ - *target++=c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - byteIndex=0; - } else if(c==0xfffe) { - if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) { - /* output fallback BMP code point */ - *target++=(UChar)entry; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - byteIndex=0; - } - } else { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else if(action==MBCS_STATE_VALID_DIRECT_16) { - /* output BMP code point */ - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - byteIndex=0; - } else if(action==MBCS_STATE_VALID_16_PAIR) { - offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); - c=unicodeCodeUnits[offset++]; - if(c<0xd800) { - /* output BMP code point below 0xd800 */ - *target++=c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - byteIndex=0; - } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) { - /* output roundtrip or fallback surrogate pair */ - *target++=(UChar)(c&0xdbff); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - byteIndex=0; - if(targetUCharErrorBuffer[0]=unicodeCodeUnits[offset]; - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - - offset=0; - break; - } - } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? 
(c&0xfffe)==0xe000 : c==0xe000) { - /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ - *target++=unicodeCodeUnits[offset]; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - byteIndex=0; - } else if(c==0xffff) { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else if(action==MBCS_STATE_VALID_DIRECT_20 || - (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) - ) { - entry=MBCS_ENTRY_FINAL_VALUE(entry); - /* output surrogate pair */ - *target++=(UChar)(0xd800|(UChar)(entry>>10)); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - byteIndex=0; - c=(UChar)(0xdc00|(UChar)(entry&0x3ff)); - if(targetUCharErrorBuffer[0]=c; - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - - offset=0; - break; - } - } else if(action==MBCS_STATE_CHANGE_ONLY) { - /* - * This serves as a state change without any output. - * It is useful for reading simple stateful encodings, - * for example using just Shift-In/Shift-Out codes. - * The 21 unused bits may later be used for more sophisticated - * state transitions. 
- */ - if(cnv->sharedData->mbcs.dbcsOnlyState==0) { - byteIndex=0; - } else { - /* SI/SO are illegal for DBCS-only conversion */ - state=(uint8_t)(cnv->mode); /* restore the previous state */ - - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { - if(UCNV_TO_U_USE_FALLBACK(cnv)) { - /* output BMP code point */ - *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - byteIndex=0; - } - } else if(action==MBCS_STATE_UNASSIGNED) { - /* just fall through */ - } else if(action==MBCS_STATE_ILLEGAL) { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } else { - /* reserved, must never occur */ - byteIndex=0; - } - - /* end of action codes: prepare for a new character */ - offset=0; - - if(byteIndex==0) { - sourceIndex=nextSourceIndex; - } else if(U_FAILURE(*pErrorCode)) { - /* callback(illegal) */ - if(byteIndex>1) { - /* - * Ticket 5691: consistent illegal sequences: - * - We include at least the first byte in the illegal sequence. - * - If any of the non-initial bytes could be the start of a character, - * we stop the illegal sequence before the first one of those. - */ - UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0); - int8_t i; - for(i=1; - isource); - byteIndex=i; /* length of reported illegal byte sequence */ - if(backOutDistance<=bytesFromThisBuffer) { - source-=backOutDistance; - } else { - /* Back out bytes from the previous buffer: Need to replay them. */ - cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance); - /* preToULength is negative! 
*/ - uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength); - source=(const uint8_t *)pArgs->source; - } - } - } - break; - } else /* unassigned sequences indicated with byteIndex>0 */ { - /* try an extension mapping */ - pArgs->source=(const char *)source; - byteIndex=_extToU(cnv, cnv->sharedData, - byteIndex, &source, sourceLimit, - &target, targetLimit, - &offsets, sourceIndex, - pArgs->flush, - pErrorCode); - sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source); - - if(U_FAILURE(*pErrorCode)) { - /* not mappable or buffer overflow */ - break; - } - } - } - - /* set the converter state back into UConverter */ - cnv->toUnicodeStatus=offset; - cnv->mode=state; - cnv->toULength=byteIndex; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; -} - -/* - * This version of ucnv_MBCSGetNextUChar() is optimized for single-byte, single-state codepages. - * We still need a conversion loop in case we find reserved action codes, which are to be ignored. - */ -static UChar32 -ucnv_MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const int32_t (*stateTable)[256]; - const uint8_t *source, *sourceLimit; - - int32_t entry; - uint8_t action; - - /* set up the local pointers */ - cnv=pArgs->converter; - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { - stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; - } else { - stateTable=cnv->sharedData->mbcs.stateTable; - } - - /* conversion loop */ - while(sourcesource=(const char *)source; - - if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { - /* output BMP code point */ - return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - } - - /* - * An if-else-if chain provides more reliable performance for - * the most common cases compared to a switch. 
- */ - action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); - if( action==MBCS_STATE_VALID_DIRECT_20 || - (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) - ) { - /* output supplementary code point */ - return (UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000); - } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { - if(UCNV_TO_U_USE_FALLBACK(cnv)) { - /* output BMP code point */ - return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - } - } else if(action==MBCS_STATE_UNASSIGNED) { - /* just fall through */ - } else if(action==MBCS_STATE_ILLEGAL) { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } else { - /* reserved, must never occur */ - continue; - } - - if(U_FAILURE(*pErrorCode)) { - /* callback(illegal) */ - break; - } else /* unassigned sequence */ { - /* defer to the generic implementation */ - pArgs->source=(const char *)source-1; - return UCNV_GET_NEXT_UCHAR_USE_TO_U; - } - } - - /* no output because of empty input or only state changes */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0xffff; -} - -/* - * Version of _MBCSToUnicodeWithOffsets() optimized for single-character - * conversion without offset handling. - * - * When a character does not have a mapping to Unicode, then we return to the - * generic ucnv_getNextUChar() code for extension/GB 18030 and error/callback - * handling. - * We also defer to the generic code in other complicated cases and have them - * ultimately handled by _MBCSToUnicodeWithOffsets() itself. - * - * All normal mappings and errors are handled here. 
- */ -static UChar32 U_CALLCONV -ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const uint8_t *source, *sourceLimit, *lastSource; - - const int32_t (*stateTable)[256]; - const uint16_t *unicodeCodeUnits; - - uint32_t offset; - uint8_t state; - - int32_t entry; - UChar32 c; - uint8_t action; - - /* use optimized function if possible */ - cnv=pArgs->converter; - - if(cnv->preToULength>0) { - /* use the generic code in ucnv_getNextUChar() to continue with a partial match */ - return UCNV_GET_NEXT_UCHAR_USE_TO_U; - } - - if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES) { - /* - * Using the generic ucnv_getNextUChar() code lets us deal correctly - * with the rare case of a codepage that maps single surrogates - * without adding the complexity to this already complicated function here. - */ - return UCNV_GET_NEXT_UCHAR_USE_TO_U; - } else if(cnv->sharedData->mbcs.countStates==1) { - return ucnv_MBCSSingleGetNextUChar(pArgs, pErrorCode); - } - - /* set up the local pointers */ - source=lastSource=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - - if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { - stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; - } else { - stateTable=cnv->sharedData->mbcs.stateTable; - } - unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits; - - /* get the converter state from UConverter */ - offset=cnv->toUnicodeStatus; - - /* - * if we are in the SBCS state for a DBCS-only converter, - * then load the DBCS state from the MBCS data - * (dbcsOnlyState==0 if it is not a DBCS-only converter) - */ - if((state=(uint8_t)(cnv->mode))==0) { - state=cnv->sharedData->mbcs.dbcsOnlyState; - } - - /* conversion loop */ - c=U_SENTINEL; - while(sourcemode=state; - - /* set the next state early so that we can reuse the entry variable */ - state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ - - /* - * An if-else-if chain provides 
more reliable performance for - * the most common cases compared to a switch. - */ - action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); - if(action==MBCS_STATE_VALID_DIRECT_16) { - /* output BMP code point */ - c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - break; - } else if(action==MBCS_STATE_VALID_16) { - offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); - c=unicodeCodeUnits[offset]; - if(c<0xfffe) { - /* output BMP code point */ - break; - } else if(c==0xfffe) { - if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) { - break; - } - } else { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else if(action==MBCS_STATE_VALID_16_PAIR) { - offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); - c=unicodeCodeUnits[offset++]; - if(c<0xd800) { - /* output BMP code point below 0xd800 */ - break; - } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) { - /* output roundtrip or fallback supplementary code point */ - c=((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00); - break; - } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) { - /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ - c=unicodeCodeUnits[offset]; - break; - } else if(c==0xffff) { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else if(action==MBCS_STATE_VALID_DIRECT_20 || - (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) - ) { - /* output supplementary code point */ - c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000); - break; - } else if(action==MBCS_STATE_CHANGE_ONLY) { - /* - * This serves as a state change without any output. - * It is useful for reading simple stateful encodings, - * for example using just Shift-In/Shift-Out codes. - * The 21 unused bits may later be used for more sophisticated - * state transitions. 
- */ - if(cnv->sharedData->mbcs.dbcsOnlyState!=0) { - /* SI/SO are illegal for DBCS-only conversion */ - state=(uint8_t)(cnv->mode); /* restore the previous state */ - - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } - } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { - if(UCNV_TO_U_USE_FALLBACK(cnv)) { - /* output BMP code point */ - c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - break; - } - } else if(action==MBCS_STATE_UNASSIGNED) { - /* just fall through */ - } else if(action==MBCS_STATE_ILLEGAL) { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - } else { - /* reserved (must never occur), or only state change */ - offset=0; - lastSource=source; - continue; - } - - /* end of action codes: prepare for a new character */ - offset=0; - - if(U_FAILURE(*pErrorCode)) { - /* callback(illegal) */ - break; - } else /* unassigned sequence */ { - /* defer to the generic implementation */ - cnv->toUnicodeStatus=0; - cnv->mode=state; - pArgs->source=(const char *)lastSource; - return UCNV_GET_NEXT_UCHAR_USE_TO_U; - } - } - } - - if(c<0) { - if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSourcetoUBytes; - cnv->toULength=(int8_t)(source-lastSource); - do { - *bytes++=*lastSource++; - } while(lastSourcesharedData->mbcs.dbcsOnlyState!=0); - uint8_t *bytes=cnv->toUBytes; - *bytes++=*lastSource++; /* first byte */ - if(lastSource==source) { - cnv->toULength=1; - } else /* lastSourcetoULength=i; - source=lastSource; - } - } else { - /* no output because of empty input or only state changes */ - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - } - c=0xffff; - } - - /* set the converter state back into UConverter, ready for a new character */ - cnv->toUnicodeStatus=0; - cnv->mode=state; - - /* write back the updated pointer */ - pArgs->source=(const char *)source; - return c; -} - -#if 0 -/* - * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus - * Removal improves code coverage. 
- */ -/** - * This version of ucnv_MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages. - * It does not handle the EBCDIC swaplfnl option (set in UConverter). - * It does not handle conversion extensions (_extToU()). - */ -U_CFUNC UChar32 -ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData, - uint8_t b, UBool useFallback) { - int32_t entry; - uint8_t action; - - entry=sharedData->mbcs.stateTable[0][b]; - /* MBCS_ENTRY_IS_FINAL(entry) */ - - if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { - /* output BMP code point */ - return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - } - - /* - * An if-else-if chain provides more reliable performance for - * the most common cases compared to a switch. - */ - action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); - if(action==MBCS_STATE_VALID_DIRECT_20) { - /* output supplementary code point */ - return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry); - } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { - if(!TO_U_USE_FALLBACK(useFallback)) { - return 0xfffe; - } - /* output BMP code point */ - return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) { - if(!TO_U_USE_FALLBACK(useFallback)) { - return 0xfffe; - } - /* output supplementary code point */ - return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry); - } else if(action==MBCS_STATE_UNASSIGNED) { - return 0xfffe; - } else if(action==MBCS_STATE_ILLEGAL) { - return 0xffff; - } else { - /* reserved, must never occur */ - return 0xffff; - } -} -#endif - -/* - * This is a simple version of _MBCSGetNextUChar() that is used - * by other converter implementations. - * It only returns an "assigned" result if it consumes the entire input. - * It does not use state from the converter, nor error codes. - * It does not handle the EBCDIC swaplfnl option (set in UConverter). - * It handles conversion extensions but not GB 18030. 
- * - * Return value: - * U+fffe unassigned - * U+ffff illegal - * otherwise the Unicode code point - */ -U_CFUNC UChar32 -ucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData, - const char *source, int32_t length, - UBool useFallback) { - const int32_t (*stateTable)[256]; - const uint16_t *unicodeCodeUnits; - - uint32_t offset; - uint8_t state, action; - - UChar32 c; - int32_t i, entry; - - if(length<=0) { - /* no input at all: "illegal" */ - return 0xffff; - } - -#if 0 -/* - * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus - * TODO In future releases, verify that this function is never called for SBCS - * conversions, i.e., that sharedData->mbcs.countStates==1 is still true. - * Removal improves code coverage. - */ - /* use optimized function if possible */ - if(sharedData->mbcs.countStates==1) { - if(length==1) { - return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback); - } else { - return 0xffff; /* illegal: more than a single byte for an SBCS converter */ - } - } -#endif - - /* set up the local pointers */ - stateTable=sharedData->mbcs.stateTable; - unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits; - - /* converter state */ - offset=0; - state=sharedData->mbcs.dbcsOnlyState; - - /* conversion loop */ - for(i=0;;) { - entry=stateTable[state][(uint8_t)source[i++]]; - if(MBCS_ENTRY_IS_TRANSITION(entry)) { - state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); - offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); - - if(i==length) { - return 0xffff; /* truncated character */ - } - } else { - /* - * An if-else-if chain provides more reliable performance for - * the most common cases compared to a switch. 
- */ - action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); - if(action==MBCS_STATE_VALID_16) { - offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); - c=unicodeCodeUnits[offset]; - if(c!=0xfffe) { - /* done */ - } else if(UCNV_TO_U_USE_FALLBACK(cnv)) { - c=ucnv_MBCSGetFallback(&sharedData->mbcs, offset); - /* else done with 0xfffe */ - } - break; - } else if(action==MBCS_STATE_VALID_DIRECT_16) { - /* output BMP code point */ - c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - break; - } else if(action==MBCS_STATE_VALID_16_PAIR) { - offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); - c=unicodeCodeUnits[offset++]; - if(c<0xd800) { - /* output BMP code point below 0xd800 */ - } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) { - /* output roundtrip or fallback supplementary code point */ - c=(UChar32)(((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00)); - } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) { - /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ - c=unicodeCodeUnits[offset]; - } else if(c==0xffff) { - return 0xffff; - } else { - c=0xfffe; - } - break; - } else if(action==MBCS_STATE_VALID_DIRECT_20) { - /* output supplementary code point */ - c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry); - break; - } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { - if(!TO_U_USE_FALLBACK(useFallback)) { - c=0xfffe; - break; - } - /* output BMP code point */ - c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); - break; - } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) { - if(!TO_U_USE_FALLBACK(useFallback)) { - c=0xfffe; - break; - } - /* output supplementary code point */ - c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry); - break; - } else if(action==MBCS_STATE_UNASSIGNED) { - c=0xfffe; - break; - } - - /* - * forbid MBCS_STATE_CHANGE_ONLY for this function, - * and MBCS_STATE_ILLEGAL and reserved action codes - */ - return 0xffff; - } - } - - if(i!=length) { - /* illegal for this function: not all input consumed */ - return 0xffff; - } - - 
if(c==0xfffe) { - /* try an extension mapping */ - const int32_t *cx=sharedData->mbcs.extIndexes; - if(cx!=NULL) { - return ucnv_extSimpleMatchToU(cx, source, length, useFallback); - } - } - - return c; -} - -/* MBCS-from-Unicode conversion functions ----------------------------------- */ - -/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. */ -static void -ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - int32_t *offsets; - - const uint16_t *table; - const uint16_t *mbcsIndex; - const uint8_t *bytes; - - UChar32 c; - - int32_t sourceIndex, nextSourceIndex; - - uint32_t stage2Entry; - uint32_t asciiRoundtrips; - uint32_t value; - uint8_t unicodeMask; - - /* use optimized function if possible */ - cnv=pArgs->converter; - unicodeMask=cnv->sharedData->mbcs.unicodeMask; - - /* set up the local pointers */ - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - table=cnv->sharedData->mbcs.fromUnicodeTable; - mbcsIndex=cnv->sharedData->mbcs.mbcsIndex; - if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { - bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; - } else { - bytes=cnv->sharedData->mbcs.fromUnicodeBytes; - } - asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; - - /* get the converter state from UConverter */ - c=cnv->fromUChar32; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex= c==0 ? 0 : -1; - nextSourceIndex=0; - - /* conversion loop */ - if(c!=0 && targetCapacity>0) { - goto getTrail; - } - - while(source0) { - /* - * Get a correct Unicode code point: - * a single UChar for a BMP code point or - * a matched surrogate pair for a "supplementary code point". 
- */ - c=*source++; - ++nextSourceIndex; - if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) { - *target++=(uint8_t)c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - sourceIndex=nextSourceIndex; - } - --targetCapacity; - c=0; - continue; - } - /* - * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX - * to avoid dealing with surrogates. - * MBCS_FAST_MAX must be >=0xd7ff. - */ - if(c<=0xd7ff) { - value=DBCS_RESULT_FROM_MOST_BMP(mbcsIndex, (const uint16_t *)bytes, c); - /* There are only roundtrips (!=0) and no-mapping (==0) entries. */ - if(value==0) { - goto unassigned; - } - /* output the value */ - } else { - /* - * This also tests if the codepage maps single surrogates. - * If it does, then surrogates are not paired but mapped separately. - * Note that in this case unmatched surrogates are not detected. - */ - if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) { - if(U16_IS_SURROGATE_LEAD(c)) { -getTrail: - if(sourcesource=source; - c=_extFromU(cnv, cnv->sharedData, - c, &source, sourceLimit, - &target, target+targetCapacity, - &offsets, sourceIndex, - pArgs->flush, - pErrorCode); - nextSourceIndex+=(int32_t)(source-pArgs->source); - - if(U_FAILURE(*pErrorCode)) { - /* not mappable or buffer overflow */ - break; - } else { - /* a mapping was written to the target, continue */ - - /* recalculate the targetCapacity after an extension mapping */ - targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); - - /* normal end of conversion: prepare for a new character */ - sourceIndex=nextSourceIndex; - continue; - } - } - } - - /* write the output character bytes from value and length */ - /* from the first if in the loop we know that targetCapacity>0 */ - if(value<=0xff) { - /* this is easy because we know that there is enough space */ - *target++=(uint8_t)value; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - --targetCapacity; - } else /* length==2 */ { - *target++=(uint8_t)(value>>8); - if(2<=targetCapacity) { - 
*target++=(uint8_t)value; - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex; - } - targetCapacity-=2; - } else { - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - cnv->charErrorBuffer[0]=(char)value; - cnv->charErrorBufferLength=1; - - /* target overflow */ - targetCapacity=0; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - c=0; - break; - } - } - - /* normal end of conversion: prepare for a new character */ - c=0; - sourceIndex=nextSourceIndex; - continue; - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - /* set the converter state back into UConverter */ - cnv->fromUChar32=c; - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; -} - -/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages. */ -static void -ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - int32_t *offsets; - - const uint16_t *table; - const uint16_t *results; - - UChar32 c; - - int32_t sourceIndex, nextSourceIndex; - - uint16_t value, minValue; - UBool hasSupplementary; - - /* set up the local pointers */ - cnv=pArgs->converter; - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - table=cnv->sharedData->mbcs.fromUnicodeTable; - if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { - results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; - } else { - results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; - } - - if(cnv->useFallback) { - /* use all roundtrip and fallback results */ - minValue=0x800; - } else { - /* use only roundtrips and fallbacks from private-use characters */ - minValue=0xc00; - } - 
hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); - - /* get the converter state from UConverter */ - c=cnv->fromUChar32; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex= c==0 ? 0 : -1; - nextSourceIndex=0; - - /* conversion loop */ - if(c!=0 && targetCapacity>0) { - goto getTrail; - } - - while(source0) { - /* - * Get a correct Unicode code point: - * a single UChar for a BMP code point or - * a matched surrogate pair for a "supplementary code point". - */ - c=*source++; - ++nextSourceIndex; - if(U16_IS_SURROGATE(c)) { - if(U16_IS_SURROGATE_LEAD(c)) { -getTrail: - if(source=minValue) { - /* assigned, write the output character bytes from value and length */ - /* length==1 */ - /* this is easy because we know that there is enough space */ - *target++=(uint8_t)value; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - --targetCapacity; - - /* normal end of conversion: prepare for a new character */ - c=0; - sourceIndex=nextSourceIndex; - } else { /* unassigned */ -unassigned: - /* try an extension mapping */ - pArgs->source=source; - c=_extFromU(cnv, cnv->sharedData, - c, &source, sourceLimit, - &target, target+targetCapacity, - &offsets, sourceIndex, - pArgs->flush, - pErrorCode); - nextSourceIndex+=(int32_t)(source-pArgs->source); - - if(U_FAILURE(*pErrorCode)) { - /* not mappable or buffer overflow */ - break; - } else { - /* a mapping was written to the target, continue */ - - /* recalculate the targetCapacity after an extension mapping */ - targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); - - /* normal end of conversion: prepare for a new character */ - sourceIndex=nextSourceIndex; - } - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - /* set the converter state back into UConverter */ - cnv->fromUChar32=c; - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - 
pArgs->offsets=offsets; -} - -/* - * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages - * that map only to and from the BMP. - * In addition to single-byte/state optimizations, the offset calculations - * become much easier. - * It would be possible to use the sbcsIndex for UTF-8-friendly tables, - * but measurements have shown that this diminishes performance - * in more cases than it improves it. - * See SVN revision 21013 (2007-feb-06) for the last version with #if switches - * for various MBCS and SBCS optimizations. - */ -static void -ucnv_MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source, *sourceLimit, *lastSource; - uint8_t *target; - int32_t targetCapacity, length; - int32_t *offsets; - - const uint16_t *table; - const uint16_t *results; - - UChar32 c; - - int32_t sourceIndex; - - uint32_t asciiRoundtrips; - uint16_t value, minValue; - - /* set up the local pointers */ - cnv=pArgs->converter; - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - table=cnv->sharedData->mbcs.fromUnicodeTable; - if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { - results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; - } else { - results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; - } - asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; - - if(cnv->useFallback) { - /* use all roundtrip and fallback results */ - minValue=0x800; - } else { - /* use only roundtrips and fallbacks from private-use characters */ - minValue=0xc00; - } - - /* get the converter state from UConverter */ - c=cnv->fromUChar32; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex= c==0 ? 
0 : -1; - lastSource=source; - - /* - * since the conversion here is 1:1 UChar:uint8_t, we need only one counter - * for the minimum of the sourceLength and targetCapacity - */ - length=(int32_t)(sourceLimit-source); - if(length0) { - goto getTrail; - } - -#if MBCS_UNROLL_SINGLE_FROM_BMP - /* unrolling makes it slower on Pentium III/Windows 2000?! */ - /* unroll the loop with the most common case */ -unrolled: - if(targetCapacity>=4) { - int32_t count, loops; - uint16_t andedValues; - - loops=count=targetCapacity>>2; - do { - c=*source++; - andedValues=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); - *target++=(uint8_t)value; - c=*source++; - andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); - *target++=(uint8_t)value; - c=*source++; - andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); - *target++=(uint8_t)value; - c=*source++; - andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); - *target++=(uint8_t)value; - - /* were all 4 entries really valid? */ - if(andedValues0); - count=loops-count; - targetCapacity-=4*count; - - if(offsets!=NULL) { - lastSource+=4*count; - while(count>0) { - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - *offsets++=sourceIndex++; - --count; - } - } - - c=0; - } -#endif - - while(targetCapacity>0) { - /* - * Get a correct Unicode code point: - * a single UChar for a BMP code point or - * a matched surrogate pair for a "supplementary code point". - */ - c=*source++; - /* - * Do not immediately check for single surrogates: - * Assume that they are unassigned and check for them in that case. - * This speeds up the conversion of assigned characters. - */ - /* convert the Unicode code point in c into codepage bytes */ - if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) { - *target++=(uint8_t)c; - --targetCapacity; - c=0; - continue; - } - value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); - /* is this code point assigned, or do we use fallbacks? 
*/ - if(value>=minValue) { - /* assigned, write the output character bytes from value and length */ - /* length==1 */ - /* this is easy because we know that there is enough space */ - *target++=(uint8_t)value; - --targetCapacity; - - /* normal end of conversion: prepare for a new character */ - c=0; - continue; - } else if(!U16_IS_SURROGATE(c)) { - /* normal, unassigned BMP character */ - } else if(U16_IS_SURROGATE_LEAD(c)) { -getTrail: - if(sourceflush) { - *pErrorCode=U_TRUNCATED_CHAR_FOUND; - } - break; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - - /* c does not have a mapping */ - - /* get the number of code units for c to correctly advance sourceIndex */ - length=U16_LENGTH(c); - - /* set offsets since the start or the last extension */ - if(offsets!=NULL) { - int32_t count=(int32_t)(source-lastSource); - - /* do not set the offset for this character */ - count-=length; - - while(count>0) { - *offsets++=sourceIndex++; - --count; - } - /* offsets and sourceIndex are now set for the current character */ - } - - /* try an extension mapping */ - lastSource=source; - c=_extFromU(cnv, cnv->sharedData, - c, &source, sourceLimit, - &target, (const uint8_t *)(pArgs->targetLimit), - &offsets, sourceIndex, - pArgs->flush, - pErrorCode); - sourceIndex+=length+(int32_t)(source-lastSource); - lastSource=source; - - if(U_FAILURE(*pErrorCode)) { - /* not mappable or buffer overflow */ - break; - } else { - /* a mapping was written to the target, continue */ - - /* recalculate the targetCapacity after an extension mapping */ - targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); - length=(int32_t)(sourceLimit-source); - if(length=(uint8_t *)pArgs->targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - - /* set offsets since the start or the last callback */ - if(offsets!=NULL) { - size_t count=source-lastSource; - if (count > 0 && 
*pErrorCode == U_TRUNCATED_CHAR_FOUND) { - /* - Caller gave us a partial supplementary character, - which this function couldn't convert in any case. - The callback will handle the offset. - */ - count--; - } - while(count>0) { - *offsets++=sourceIndex++; - --count; - } - } - - /* set the converter state back into UConverter */ - cnv->fromUChar32=c; - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; -} - -U_CFUNC void -ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - const UChar *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - int32_t *offsets; - - const uint16_t *table; - const uint16_t *mbcsIndex; - const uint8_t *p, *bytes; - uint8_t outputType; - - UChar32 c; - - int32_t prevSourceIndex, sourceIndex, nextSourceIndex; - - uint32_t stage2Entry; - uint32_t asciiRoundtrips; - uint32_t value; - /* Shift-In and Shift-Out byte sequences differ by encoding scheme. 
*/ - uint8_t siBytes[2] = {0, 0}; - uint8_t soBytes[2] = {0, 0}; - uint8_t siLength, soLength; - int32_t length = 0, prevLength; - uint8_t unicodeMask; - - cnv=pArgs->converter; - - if(cnv->preFromUFirstCP>=0) { - /* - * pass sourceIndex=-1 because we continue from an earlier buffer - * in the future, this may change with continuous offsets - */ - ucnv_extContinueMatchFromU(cnv, pArgs, -1, pErrorCode); - - if(U_FAILURE(*pErrorCode) || cnv->preFromULength<0) { - return; - } - } - - /* use optimized function if possible */ - outputType=cnv->sharedData->mbcs.outputType; - unicodeMask=cnv->sharedData->mbcs.unicodeMask; - if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES)) { - if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { - ucnv_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode); - } else { - ucnv_MBCSSingleFromUnicodeWithOffsets(pArgs, pErrorCode); - } - return; - } else if(outputType==MBCS_OUTPUT_2 && cnv->sharedData->mbcs.utf8Friendly) { - ucnv_MBCSDoubleFromUnicodeWithOffsets(pArgs, pErrorCode); - return; - } - - /* set up the local pointers */ - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - table=cnv->sharedData->mbcs.fromUnicodeTable; - if(cnv->sharedData->mbcs.utf8Friendly) { - mbcsIndex=cnv->sharedData->mbcs.mbcsIndex; - } else { - mbcsIndex=NULL; - } - if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { - bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; - } else { - bytes=cnv->sharedData->mbcs.fromUnicodeBytes; - } - asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; - - /* get the converter state from UConverter */ - c=cnv->fromUChar32; - - if(outputType==MBCS_OUTPUT_2_SISO) { - prevLength=cnv->fromUnicodeStatus; - if(prevLength==0) { - /* set the real value */ - prevLength=1; - } - } else { - /* prevent fromUnicodeStatus from being set to something non-0 */ - prevLength=0; - } - - /* sourceIndex=-1 if the 
current character began in the previous buffer */ - prevSourceIndex=-1; - sourceIndex= c==0 ? 0 : -1; - nextSourceIndex=0; - - /* Get the SI/SO character for the converter */ - siLength = static_cast(getSISOBytes(SI, cnv->options, siBytes)); - soLength = static_cast(getSISOBytes(SO, cnv->options, soBytes)); - - /* conversion loop */ - /* - * This is another piece of ugly code: - * A goto into the loop if the converter state contains a first surrogate - * from the previous function call. - * It saves me to check in each loop iteration a check of if(c==0) - * and duplicating the trail-surrogate-handling code in the else - * branch of that check. - * I could not find any other way to get around this other than - * using a function call for the conversion and callback, which would - * be even more inefficient. - * - * Markus Scherer 2000-jul-19 - */ - if(c!=0 && targetCapacity>0) { - goto getTrail; - } - - while(source0) { - /* - * Get a correct Unicode code point: - * a single UChar for a BMP code point or - * a matched surrogate pair for a "supplementary code point". - */ - c=*source++; - ++nextSourceIndex; - if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) { - *target++=(uint8_t)c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - prevSourceIndex=sourceIndex; - sourceIndex=nextSourceIndex; - } - --targetCapacity; - c=0; - continue; - } - /* - * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX - * to avoid dealing with surrogates. - * MBCS_FAST_MAX must be >=0xd7ff. - */ - if(c<=0xd7ff && mbcsIndex!=NULL) { - value=mbcsIndex[c>>6]; - - /* get the bytes and the length for the output (copied from below and adapted for utf8Friendly data) */ - /* There are only roundtrips (!=0) and no-mapping (==0) entries. 
*/ - switch(outputType) { - case MBCS_OUTPUT_2: - value=((const uint16_t *)bytes)[value +(c&0x3f)]; - if(value<=0xff) { - if(value==0) { - goto unassigned; - } else { - length=1; - } - } else { - length=2; - } - break; - case MBCS_OUTPUT_2_SISO: - /* 1/2-byte stateful with Shift-In/Shift-Out */ - /* - * Save the old state in the converter object - * right here, then change the local prevLength state variable if necessary. - * Then, if this character turns out to be unassigned or a fallback that - * is not taken, the callback code must not save the new state in the converter - * because the new state is for a character that is not output. - * However, the callback must still restore the state from the converter - * in case the callback function changed it for its output. - */ - cnv->fromUnicodeStatus=prevLength; /* save the old state */ - value=((const uint16_t *)bytes)[value +(c&0x3f)]; - if(value<=0xff) { - if(value==0) { - goto unassigned; - } else if(prevLength<=1) { - length=1; - } else { - /* change from double-byte mode to single-byte */ - if (siLength == 1) { - value|=(uint32_t)siBytes[0]<<8; - length = 2; - } else if (siLength == 2) { - value|=(uint32_t)siBytes[1]<<8; - value|=(uint32_t)siBytes[0]<<16; - length = 3; - } - prevLength=1; - } - } else { - if(prevLength==2) { - length=2; - } else { - /* change from single-byte mode to double-byte */ - if (soLength == 1) { - value|=(uint32_t)soBytes[0]<<16; - length = 3; - } else if (soLength == 2) { - value|=(uint32_t)soBytes[1]<<16; - value|=(uint32_t)soBytes[0]<<24; - length = 4; - } - prevLength=2; - } - } - break; - case MBCS_OUTPUT_DBCS_ONLY: - /* table with single-byte results, but only DBCS mappings used */ - value=((const uint16_t *)bytes)[value +(c&0x3f)]; - if(value<=0xff) { - /* no mapping or SBCS result, not taken for DBCS-only */ - goto unassigned; - } else { - length=2; - } - break; - case MBCS_OUTPUT_3: - p=bytes+(value+(c&0x3f))*3; - value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; - 
if(value<=0xff) { - if(value==0) { - goto unassigned; - } else { - length=1; - } - } else if(value<=0xffff) { - length=2; - } else { - length=3; - } - break; - case MBCS_OUTPUT_4: - value=((const uint32_t *)bytes)[value +(c&0x3f)]; - if(value<=0xff) { - if(value==0) { - goto unassigned; - } else { - length=1; - } - } else if(value<=0xffff) { - length=2; - } else if(value<=0xffffff) { - length=3; - } else { - length=4; - } - break; - case MBCS_OUTPUT_3_EUC: - value=((const uint16_t *)bytes)[value +(c&0x3f)]; - /* EUC 16-bit fixed-length representation */ - if(value<=0xff) { - if(value==0) { - goto unassigned; - } else { - length=1; - } - } else if((value&0x8000)==0) { - value|=0x8e8000; - length=3; - } else if((value&0x80)==0) { - value|=0x8f0080; - length=3; - } else { - length=2; - } - break; - case MBCS_OUTPUT_4_EUC: - p=bytes+(value+(c&0x3f))*3; - value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; - /* EUC 16-bit fixed-length representation applied to the first two bytes */ - if(value<=0xff) { - if(value==0) { - goto unassigned; - } else { - length=1; - } - } else if(value<=0xffff) { - length=2; - } else if((value&0x800000)==0) { - value|=0x8e800000; - length=4; - } else if((value&0x8000)==0) { - value|=0x8f008000; - length=4; - } else { - length=3; - } - break; - default: - /* must not occur */ - /* - * To avoid compiler warnings that value & length may be - * used without having been initialized, we set them here. - * In reality, this is unreachable code. - * Not having a default branch also causes warnings with - * some compilers. - */ - value=0; - length=0; - break; - } - /* output the value */ - } else { - /* - * This also tests if the codepage maps single surrogates. - * If it does, then surrogates are not paired but mapped separately. - * Note that in this case unmatched surrogates are not detected. 
- */ - if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) { - if(U16_IS_SURROGATE_LEAD(c)) { -getTrail: - if(sourcefromUnicodeStatus=prevLength; /* save the old state */ - /* callback(unassigned) */ - goto unassigned; - } - /* convert this supplementary code point */ - /* exit this condition tree */ - } else { - /* this is an unmatched lead code unit (1st surrogate) */ - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - } else { - /* no more input */ - break; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - break; - } - } - - /* convert the Unicode code point in c into codepage bytes */ - - /* - * The basic lookup is a triple-stage compact array (trie) lookup. - * For details see the beginning of this file. - * - * Single-byte codepages are handled with a different data structure - * by _MBCSSingle... functions. - * - * The result consists of a 32-bit value from stage 2 and - * a pointer to as many bytes as are stored per character. - * The pointer points to the character's bytes in stage 3. - * Bits 15..0 of the stage 2 entry contain the stage 3 index - * for that pointer, while bits 31..16 are flags for which of - * the 16 characters in the block are roundtrip-assigned. - * - * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t - * respectively as uint32_t, in the platform encoding. - * For 3-byte codepages, the bytes are always stored in big-endian order. - * - * For EUC encodings that use only either 0x8e or 0x8f as the first - * byte of their longest byte sequences, the first two bytes in - * this third stage indicate with their 7th bits whether these bytes - * are to be written directly or actually need to be preceeded by - * one of the two Single-Shift codes. With this, the third stage - * stores one byte fewer per character than the actual maximum length of - * EUC byte sequences. 
- * - * Other than that, leading zero bytes are removed and the other - * bytes output. A single zero byte may be output if the "assigned" - * bit in stage 2 was on. - * The data structure does not support zero byte output as a fallback, - * and also does not allow output of leading zeros. - */ - stage2Entry=MBCS_STAGE_2_FROM_U(table, c); - - /* get the bytes and the length for the output */ - switch(outputType) { - case MBCS_OUTPUT_2: - value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); - if(value<=0xff) { - length=1; - } else { - length=2; - } - break; - case MBCS_OUTPUT_2_SISO: - /* 1/2-byte stateful with Shift-In/Shift-Out */ - /* - * Save the old state in the converter object - * right here, then change the local prevLength state variable if necessary. - * Then, if this character turns out to be unassigned or a fallback that - * is not taken, the callback code must not save the new state in the converter - * because the new state is for a character that is not output. - * However, the callback must still restore the state from the converter - * in case the callback function changed it for its output. 
- */ - cnv->fromUnicodeStatus=prevLength; /* save the old state */ - value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); - if(value<=0xff) { - if(value==0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)==0) { - /* no mapping, leave value==0 */ - length=0; - } else if(prevLength<=1) { - length=1; - } else { - /* change from double-byte mode to single-byte */ - if (siLength == 1) { - value|=(uint32_t)siBytes[0]<<8; - length = 2; - } else if (siLength == 2) { - value|=(uint32_t)siBytes[1]<<8; - value|=(uint32_t)siBytes[0]<<16; - length = 3; - } - prevLength=1; - } - } else { - if(prevLength==2) { - length=2; - } else { - /* change from single-byte mode to double-byte */ - if (soLength == 1) { - value|=(uint32_t)soBytes[0]<<16; - length = 3; - } else if (soLength == 2) { - value|=(uint32_t)soBytes[1]<<16; - value|=(uint32_t)soBytes[0]<<24; - length = 4; - } - prevLength=2; - } - } - break; - case MBCS_OUTPUT_DBCS_ONLY: - /* table with single-byte results, but only DBCS mappings used */ - value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); - if(value<=0xff) { - /* no mapping or SBCS result, not taken for DBCS-only */ - value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ - length=0; - } else { - length=2; - } - break; - case MBCS_OUTPUT_3: - p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c); - value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; - if(value<=0xff) { - length=1; - } else if(value<=0xffff) { - length=2; - } else { - length=3; - } - break; - case MBCS_OUTPUT_4: - value=MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c); - if(value<=0xff) { - length=1; - } else if(value<=0xffff) { - length=2; - } else if(value<=0xffffff) { - length=3; - } else { - length=4; - } - break; - case MBCS_OUTPUT_3_EUC: - value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); - /* EUC 16-bit fixed-length representation */ - if(value<=0xff) { - length=1; - } else if((value&0x8000)==0) { - value|=0x8e8000; - length=3; - } else if((value&0x80)==0) { - value|=0x8f0080; 
- length=3; - } else { - length=2; - } - break; - case MBCS_OUTPUT_4_EUC: - p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c); - value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; - /* EUC 16-bit fixed-length representation applied to the first two bytes */ - if(value<=0xff) { - length=1; - } else if(value<=0xffff) { - length=2; - } else if((value&0x800000)==0) { - value|=0x8e800000; - length=4; - } else if((value&0x8000)==0) { - value|=0x8f008000; - length=4; - } else { - length=3; - } - break; - default: - /* must not occur */ - /* - * To avoid compiler warnings that value & length may be - * used without having been initialized, we set them here. - * In reality, this is unreachable code. - * Not having a default branch also causes warnings with - * some compilers. - */ - value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ - length=0; - break; - } - - /* is this code point assigned, or do we use fallbacks? */ - if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)!=0 || - (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0)) - ) { - /* - * We allow a 0 byte output if the "assigned" bit is set for this entry. - * There is no way with this data structure for fallback output - * to be a zero byte. 
- */ - -unassigned: - /* try an extension mapping */ - pArgs->source=source; - c=_extFromU(cnv, cnv->sharedData, - c, &source, sourceLimit, - &target, target+targetCapacity, - &offsets, sourceIndex, - pArgs->flush, - pErrorCode); - nextSourceIndex+=(int32_t)(source-pArgs->source); - prevLength=cnv->fromUnicodeStatus; /* restore SISO state */ - - if(U_FAILURE(*pErrorCode)) { - /* not mappable or buffer overflow */ - break; - } else { - /* a mapping was written to the target, continue */ - - /* recalculate the targetCapacity after an extension mapping */ - targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); - - /* normal end of conversion: prepare for a new character */ - if(offsets!=NULL) { - prevSourceIndex=sourceIndex; - sourceIndex=nextSourceIndex; - } - continue; - } - } - } - - /* write the output character bytes from value and length */ - /* from the first if in the loop we know that targetCapacity>0 */ - if(length<=targetCapacity) { - if(offsets==NULL) { - switch(length) { - /* each branch falls through to the next one */ - case 4: - *target++=(uint8_t)(value>>24); - U_FALLTHROUGH; - case 3: - *target++=(uint8_t)(value>>16); - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(value>>8); - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)value; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - } else { - switch(length) { - /* each branch falls through to the next one */ - case 4: - *target++=(uint8_t)(value>>24); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 3: - *target++=(uint8_t)(value>>16); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(value>>8); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)value; - *offsets++=sourceIndex; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - } - targetCapacity-=length; - } else { - uint8_t *charErrorBuffer; - - /* - * We actually do this backwards here: - * In order to save an intermediate variable, we output - * first to the 
overflow buffer what does not fit into the - * regular target. - */ - /* we know that 1<=targetCapacitycharErrorBuffer; - switch(length) { - /* each branch falls through to the next one */ - case 3: - *charErrorBuffer++=(uint8_t)(value>>16); - U_FALLTHROUGH; - case 2: - *charErrorBuffer++=(uint8_t)(value>>8); - U_FALLTHROUGH; - case 1: - *charErrorBuffer=(uint8_t)value; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - cnv->charErrorBufferLength=(int8_t)length; - - /* now output what fits into the regular target */ - value>>=8*length; /* length was reduced by targetCapacity */ - switch(targetCapacity) { - /* each branch falls through to the next one */ - case 3: - *target++=(uint8_t)(value>>16); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(value>>8); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)value; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - - /* target overflow */ - targetCapacity=0; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - c=0; - break; - } - - /* normal end of conversion: prepare for a new character */ - c=0; - if(offsets!=NULL) { - prevSourceIndex=sourceIndex; - sourceIndex=nextSourceIndex; - } - continue; - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - /* - * the end of the input stream and detection of truncated input - * are handled by the framework, but for EBCDIC_STATEFUL conversion - * we need to emit an SI at the very end - * - * conditions: - * successful - * EBCDIC_STATEFUL in DBCS mode - * end of input and no truncated input - */ - if( U_SUCCESS(*pErrorCode) && - outputType==MBCS_OUTPUT_2_SISO && prevLength==2 && - pArgs->flush && source>=sourceLimit && c==0 - ) { - /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */ - if(targetCapacity>0) { - 
*target++=(uint8_t)siBytes[0]; - if (siLength == 2) { - if (targetCapacity<2) { - cnv->charErrorBuffer[0]=(uint8_t)siBytes[1]; - cnv->charErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } else { - *target++=(uint8_t)siBytes[1]; - } - } - if(offsets!=NULL) { - /* set the last source character's index (sourceIndex points at sourceLimit now) */ - *offsets++=prevSourceIndex; - } - } else { - /* target is full */ - cnv->charErrorBuffer[0]=(uint8_t)siBytes[0]; - if (siLength == 2) { - cnv->charErrorBuffer[1]=(uint8_t)siBytes[1]; - } - cnv->charErrorBufferLength=siLength; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - prevLength=1; /* we switched into SBCS */ - } - - /* set the converter state back into UConverter */ - cnv->fromUChar32=c; - cnv->fromUnicodeStatus=prevLength; - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; -} - -/* - * This is another simple conversion function for internal use by other - * conversion implementations. - * It does not use the converter state nor call callbacks. - * It does not handle the EBCDIC swaplfnl option (set in UConverter). - * It handles conversion extensions but not GB 18030. - * - * It converts one single Unicode code point into codepage bytes, encoded - * as one 32-bit value. The function returns the number of bytes in *pValue: - * 1..4 the number of bytes in *pValue - * 0 unassigned (*pValue undefined) - * -1 illegal (currently not used, *pValue undefined) - * - * *pValue will contain the resulting bytes with the last byte in bits 7..0, - * the second to last byte in bits 15..8, etc. - * Currently, the function assumes but does not check that 0<=c<=0x10ffff. 
- */ -U_CFUNC int32_t -ucnv_MBCSFromUChar32(UConverterSharedData *sharedData, - UChar32 c, uint32_t *pValue, - UBool useFallback) { - const int32_t *cx; - const uint16_t *table; -#if 0 -/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */ - const uint8_t *p; -#endif - uint32_t stage2Entry; - uint32_t value; - int32_t length; - - /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ - if(c<=0xffff || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { - table=sharedData->mbcs.fromUnicodeTable; - - /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ - if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) { - value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c); - /* is this code point assigned, or do we use fallbacks? */ - if(useFallback ? value>=0x800 : value>=0xc00) { - *pValue=value&0xff; - return 1; - } - } else /* outputType!=MBCS_OUTPUT_1 */ { - stage2Entry=MBCS_STAGE_2_FROM_U(table, c); - - /* get the bytes and the length for the output */ - switch(sharedData->mbcs.outputType) { - case MBCS_OUTPUT_2: - value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); - if(value<=0xff) { - length=1; - } else { - length=2; - } - break; -#if 0 -/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */ - case MBCS_OUTPUT_DBCS_ONLY: - /* table with single-byte results, but only DBCS mappings used */ - value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); - if(value<=0xff) { - /* no mapping or SBCS result, not taken for DBCS-only */ - value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ - length=0; - } else { - length=2; - } - break; - case MBCS_OUTPUT_3: - p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); - value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; - if(value<=0xff) { - 
length=1; - } else if(value<=0xffff) { - length=2; - } else { - length=3; - } - break; - case MBCS_OUTPUT_4: - value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); - if(value<=0xff) { - length=1; - } else if(value<=0xffff) { - length=2; - } else if(value<=0xffffff) { - length=3; - } else { - length=4; - } - break; - case MBCS_OUTPUT_3_EUC: - value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); - /* EUC 16-bit fixed-length representation */ - if(value<=0xff) { - length=1; - } else if((value&0x8000)==0) { - value|=0x8e8000; - length=3; - } else if((value&0x80)==0) { - value|=0x8f0080; - length=3; - } else { - length=2; - } - break; - case MBCS_OUTPUT_4_EUC: - p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); - value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; - /* EUC 16-bit fixed-length representation applied to the first two bytes */ - if(value<=0xff) { - length=1; - } else if(value<=0xffff) { - length=2; - } else if((value&0x800000)==0) { - value|=0x8e800000; - length=4; - } else if((value&0x8000)==0) { - value|=0x8f008000; - length=4; - } else { - length=3; - } - break; -#endif - default: - /* must not occur */ - return -1; - } - - /* is this code point assigned, or do we use fallbacks? */ - if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || - (FROM_U_USE_FALLBACK(useFallback, c) && value!=0) - ) { - /* - * We allow a 0 byte output if the "assigned" bit is set for this entry. - * There is no way with this data structure for fallback output - * to be a zero byte. - */ - /* assigned */ - *pValue=value; - return length; - } - } - } - - cx=sharedData->mbcs.extIndexes; - if(cx!=NULL) { - length=ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback); - return length>=0 ? length : -length; /* return abs(length); */ - } - - /* unassigned */ - return 0; -} - - -#if 0 -/* - * This function has been moved to ucnv2022.c for inlining. 
- * This implementation is here only for documentation purposes - */ - -/** - * This version of ucnv_MBCSFromUChar32() is optimized for single-byte codepages. - * It does not handle the EBCDIC swaplfnl option (set in UConverter). - * It does not handle conversion extensions (_extFromU()). - * - * It returns the codepage byte for the code point, or -1 if it is unassigned. - */ -U_CFUNC int32_t -ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData, - UChar32 c, - UBool useFallback) { - const uint16_t *table; - int32_t value; - - /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ - if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { - return -1; - } - - /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ - table=sharedData->mbcs.fromUnicodeTable; - - /* get the byte for the output */ - value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c); - /* is this code point assigned, or do we use fallbacks? */ - if(useFallback ? value>=0x800 : value>=0xc00) { - return value&0xff; - } else { - return -1; - } -} -#endif - -/* MBCS-from-UTF-8 conversion functions ------------------------------------- */ - -/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... 
*/ -static const UChar32 -utf8_offsets[5]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 }; - -static void U_CALLCONV -ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, - UConverterToUnicodeArgs *pToUArgs, - UErrorCode *pErrorCode) { - UConverter *utf8, *cnv; - const uint8_t *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - - const uint16_t *table, *sbcsIndex; - const uint16_t *results; - - int8_t oldToULength, toULength, toULimit; - - UChar32 c; - uint8_t b, t1, t2; - - uint32_t asciiRoundtrips; - uint16_t value, minValue = 0; - UBool hasSupplementary; - - /* set up the local pointers */ - utf8=pToUArgs->converter; - cnv=pFromUArgs->converter; - source=(uint8_t *)pToUArgs->source; - sourceLimit=(uint8_t *)pToUArgs->sourceLimit; - target=(uint8_t *)pFromUArgs->target; - targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); - - table=cnv->sharedData->mbcs.fromUnicodeTable; - sbcsIndex=cnv->sharedData->mbcs.sbcsIndex; - if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { - results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; - } else { - results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; - } - asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; - - if(cnv->useFallback) { - /* use all roundtrip and fallback results */ - minValue=0x800; - } else { - /* use only roundtrips and fallbacks from private-use characters */ - minValue=0xc00; - } - hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); - - /* get the converter state from the UTF-8 UConverter */ - if(utf8->toULength > 0) { - toULength=oldToULength=utf8->toULength; - toULimit=(int8_t)utf8->mode; - c=(UChar32)utf8->toUnicodeStatus; - } else { - toULength=oldToULength=toULimit=0; - c = 0; - } - - // The conversion loop checks source0) { - uint8_t b1=*(sourceLimit-1); - if(U8_IS_SINGLE(b1)) { - // common ASCII character - } else if(U8_IS_TRAIL(b1) && length>=2) { - uint8_t b2=*(sourceLimit-2); - if(0xe0<=b2 && b2<0xf0 && 
U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { - // truncated 3-byte sequence - sourceLimit-=2; - } - } else if(0xc2<=b1 && b1<0xf0) { - // truncated 2- or 3-byte sequence - --sourceLimit; - } - } - } - - if(c!=0 && targetCapacity>0) { - utf8->toUnicodeStatus=0; - utf8->toULength=0; - goto moreBytes; - /* - * Note: We could avoid the goto by duplicating some of the moreBytes - * code, but only up to the point of collecting a complete UTF-8 - * sequence; then recurse for the toUBytes[toULength] - * and then continue with normal conversion. - * - * If so, move this code to just after initializing the minimum - * set of local variables for reading the UTF-8 input - * (utf8, source, target, limits but not cnv, table, minValue, etc.). - * - * Potential advantages: - * - avoid the goto - * - oldToULength could become a local variable in just those code blocks - * that deal with buffer boundaries - * - possibly faster if the goto prevents some compiler optimizations - * (this would need measuring to confirm) - * Disadvantage: - * - code duplication - */ - } - - /* conversion loop */ - while(source0) { - b=*source++; - if(U8_IS_SINGLE(b)) { - /* convert ASCII */ - if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) { - *target++=(uint8_t)b; - --targetCapacity; - continue; - } else { - c=b; - value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, 0, c); - } - } else { - if(b<0xe0) { - if( /* handle U+0080..U+07FF inline */ - b>=0xc2 && - (t1=(uint8_t)(*source-0x80)) <= 0x3f - ) { - c=b&0x1f; - ++source; - value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t1); - if(value>=minValue) { - *target++=(uint8_t)value; - --targetCapacity; - continue; - } else { - c=(c<<6)|t1; - } - } else { - c=-1; - } - } else if(b==0xe0) { - if( /* handle U+0800..U+0FFF inline */ - (t1=(uint8_t)(source[0]-0x80)) <= 0x3f && t1 >= 0x20 && - (t2=(uint8_t)(source[1]-0x80)) <= 0x3f - ) { - c=t1; - source+=2; - value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t2); - if(value>=minValue) { - *target++=(uint8_t)value; - 
--targetCapacity; - continue; - } else { - c=(c<<6)|t2; - } - } else { - c=-1; - } - } else { - c=-1; - } - - if(c<0) { - /* handle "complicated" and error cases, and continuing partial characters */ - oldToULength=0; - toULength=1; - toULimit=U8_COUNT_BYTES_NON_ASCII(b); - c=b; -moreBytes: - while(toULengthsourceLimit) { - b=*source; - if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) { - ++source; - ++toULength; - c=(c<<6)+b; - } else { - break; /* sequence too short, stop with toULengthtoUBytes[oldToULength++]=*source++; - } - utf8->toUnicodeStatus=c; - utf8->toULength=toULength; - utf8->mode=toULimit; - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; - return; - } - } - - if(toULength==toULimit) { - c-=utf8_offsets[toULength]; - if(toULength<=3) { /* BMP */ - value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); - } else { - /* supplementary code point */ - if(!hasSupplementary) { - /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ - value=0; - } else { - value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); - } - } - } else { - /* error handling: illegal UTF-8 byte sequence */ - source-=(toULength-oldToULength); - while(oldToULengthtoUBytes[oldToULength++]=*source++; - } - utf8->toULength=toULength; - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return; - } - } - } - - if(value>=minValue) { - /* output the mapping for c */ - *target++=(uint8_t)value; - --targetCapacity; - } else { - /* valueUTF-16->charset conversion. - */ - static const UChar nul=0; - const UChar *noSource=&nul; - c=_extFromU(cnv, cnv->sharedData, - c, &noSource, noSource, - &target, target+targetCapacity, - NULL, -1, - pFromUArgs->flush, - pErrorCode); - - if(U_FAILURE(*pErrorCode)) { - /* not mappable or buffer overflow */ - cnv->fromUChar32=c; - break; - } else if(cnv->preFromUFirstCP>=0) { - /* - * Partial match, return and revert to pivoting. 
- * In normal from-UTF-16 conversion, we would just continue - * but then exit the loop because the extension match would - * have consumed the source. - */ - *pErrorCode=U_USING_DEFAULT_WARNING; - break; - } else { - /* a mapping was written to the target, continue */ - - /* recalculate the targetCapacity after an extension mapping */ - targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target); - } - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - /* - * The sourceLimit may have been adjusted before the conversion loop - * to stop before a truncated sequence. - * If so, then collect the truncated sequence now. - */ - if(U_SUCCESS(*pErrorCode) && - cnv->preFromUFirstCP<0 && - source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { - c=utf8->toUBytes[0]=b=*source++; - toULength=1; - toULimit=U8_COUNT_BYTES(b); - while(sourcetoUBytes[toULength++]=b=*source++; - c=(c<<6)+b; - } - utf8->toUnicodeStatus=c; - utf8->toULength=toULength; - utf8->mode=toULimit; - } - - /* write back the updated pointers */ - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; -} - -static void U_CALLCONV -ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, - UConverterToUnicodeArgs *pToUArgs, - UErrorCode *pErrorCode) { - UConverter *utf8, *cnv; - const uint8_t *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - - const uint16_t *table, *mbcsIndex; - const uint16_t *results; - - int8_t oldToULength, toULength, toULimit; - - UChar32 c; - uint8_t b, t1, t2; - - uint32_t stage2Entry; - uint32_t asciiRoundtrips; - uint16_t value = 0; - UBool hasSupplementary; - - /* set up the local pointers */ - utf8=pToUArgs->converter; - cnv=pFromUArgs->converter; - source=(uint8_t *)pToUArgs->source; - sourceLimit=(uint8_t *)pToUArgs->sourceLimit; - target=(uint8_t *)pFromUArgs->target; - targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); - - table=cnv->sharedData->mbcs.fromUnicodeTable; - 
mbcsIndex=cnv->sharedData->mbcs.mbcsIndex; - if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { - results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; - } else { - results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; - } - asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; - - hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); - - /* get the converter state from the UTF-8 UConverter */ - if(utf8->toULength > 0) { - toULength=oldToULength=utf8->toULength; - toULimit=(int8_t)utf8->mode; - c=(UChar32)utf8->toUnicodeStatus; - } else { - toULength=oldToULength=toULimit=0; - c = 0; - } - - // The conversion loop checks source0) { - uint8_t b1=*(sourceLimit-1); - if(U8_IS_SINGLE(b1)) { - // common ASCII character - } else if(U8_IS_TRAIL(b1) && length>=2) { - uint8_t b2=*(sourceLimit-2); - if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { - // truncated 3-byte sequence - sourceLimit-=2; - } - } else if(0xc2<=b1 && b1<0xf0) { - // truncated 2- or 3-byte sequence - --sourceLimit; - } - } - } - - if(c!=0 && targetCapacity>0) { - utf8->toUnicodeStatus=0; - utf8->toULength=0; - goto moreBytes; - /* See note in ucnv_SBCSFromUTF8() about this goto. 
*/ - } - - /* conversion loop */ - while(source0) { - b=*source++; - if(U8_IS_SINGLE(b)) { - /* convert ASCII */ - if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) { - *target++=b; - --targetCapacity; - continue; - } else { - value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, 0, b); - if(value==0) { - c=b; - goto unassigned; - } - } - } else { - if(b>=0xe0) { - if( /* handle U+0800..U+D7FF inline */ - b<=0xed && // do not assume maxFastUChar>0xd7ff - U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) && - (t2=(uint8_t)(source[1]-0x80)) <= 0x3f - ) { - c=((b&0xf)<<6)|(t1&0x3f); - source+=2; - value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2); - if(value==0) { - c=(c<<6)|t2; - goto unassigned; - } - } else { - c=-1; - } - } else { - if( /* handle U+0080..U+07FF inline */ - b>=0xc2 && - (t1=(uint8_t)(*source-0x80)) <= 0x3f - ) { - c=b&0x1f; - ++source; - value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t1); - if(value==0) { - c=(c<<6)|t1; - goto unassigned; - } - } else { - c=-1; - } - } - - if(c<0) { - /* handle "complicated" and error cases, and continuing partial characters */ - oldToULength=0; - toULength=1; - toULimit=U8_COUNT_BYTES_NON_ASCII(b); - c=b; -moreBytes: - while(toULengthsourceLimit) { - b=*source; - if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) { - ++source; - ++toULength; - c=(c<<6)+b; - } else { - break; /* sequence too short, stop with toULengthtoUBytes[oldToULength++]=*source++; - } - utf8->toUnicodeStatus=c; - utf8->toULength=toULength; - utf8->mode=toULimit; - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; - return; - } - } - - if(toULength==toULimit) { - c-=utf8_offsets[toULength]; - if(toULength<=3) { /* BMP */ - stage2Entry=MBCS_STAGE_2_FROM_U(table, c); - } else { - /* supplementary code point */ - if(!hasSupplementary) { - /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ - stage2Entry=0; - } else { - stage2Entry=MBCS_STAGE_2_FROM_U(table, c); - } - } - } else { - /* error 
handling: illegal UTF-8 byte sequence */ - source-=(toULength-oldToULength); - while(oldToULengthtoUBytes[oldToULength++]=*source++; - } - utf8->toULength=toULength; - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - return; - } - - /* get the bytes and the length for the output */ - /* MBCS_OUTPUT_2 */ - value=MBCS_VALUE_2_FROM_STAGE_2(results, stage2Entry, c); - - /* is this code point assigned, or do we use fallbacks? */ - if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || - (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0)) - ) { - goto unassigned; - } - } - } - - /* write the output character bytes from value and length */ - /* from the first if in the loop we know that targetCapacity>0 */ - if(value<=0xff) { - /* this is easy because we know that there is enough space */ - *target++=(uint8_t)value; - --targetCapacity; - } else /* length==2 */ { - *target++=(uint8_t)(value>>8); - if(2<=targetCapacity) { - *target++=(uint8_t)value; - targetCapacity-=2; - } else { - cnv->charErrorBuffer[0]=(char)value; - cnv->charErrorBufferLength=1; - - /* target overflow */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - continue; - -unassigned: - { - /* - * Try an extension mapping. - * Pass in no source because we don't have UTF-16 input. - * If we have a partial match on c, we will return and revert - * to UTF-8->UTF-16->charset conversion. - */ - static const UChar nul=0; - const UChar *noSource=&nul; - c=_extFromU(cnv, cnv->sharedData, - c, &noSource, noSource, - &target, target+targetCapacity, - NULL, -1, - pFromUArgs->flush, - pErrorCode); - - if(U_FAILURE(*pErrorCode)) { - /* not mappable or buffer overflow */ - cnv->fromUChar32=c; - break; - } else if(cnv->preFromUFirstCP>=0) { - /* - * Partial match, return and revert to pivoting. - * In normal from-UTF-16 conversion, we would just continue - * but then exit the loop because the extension match would - * have consumed the source. 
- */ - *pErrorCode=U_USING_DEFAULT_WARNING; - break; - } else { - /* a mapping was written to the target, continue */ - - /* recalculate the targetCapacity after an extension mapping */ - targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target); - continue; - } - } - } else { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - /* - * The sourceLimit may have been adjusted before the conversion loop - * to stop before a truncated sequence. - * If so, then collect the truncated sequence now. - */ - if(U_SUCCESS(*pErrorCode) && - cnv->preFromUFirstCP<0 && - source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { - c=utf8->toUBytes[0]=b=*source++; - toULength=1; - toULimit=U8_COUNT_BYTES(b); - while(sourcetoUBytes[toULength++]=b=*source++; - c=(c<<6)+b; - } - utf8->toUnicodeStatus=c; - utf8->toULength=toULength; - utf8->mode=toULimit; - } - - /* write back the updated pointers */ - pToUArgs->source=(char *)source; - pFromUArgs->target=(char *)target; -} - -/* miscellaneous ------------------------------------------------------------ */ - -static void U_CALLCONV -ucnv_MBCSGetStarters(const UConverter* cnv, - UBool starters[256], - UErrorCode *) { - const int32_t *state0; - int i; - - state0=cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState]; - for(i=0; i<256; ++i) { - /* all bytes that cause a state transition from state 0 are lead bytes */ - starters[i]= (UBool)MBCS_ENTRY_IS_TRANSITION(state0[i]); - } -} - -/* - * This is an internal function that allows other converter implementations - * to check whether a byte is a lead byte. 
- */ -U_CFUNC UBool -ucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) { - return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->mbcs.stateTable[0][(uint8_t)byte]); -} - -static void U_CALLCONV -ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs, - int32_t offsetIndex, - UErrorCode *pErrorCode) { - UConverter *cnv=pArgs->converter; - char *p, *subchar; - char buffer[4]; - int32_t length; - - /* first, select between subChar and subChar1 */ - if( cnv->subChar1!=0 && - (cnv->sharedData->mbcs.extIndexes!=NULL ? - cnv->useSubChar1 : - (cnv->invalidUCharBuffer[0]<=0xff)) - ) { - /* select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS behavior) */ - subchar=(char *)&cnv->subChar1; - length=1; - } else { - /* select subChar in all other cases */ - subchar=(char *)cnv->subChars; - length=cnv->subCharLen; - } - - /* reset the selector for the next code point */ - cnv->useSubChar1=FALSE; - - if (cnv->sharedData->mbcs.outputType == MBCS_OUTPUT_2_SISO) { - p=buffer; - - /* fromUnicodeStatus contains prevLength */ - switch(length) { - case 1: - if(cnv->fromUnicodeStatus==2) { - /* DBCS mode and SBCS sub char: change to SBCS */ - cnv->fromUnicodeStatus=1; - *p++=UCNV_SI; - } - *p++=subchar[0]; - break; - case 2: - if(cnv->fromUnicodeStatus<=1) { - /* SBCS mode and DBCS sub char: change to DBCS */ - cnv->fromUnicodeStatus=2; - *p++=UCNV_SO; - } - *p++=subchar[0]; - *p++=subchar[1]; - break; - default: - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - subchar=buffer; - length=(int32_t)(p-buffer); - } - - ucnv_cbFromUWriteBytes(pArgs, subchar, length, offsetIndex, pErrorCode); -} - -U_CFUNC UConverterType -ucnv_MBCSGetType(const UConverter* converter) { - /* SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but here we cheat a little */ - if(converter->sharedData->mbcs.countStates==1) { - return (UConverterType)UCNV_SBCS; - } else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) { - return 
(UConverterType)UCNV_EBCDIC_STATEFUL; - } else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) { - return (UConverterType)UCNV_DBCS; - } - return (UConverterType)UCNV_MBCS; -} - -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/deps/node/deps/icu-small/source/common/ucnvmbcs.h b/deps/node/deps/icu-small/source/common/ucnvmbcs.h deleted file mode 100644 index a750b92e..00000000 --- a/deps/node/deps/icu-small/source/common/ucnvmbcs.h +++ /dev/null @@ -1,605 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2000-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ucnvmbcs.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000jul07 -* created by: Markus W. Scherer -*/ - -#ifndef __UCNVMBCS_H__ -#define __UCNVMBCS_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv.h" -#include "ucnv_cnv.h" -#include "ucnv_ext.h" - -/** - * ICU conversion (.cnv) data file structure, following the usual UDataInfo - * header. - * - * Format version: 6.2 - * - * struct UConverterStaticData -- struct containing the converter name, IBM CCSID, - * min/max bytes per character, etc. - * see ucnv_bld.h - * - * -------------------- - * - * The static data is followed by conversionType-specific data structures. - * At the moment, there are only variations of MBCS converters. They all have - * the same toUnicode structures, while the fromUnicode structures for SBCS - * differ from those for other MBCS-style converters. - * - * _MBCSHeader.version 5 is optional and not backward-compatible - * (as usual for changes in the major version field). 
- * - * Versions 5.m work like versions 4.m except: - * - The _MBCSHeader has variable length (and is always longer than in version 4). - * See the struct _MBCSHeader further description below. - * - There is a set of flags which indicate further incompatible changes. - * (Reader code must reject the file if it does not recognize them all.) - * - In particular, one of these flags indicates that most of the fromUnicode - * data is missing and must be reconstituted from the toUnicode data - * and from the utf8Friendly mbcsIndex at load time. - * (This only works with a utf8Friendly table.) - * In this case, makeconv may increase maxFastUChar automatically to U+FFFF. - * - * The first of these versions is 5.3, which is like 4.3 except for the differences above. - * - * When possible, makeconv continues to generate version 4.m files. - * - * _MBCSHeader.version 5.4/4.4 supports "good one-way" mappings (|4) - * in the extension tables (fromUTableValues bit 30). See ucnv_ext.h for details. - * - * _MBCSHeader.version 4.3 optionally modifies the fromUnicode data structures - * slightly and optionally adds a table for conversion to MBCS (non-SBCS) - * charsets. - * - * The modifications are to make the data utf8Friendly. Not every 4.3 file - * file contains utf8Friendly data. - * It is utf8Friendly if _MBCSHeader.version[2]!=0. - * In this case, the data structures are utf8Friendly up to the code point - * maxFastUChar=((_MBCSHeader.version[2]<<8)|0xff) - * - * A utf8Friendly file has fromUnicode stage 3 entries for code points up to - * maxFastUChar allocated in blocks of 64 for indexing with the 6 bits from - * a UTF-8 trail byte. ASCII is allocated linearly with 128 contiguous entries. - * - * In addition, a utf8Friendly MBCS file contains an additional - * uint16_t mbcsIndex[(maxFastUChar+1)>>6]; - * which replaces the stage 1 and 2 tables for indexing with bits from the - * UTF-8 lead byte and middle trail byte. 
Unlike the older MBCS stage 2 table, - * the mbcsIndex does not contain roundtrip flags. Therefore, all fallbacks - * from code points up to maxFastUChar (and roundtrips to 0x00) are moved to - * the extension data structure. This also allows for faster roundtrip - * conversion from UTF-16. - * - * SBCS files do not contain an additional sbcsIndex[] array because the - * proportional size increase would be noticeable, but the runtime - * code builds one for the code point range for which the runtime conversion - * code is optimized. - * - * For SBCS, maxFastUChar should be at least U+0FFF. The initial makeconv - * implementation sets it to U+1FFF. Because the sbcsIndex is not stored in - * the file, a larger maxFastUChar only affects stage 3 block allocation size - * and is free in empty blocks. (Larger blocks with sparse contents cause larger - * files.) U+1FFF includes almost all of the small scripts. - * U+0FFF covers UTF-8 two-byte sequences and three-byte sequences starting with - * 0xe0. This includes most scripts with legacy SBCS charsets. - * The initial runtime implementation using 4.3 files only builds an sbcsIndex - * for code points up to U+0FFF. - * - * For MBCS, maxFastUChar should be at least U+D7FF (=initial value). - * This boundary is convenient because practically all of the commonly used - * characters are below it, and because it is the boundary to surrogate - * code points, above which special handling is necessary anyway. - * (Surrogate pair assembly for UTF-16, validity checking for UTF-8.) - * - * maxFastUChar could be up to U+FFFF to cover the whole BMP, which could be - * useful especially for conversion from UTF-8 when the input can be assumed - * to be valid, because the surrogate range would then not have to be - * checked. 
- * (With maxFastUChar=0xffff, makeconv would have to check for mbcsIndex value - * overflow because with the all-unassigned block 0 and nearly full mappings - * from the BMP it is theoretically possible that an index into stage 3 - * exceeds 16 bits.) - * - * _MBCSHeader.version 4.2 adds an optional conversion extension data structure. - * If it is present, then an ICU version reading header versions 4.0 or 4.1 - * will be able to use the base table and ignore the extension. - * - * The unicodeMask in the static data is part of the base table data structure. - * Especially, the UCNV_HAS_SUPPLEMENTARY flag determines the length of the - * fromUnicode stage 1 array. - * The static data unicodeMask refers only to the base table's properties if - * a base table is included. - * In an extension-only file, the static data unicodeMask is 0. - * The extension data indexes have a separate field with the unicodeMask flags. - * - * MBCS-style data structure following the static data. - * Offsets are counted in bytes from the beginning of the MBCS header structure. - * Details about usage in comments in ucnvmbcs.c. - * - * struct _MBCSHeader (see the definition in this header file below) - * contains 32-bit fields as follows: - * 8 values: - * 0 uint8_t[4] MBCS version in UVersionInfo format (currently 4.3.x.0) - * 1 uint32_t countStates - * 2 uint32_t countToUFallbacks - * 3 uint32_t offsetToUCodeUnits - * 4 uint32_t offsetFromUTable - * 5 uint32_t offsetFromUBytes - * 6 uint32_t flags, bits: - * 31.. 8 offsetExtension -- _MBCSHeader.version 4.2 (ICU 2.8) and higher - * 0 for older versions and if - * there is not extension structure - * 7.. 0 outputType - * 7 uint32_t fromUBytesLength -- _MBCSHeader.version 4.1 (ICU 2.4) and higher - * counts bytes in fromUBytes[] - * - * New and required in version 5: - * 8 uint32_t options, bits: - * 31..16 reserved for flags that can be added without breaking - * backward compatibility - * 15.. 
6 reserved for flags whose addition will break - * backward compatibility - * 6 MBCS_OPT_FROM_U -- if set, - * then most of the fromUnicode data is omitted; - * fullStage2Length is present and the missing - * bottom part of stage 2 must be reconstituted from - * the toUnicode data; - * stage 3 is missing completely as well; - * not used for SBCS tables - * 5.. 0 length of the _MBCSHeader (number of uint32_t) - * - * New and optional in version 5: - * 9 uint32_t fullStage2Length: used if MBCS_OPT_FROM_U is set - * specifies the full length of stage 2 - * including the omitted part - * - * if(outputType==MBCS_OUTPUT_EXT_ONLY) { - * -- base table name for extension-only table - * char baseTableName[variable]; -- with NUL plus padding for 4-alignment - * - * -- all _MBCSHeader fields except for version and flags are 0 - * } else { - * -- normal base table with optional extension - * - * int32_t stateTable[countStates][256]; - * - * struct _MBCSToUFallback { (fallbacks are sorted by offset) - * uint32_t offset; - * UChar32 codePoint; - * } toUFallbacks[countToUFallbacks]; - * - * uint16_t unicodeCodeUnits[(offsetFromUTable-offsetToUCodeUnits)/2]; - * (padded to an even number of units) - * - * -- stage 1 tables - * if(staticData.unicodeMask&UCNV_HAS_SUPPLEMENTARY) { - * -- stage 1 table for all of Unicode - * uint16_t fromUTable[0x440]; (32-bit-aligned) - * } else { - * -- BMP-only tables have a smaller stage 1 table - * uint16_t fromUTable[0x40]; (32-bit-aligned) - * } - * - * -- stage 2 tables - * length determined by top of stage 1 and bottom of stage 3 tables - * if(outputType==MBCS_OUTPUT_1) { - * -- SBCS: pure indexes - * uint16_t stage 2 indexes[?]; - * } else { - * -- DBCS, MBCS, EBCDIC_STATEFUL, ...: roundtrip flags and indexes - * uint32_t stage 2 flags and indexes[?]; - * if(options&MBCS_OPT_NO_FROM_U) { - * stage 2 really has length fullStage2Length - * and the omitted lower part must be reconstituted from - * the toUnicode data - * } - * } - * - * -- stage 
3 tables with byte results - * if(outputType==MBCS_OUTPUT_1) { - * -- SBCS: each 16-bit result contains flags and the result byte, see ucnvmbcs.c - * uint16_t fromUBytes[fromUBytesLength/2]; - * } else if(!(options&MBCS_OPT_NO_FROM_U)) { - * -- DBCS, MBCS, EBCDIC_STATEFUL, ... 2/3/4 bytes result, see ucnvmbcs.c - * uint8_t fromUBytes[fromUBytesLength]; or - * uint16_t fromUBytes[fromUBytesLength/2]; or - * uint32_t fromUBytes[fromUBytesLength/4]; - * } else { - * fromUBytes[] must be reconstituted from the toUnicode data - * } - * - * -- optional utf8Friendly mbcsIndex -- _MBCSHeader.version 4.3 (ICU 3.8) and higher - * if(outputType!=MBCS_OUTPUT_1 && - * _MBCSHeader.version[1]>=3 && - * (maxFastUChar=_MBCSHeader.version[2])!=0 - * ) { - * maxFastUChar=(maxFastUChar<<8)|0xff; - * uint16_t mbcsIndex[(maxFastUChar+1)>>6]; - * } - * } - * - * -- extension table, details see ucnv_ext.h - * int32_t indexes[>=32]; ... - */ - -/* MBCS converter data and state -------------------------------------------- */ - -enum { - MBCS_MAX_STATE_COUNT=128 -}; - -/** - * MBCS action codes for conversions to Unicode. - * These values are in bits 23..20 of the state table entries. 
- */ -enum { - MBCS_STATE_VALID_DIRECT_16, - MBCS_STATE_VALID_DIRECT_20, - - MBCS_STATE_FALLBACK_DIRECT_16, - MBCS_STATE_FALLBACK_DIRECT_20, - - MBCS_STATE_VALID_16, - MBCS_STATE_VALID_16_PAIR, - - MBCS_STATE_UNASSIGNED, - MBCS_STATE_ILLEGAL, - - MBCS_STATE_CHANGE_ONLY -}; - -/* Macros for state table entries */ -#define MBCS_ENTRY_TRANSITION(state, offset) (int32_t)(((int32_t)(state)<<24L)|(offset)) -#define MBCS_ENTRY_TRANSITION_SET_OFFSET(entry, offset) (int32_t)(((entry)&0xff000000)|(offset)) -#define MBCS_ENTRY_TRANSITION_ADD_OFFSET(entry, offset) (int32_t)((entry)+(offset)) - -#define MBCS_ENTRY_FINAL(state, action, value) (int32_t)(0x80000000|((int32_t)(state)<<24L)|((action)<<20L)|(value)) -#define MBCS_ENTRY_SET_FINAL(entry) (int32_t)((entry)|0x80000000) -#define MBCS_ENTRY_FINAL_SET_ACTION(entry, action) (int32_t)(((entry)&0xff0fffff)|((int32_t)(action)<<20L)) -#define MBCS_ENTRY_FINAL_SET_VALUE(entry, value) (int32_t)(((entry)&0xfff00000)|(value)) -#define MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, action, value) (int32_t)(((entry)&0xff000000)|((int32_t)(action)<<20L)|(value)) - -#define MBCS_ENTRY_SET_STATE(entry, state) (int32_t)(((entry)&0x80ffffff)|((int32_t)(state)<<24L)) - -#define MBCS_ENTRY_STATE(entry) ((((uint32_t)entry)>>24)&0x7f) - -#define MBCS_ENTRY_IS_TRANSITION(entry) ((entry)>=0) -#define MBCS_ENTRY_IS_FINAL(entry) ((entry)<0) - -#define MBCS_ENTRY_TRANSITION_STATE(entry) (((uint32_t)entry)>>24) -#define MBCS_ENTRY_TRANSITION_OFFSET(entry) ((entry)&0xffffff) - -#define MBCS_ENTRY_FINAL_STATE(entry) ((((uint32_t)entry)>>24)&0x7f) -#define MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry) ((entry)<(int32_t)0x80100000) -#define MBCS_ENTRY_FINAL_ACTION(entry) ((((uint32_t)entry)>>20)&0xf) -#define MBCS_ENTRY_FINAL_VALUE(entry) ((entry)&0xfffff) -#define MBCS_ENTRY_FINAL_VALUE_16(entry) (uint16_t)(entry) - -#define IS_ASCII_ROUNDTRIP(b, asciiRoundtrips) (((asciiRoundtrips) & (1<<((b)>>2)))!=0) - -/* single-byte fromUnicode: get the 16-bit result word 
*/ -#define MBCS_SINGLE_RESULT_FROM_U(table, results, c) (results)[ (table)[ (table)[(c)>>10] +(((c)>>4)&0x3f) ] +((c)&0xf) ] - -/* single-byte fromUnicode using the sbcsIndex */ -#define SBCS_RESULT_FROM_LOW_BMP(table, results, c) (results)[ (table)[(c)>>6] +((c)&0x3f) ] - -/* single-byte fromUTF8 using the sbcsIndex; l and t must be masked externally; can be l=0 and t<=0x7f */ -#define SBCS_RESULT_FROM_UTF8(table, results, l, t) (results)[ (table)[l] +(t) ] - -/* multi-byte fromUnicode: get the 32-bit stage 2 entry */ -#define MBCS_STAGE_2_FROM_U(table, c) ((const uint32_t *)(table))[ (table)[(c)>>10] +(((c)>>4)&0x3f) ] -#define MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ( ((stage2Entry) & ((uint32_t)1<< (16+((c)&0xf)) )) !=0) - -#define MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c) ((uint16_t *)(bytes))[16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)] -#define MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c) ((uint32_t *)(bytes))[16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)] - -#define MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c) ((bytes)+(16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf))*3) - -/* double-byte fromUnicode using the mbcsIndex */ -#define DBCS_RESULT_FROM_MOST_BMP(table, results, c) (results)[ (table)[(c)>>6] +((c)&0x3f) ] - -/* double-byte fromUTF8 using the mbcsIndex; l and t1 combined into lt1; lt1 and t2 must be masked externally */ -#define DBCS_RESULT_FROM_UTF8(table, results, lt1, t2) (results)[ (table)[lt1] +(t2) ] - - -/** - * MBCS output types for conversions from Unicode. - * These per-converter types determine the storage method in stage 3 of the lookup table, - * mostly how many bytes are stored per entry. 
- */ -enum { - MBCS_OUTPUT_1, /* 0 */ - MBCS_OUTPUT_2, /* 1 */ - MBCS_OUTPUT_3, /* 2 */ - MBCS_OUTPUT_4, /* 3 */ - - MBCS_OUTPUT_3_EUC=8, /* 8 */ - MBCS_OUTPUT_4_EUC, /* 9 */ - - MBCS_OUTPUT_2_SISO=12, /* c */ - MBCS_OUTPUT_2_HZ, /* d */ - - MBCS_OUTPUT_EXT_ONLY, /* e */ - - MBCS_OUTPUT_COUNT, - - MBCS_OUTPUT_DBCS_ONLY=0xdb /* runtime-only type for DBCS-only handling of SISO tables */ -}; - -/** - * Fallbacks to Unicode are stored outside the normal state table and code point structures - * in a vector of items of this type. They are sorted by offset. - */ -typedef struct { - uint32_t offset; - UChar32 codePoint; -} _MBCSToUFallback; - -/** Constants for fast and UTF-8-friendly conversion. */ -enum { - SBCS_FAST_MAX=0x0fff, /* maximum code point with UTF-8-friendly SBCS runtime code, see makeconv SBCS_UTF8_MAX */ - SBCS_FAST_LIMIT=SBCS_FAST_MAX+1, /* =0x1000 */ - MBCS_FAST_MAX=0xd7ff, /* maximum code point with UTF-8-friendly MBCS runtime code, see makeconv MBCS_UTF8_MAX */ - MBCS_FAST_LIMIT=MBCS_FAST_MAX+1 /* =0xd800 */ -}; - -/** - * This is the MBCS part of the UConverterTable union (a runtime data structure). - * It keeps all the per-converter data and points into the loaded mapping tables. 
- * - * utf8Friendly data structures added with _MBCSHeader.version 4.3 - */ -typedef struct UConverterMBCSTable { - /* toUnicode */ - uint8_t countStates, dbcsOnlyState, stateTableOwned; - uint32_t countToUFallbacks; - - const int32_t (*stateTable)/*[countStates]*/[256]; - int32_t (*swapLFNLStateTable)/*[countStates]*/[256]; /* for swaplfnl */ - const uint16_t *unicodeCodeUnits/*[countUnicodeResults]*/; - const _MBCSToUFallback *toUFallbacks; - - /* fromUnicode */ - const uint16_t *fromUnicodeTable; - const uint16_t *mbcsIndex; /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */ - uint16_t sbcsIndex[SBCS_FAST_LIMIT>>6]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */ - const uint8_t *fromUnicodeBytes; - uint8_t *swapLFNLFromUnicodeBytes; /* for swaplfnl */ - uint32_t fromUBytesLength; - uint8_t outputType, unicodeMask; - UBool utf8Friendly; /* for utf8Friendly data */ - UChar maxFastUChar; /* for utf8Friendly data */ - - /* roundtrips */ - uint32_t asciiRoundtrips; - - /* reconstituted data that was omitted from the .cnv file */ - uint8_t *reconstitutedData; - - /* converter name for swaplfnl */ - char *swapLFNLName; - - /* extension data */ - struct UConverterSharedData *baseSharedData; - const int32_t *extIndexes; -} UConverterMBCSTable; - -#define UCNV_MBCS_TABLE_INITIALIZER { \ - /* toUnicode */ \ - 0, 0, 0, \ - 0, \ - \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - \ - /* fromUnicode */ \ - NULL, \ - NULL, \ - { 0 }, \ - NULL, \ - NULL, \ - 0, \ - 0, 0, \ - FALSE, \ - 0, \ - \ - /* roundtrips */ \ - 0, \ - \ - /* reconstituted data that was omitted from the .cnv file */ \ - NULL, \ - \ - /* converter name for swaplfnl */ \ - NULL, \ - \ - /* extension data */ \ - NULL, \ - NULL \ -} - -enum { - MBCS_OPT_LENGTH_MASK=0x3f, - MBCS_OPT_NO_FROM_U=0x40, - /* - * If any of the following options bits are set, - * then the file must be rejected. 
- */ - MBCS_OPT_INCOMPATIBLE_MASK=0xffc0, - /* - * Remove bits from this mask as more options are recognized - * by all implementations that use this constant. - */ - MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK=0xff80 -}; - -enum { - MBCS_HEADER_V4_LENGTH=8, - MBCS_HEADER_V5_MIN_LENGTH=9 -}; - -/** - * MBCS data header. See data format description above. - */ -typedef struct { - UVersionInfo version; - uint32_t countStates, - countToUFallbacks, - offsetToUCodeUnits, - offsetFromUTable, - offsetFromUBytes, - flags, - fromUBytesLength; - - /* new and required in version 5 */ - uint32_t options; - - /* new and optional in version 5; used if options&MBCS_OPT_NO_FROM_U */ - uint32_t fullStage2Length; /* number of 32-bit units */ -} _MBCSHeader; - -#define UCNV_MBCS_HEADER_INITIALIZER { { 0 }, 0, 0, 0, 0, 0, 0, 0, 0, 0 } - -/* - * This is a simple version of _MBCSGetNextUChar() that is used - * by other converter implementations. - * It only returns an "assigned" result if it consumes the entire input. - * It does not use state from the converter, nor error codes. - * It does not handle the EBCDIC swaplfnl option (set in UConverter). - * It handles conversion extensions but not GB 18030. - * - * Return value: - * U+fffe unassigned - * U+ffff illegal - * otherwise the Unicode code point - */ -U_CFUNC UChar32 -ucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData, - const char *source, int32_t length, - UBool useFallback); - -/** - * This version of _MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages. - * It does not handle the EBCDIC swaplfnl option (set in UConverter). - * It does not handle conversion extensions (_extToU()). - */ -U_CFUNC UChar32 -ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData, - uint8_t b, UBool useFallback); - -/** - * This macro version of _MBCSSingleSimpleGetNextUChar() gets a code point from a byte. 
- * It works for single-byte, single-state codepages that only map - * to and from BMP code points, and it always - * returns fallback values. - */ -#define _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(sharedData, b) \ - (UChar)MBCS_ENTRY_FINAL_VALUE_16((sharedData)->mbcs.stateTable[0][(uint8_t)(b)]) - -/** - * This is an internal function that allows other converter implementations - * to check whether a byte is a lead byte. - */ -U_CFUNC UBool -ucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte); - -/** This is a macro version of _MBCSIsLeadByte(). */ -#define _MBCS_IS_LEAD_BYTE(sharedData, byte) \ - (UBool)MBCS_ENTRY_IS_TRANSITION((sharedData)->mbcs.stateTable[0][(uint8_t)(byte)]) - -/* - * This is another simple conversion function for internal use by other - * conversion implementations. - * It does not use the converter state nor call callbacks. - * It does not handle the EBCDIC swaplfnl option (set in UConverter). - * It handles conversion extensions but not GB 18030. - * - * It converts one single Unicode code point into codepage bytes, encoded - * as one 32-bit value. The function returns the number of bytes in *pValue: - * 1..4 the number of bytes in *pValue - * 0 unassigned (*pValue undefined) - * -1 illegal (currently not used, *pValue undefined) - * - * *pValue will contain the resulting bytes with the last byte in bits 7..0, - * the second to last byte in bits 15..8, etc. - * Currently, the function assumes but does not check that 0<=c<=0x10ffff. - */ -U_CFUNC int32_t -ucnv_MBCSFromUChar32(UConverterSharedData *sharedData, - UChar32 c, uint32_t *pValue, - UBool useFallback); - -/** - * This version of _MBCSFromUChar32() is optimized for single-byte codepages. - * It does not handle the EBCDIC swaplfnl option (set in UConverter). - * - * It returns the codepage byte for the code point, or -1 if it is unassigned. 
- */ -U_CFUNC int32_t -ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData, - UChar32 c, - UBool useFallback); - -/** - * SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but - * we cheat a little about the type, returning the old types if appropriate. - */ -U_CFUNC UConverterType -ucnv_MBCSGetType(const UConverter* converter); - -U_CFUNC void -ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode); -U_CFUNC void -ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode); - -/* - * Internal function returning a UnicodeSet for toUnicode() conversion. - * Currently only used for ISO-2022-CN, and only handles roundtrip mappings. - * In the future, if we add support for fallback sets, this function - * needs to be updated. - * Handles extensions. - * Does not empty the set first. - */ -U_CFUNC void -ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode); - -/* - * Same as ucnv_MBCSGetUnicodeSetForUnicode() but - * the set can be filtered by encoding scheme. - * Used by stateful converters which share regular conversion tables - * but only use a subset of their mappings. - */ -U_CFUNC void -ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData, - const USetAdder *sa, - UConverterUnicodeSet which, - UConverterSetFilter filter, - UErrorCode *pErrorCode); - -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnvscsu.cpp b/deps/node/deps/icu-small/source/common/ucnvscsu.cpp deleted file mode 100644 index eb7b7ad5..00000000 --- a/deps/node/deps/icu-small/source/common/ucnvscsu.cpp +++ /dev/null @@ -1,2045 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2000-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ucnvscsu.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000nov18 -* created by: Markus W. Scherer -* -* This is an implementation of the Standard Compression Scheme for Unicode -* as defined in http://www.unicode.org/unicode/reports/tr6/ . -* Reserved commands and window settings are treated as illegal sequences and -* will result in callback calls. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/ucnv_cb.h" -#include "unicode/utf16.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cmemory.h" - -/* SCSU definitions --------------------------------------------------------- */ - -/* SCSU command byte values */ -enum { - SQ0=0x01, /* Quote from window pair 0 */ - SQ7=0x08, /* Quote from window pair 7 */ - SDX=0x0B, /* Define a window as extended */ - Srs=0x0C, /* reserved */ - SQU=0x0E, /* Quote a single Unicode character */ - SCU=0x0F, /* Change to Unicode mode */ - SC0=0x10, /* Select window 0 */ - SC7=0x17, /* Select window 7 */ - SD0=0x18, /* Define and select window 0 */ - SD7=0x1F, /* Define and select window 7 */ - - UC0=0xE0, /* Select window 0 */ - UC7=0xE7, /* Select window 7 */ - UD0=0xE8, /* Define and select window 0 */ - UD7=0xEF, /* Define and select window 7 */ - UQU=0xF0, /* Quote a single Unicode character */ - UDX=0xF1, /* Define a Window as extended */ - Urs=0xF2 /* reserved */ -}; - -enum { - /* - * Unicode code points from 3400 to E000 are not adressible by - * dynamic window, since in these areas no short run alphabets are - * found. 
Therefore add gapOffset to all values from gapThreshold. - */ - gapThreshold=0x68, - gapOffset=0xAC00, - - /* values between reservedStart and fixedThreshold are reserved */ - reservedStart=0xA8, - - /* use table of predefined fixed offsets for values from fixedThreshold */ - fixedThreshold=0xF9 -}; - -/* constant offsets for the 8 static windows */ -static const uint32_t staticOffsets[8]={ - 0x0000, /* ASCII for quoted tags */ - 0x0080, /* Latin - 1 Supplement (for access to punctuation) */ - 0x0100, /* Latin Extended-A */ - 0x0300, /* Combining Diacritical Marks */ - 0x2000, /* General Punctuation */ - 0x2080, /* Currency Symbols */ - 0x2100, /* Letterlike Symbols and Number Forms */ - 0x3000 /* CJK Symbols and punctuation */ -}; - -/* initial offsets for the 8 dynamic (sliding) windows */ -static const uint32_t initialDynamicOffsets[8]={ - 0x0080, /* Latin-1 */ - 0x00C0, /* Latin Extended A */ - 0x0400, /* Cyrillic */ - 0x0600, /* Arabic */ - 0x0900, /* Devanagari */ - 0x3040, /* Hiragana */ - 0x30A0, /* Katakana */ - 0xFF00 /* Fullwidth ASCII */ -}; - -/* Table of fixed predefined Offsets */ -static const uint32_t fixedOffsets[]={ - /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */ - /* 0xFA */ 0x0250, /* IPA extensions */ - /* 0xFB */ 0x0370, /* Greek */ - /* 0xFC */ 0x0530, /* Armenian */ - /* 0xFD */ 0x3040, /* Hiragana */ - /* 0xFE */ 0x30A0, /* Katakana */ - /* 0xFF */ 0xFF60 /* Halfwidth Katakana */ -}; - -/* state values */ -enum { - readCommand, - quotePairOne, - quotePairTwo, - quoteOne, - definePairOne, - definePairTwo, - defineOne -}; - -typedef struct SCSUData { - /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */ - uint32_t toUDynamicOffsets[8]; - uint32_t fromUDynamicOffsets[8]; - - /* state machine state - toUnicode */ - UBool toUIsSingleByteMode; - uint8_t toUState; - int8_t toUQuoteWindow, toUDynamicWindow; - uint8_t toUByteOne; - uint8_t toUPadding[3]; - - /* state machine state - 
fromUnicode */ - UBool fromUIsSingleByteMode; - int8_t fromUDynamicWindow; - - /* - * windowUse[] keeps track of the use of the dynamic windows: - * At nextWindowUseIndex there is the least recently used window, - * and the following windows (in a wrapping manner) are more and more - * recently used. - * At nextWindowUseIndex-1 there is the most recently used window. - */ - uint8_t locale; - int8_t nextWindowUseIndex; - int8_t windowUse[8]; -} SCSUData; - -static const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 }; -static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 }; - -enum { - lGeneric, l_ja -}; - -/* SCSU setup functions ----------------------------------------------------- */ -U_CDECL_BEGIN -static void U_CALLCONV -_SCSUReset(UConverter *cnv, UConverterResetChoice choice) { - SCSUData *scsu=(SCSUData *)cnv->extraInfo; - - if(choice<=UCNV_RESET_TO_UNICODE) { - /* reset toUnicode */ - uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32); - - scsu->toUIsSingleByteMode=TRUE; - scsu->toUState=readCommand; - scsu->toUQuoteWindow=scsu->toUDynamicWindow=0; - scsu->toUByteOne=0; - - cnv->toULength=0; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - /* reset fromUnicode */ - uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32); - - scsu->fromUIsSingleByteMode=TRUE; - scsu->fromUDynamicWindow=0; - - scsu->nextWindowUseIndex=0; - switch(scsu->locale) { - case l_ja: - uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8); - break; - default: - uprv_memcpy(scsu->windowUse, initialWindowUse, 8); - break; - } - - cnv->fromUChar32=0; - } -} - -static void U_CALLCONV -_SCSUOpen(UConverter *cnv, - UConverterLoadArgs *pArgs, - UErrorCode *pErrorCode) { - const char *locale=pArgs->locale; - if(pArgs->onlyTestIsLoadable) { - return; - } - cnv->extraInfo=uprv_malloc(sizeof(SCSUData)); - if(cnv->extraInfo!=NULL) { - if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) { - ((SCSUData 
*)cnv->extraInfo)->locale=l_ja; - } else { - ((SCSUData *)cnv->extraInfo)->locale=lGeneric; - } - _SCSUReset(cnv, UCNV_RESET_BOTH); - } else { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - } - - /* Set the substitution character U+fffd as a Unicode string. */ - cnv->subUChars[0]=0xfffd; - cnv->subCharLen=-1; -} - -static void U_CALLCONV -_SCSUClose(UConverter *cnv) { - if(cnv->extraInfo!=NULL) { - if(!cnv->isExtraLocal) { - uprv_free(cnv->extraInfo); - } - cnv->extraInfo=NULL; - } -} - -/* SCSU-to-Unicode conversion functions ------------------------------------- */ - -static void U_CALLCONV -_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - SCSUData *scsu; - const uint8_t *source, *sourceLimit; - UChar *target; - const UChar *targetLimit; - int32_t *offsets; - UBool isSingleByteMode; - uint8_t state, byteOne; - int8_t quoteWindow, dynamicWindow; - - int32_t sourceIndex, nextSourceIndex; - - uint8_t b; - - /* set up the local pointers */ - cnv=pArgs->converter; - scsu=(SCSUData *)cnv->extraInfo; - - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetLimit=pArgs->targetLimit; - offsets=pArgs->offsets; - - /* get the state machine state */ - isSingleByteMode=scsu->toUIsSingleByteMode; - state=scsu->toUState; - quoteWindow=scsu->toUQuoteWindow; - dynamicWindow=scsu->toUDynamicWindow; - byteOne=scsu->toUByteOne; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex=state==readCommand ? 0 : -1; - nextSourceIndex=0; - - /* - * conversion "loop" - * - * For performance, this is not a normal C loop. - * Instead, there are two code blocks for the two SCSU modes. - * The function branches to either one, and a change of the mode is done with a goto to - * the other branch. 
- * - * Each branch has two conventional loops: - * - a fast-path loop for the most common codes in the mode - * - a loop for all other codes in the mode - * When the fast-path runs into a code that it cannot handle, its loop ends and it - * runs into the following loop to handle the other codes. - * The end of the input or output buffer is also handled by the slower loop. - * The slow loop jumps (goto) to the fast-path loop again as soon as possible. - * - * The callback handling is done by returning with an error code. - * The conversion framework actually calls the callback function. - */ - if(isSingleByteMode) { - /* fast path for single-byte mode */ - if(state==readCommand) { -fastSingle: - while(source=0x20) { - ++source; - ++nextSourceIndex; - if(b<=0x7f) { - /* write US-ASCII graphic character or DEL */ - *target++=(UChar)b; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - } else { - /* write from dynamic window */ - uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); - if(c<=0xffff) { - *target++=(UChar)c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - } else { - /* output surrogate pair */ - *target++=(UChar)(0xd7c0+(c>>10)); - if(targetUCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - goto endloop; - } - } - } - sourceIndex=nextSourceIndex; - } - } - - /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ -singleByteMode: - while(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - b=*source++; - ++nextSourceIndex; - switch(state) { - case readCommand: - /* redundant conditions are commented out */ - /* here: b<0x20 because otherwise we would be in fastSingle */ - if((1UL<toUBytes[0]=b; - cnv->toULength=1; - goto endloop; - } - - /* store the first byte of a multibyte sequence in toUBytes[] */ - cnv->toUBytes[0]=b; - cnv->toULength=1; - break; - case quotePairOne: - byteOne=b; - 
cnv->toUBytes[1]=b; - cnv->toULength=2; - state=quotePairTwo; - break; - case quotePairTwo: - *target++=(UChar)((byteOne<<8)|b); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - sourceIndex=nextSourceIndex; - state=readCommand; - goto fastSingle; - case quoteOne: - if(b<0x80) { - /* all static offsets are in the BMP */ - *target++=(UChar)(staticOffsets[quoteWindow]+b); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - } else { - /* write from dynamic window */ - uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); - if(c<=0xffff) { - *target++=(UChar)c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - } else { - /* output surrogate pair */ - *target++=(UChar)(0xd7c0+(c>>10)); - if(targetUCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - goto endloop; - } - } - } - sourceIndex=nextSourceIndex; - state=readCommand; - goto fastSingle; - case definePairOne: - dynamicWindow=(int8_t)((b>>5)&7); - byteOne=(uint8_t)(b&0x1f); - cnv->toUBytes[1]=b; - cnv->toULength=2; - state=definePairTwo; - break; - case definePairTwo: - scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); - sourceIndex=nextSourceIndex; - state=readCommand; - goto fastSingle; - case defineOne: - if(b==0) { - /* callback(illegal): Reserved window offset value 0 */ - cnv->toUBytes[1]=b; - cnv->toULength=2; - goto endloop; - } else if(btoUDynamicOffsets[dynamicWindow]=b<<7UL; - } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { - scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; - } else if(b>=fixedThreshold) { - scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; - } else { - /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ - cnv->toUBytes[1]=b; - cnv->toULength=2; - goto endloop; - } - sourceIndex=nextSourceIndex; - state=readCommand; - goto fastSingle; - } - } - } else { - /* fast path for Unicode mode */ - if(state==readCommand) { 
-fastUnicode: - while(source+1(Urs-UC0)) { - *target++=(UChar)((b<<8)|source[1]); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - sourceIndex=nextSourceIndex; - nextSourceIndex+=2; - source+=2; - } - } - - /* normal state machine for Unicode mode */ -/* unicodeByteMode: */ - while(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - b=*source++; - ++nextSourceIndex; - switch(state) { - case readCommand: - if((uint8_t)(b-UC0)>(Urs-UC0)) { - byteOne=b; - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=quotePairTwo; - } else if(/* UC0<=b && */ b<=UC7) { - dynamicWindow=(int8_t)(b-UC0); - sourceIndex=nextSourceIndex; - isSingleByteMode=TRUE; - goto fastSingle; - } else if(/* UD0<=b && */ b<=UD7) { - dynamicWindow=(int8_t)(b-UD0); - isSingleByteMode=TRUE; - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=defineOne; - goto singleByteMode; - } else if(b==UDX) { - isSingleByteMode=TRUE; - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=definePairOne; - goto singleByteMode; - } else if(b==UQU) { - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=quotePairOne; - } else /* Urs */ { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - cnv->toUBytes[0]=b; - cnv->toULength=1; - goto endloop; - } - break; - case quotePairOne: - byteOne=b; - cnv->toUBytes[1]=b; - cnv->toULength=2; - state=quotePairTwo; - break; - case quotePairTwo: - *target++=(UChar)((byteOne<<8)|b); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - sourceIndex=nextSourceIndex; - state=readCommand; - goto fastUnicode; - } - } - } -endloop: - - /* set the converter state back into UConverter */ - if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { - /* reset to deal with the next character */ - state=readCommand; - } else if(state==readCommand) { - /* not in a multi-byte sequence, reset toULength */ - cnv->toULength=0; - } - scsu->toUIsSingleByteMode=isSingleByteMode; - scsu->toUState=state; - scsu->toUQuoteWindow=quoteWindow; - 
scsu->toUDynamicWindow=dynamicWindow; - scsu->toUByteOne=byteOne; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - return; -} - -/* - * Identical to _SCSUToUnicodeWithOffsets but without offset handling. - * If a change is made in the original function, then either - * change this function the same way or - * re-copy the original function and remove the variables - * offsets, sourceIndex, and nextSourceIndex. - */ -static void U_CALLCONV -_SCSUToUnicode(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - SCSUData *scsu; - const uint8_t *source, *sourceLimit; - UChar *target; - const UChar *targetLimit; - UBool isSingleByteMode; - uint8_t state, byteOne; - int8_t quoteWindow, dynamicWindow; - - uint8_t b; - - /* set up the local pointers */ - cnv=pArgs->converter; - scsu=(SCSUData *)cnv->extraInfo; - - source=(const uint8_t *)pArgs->source; - sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetLimit=pArgs->targetLimit; - - /* get the state machine state */ - isSingleByteMode=scsu->toUIsSingleByteMode; - state=scsu->toUState; - quoteWindow=scsu->toUQuoteWindow; - dynamicWindow=scsu->toUDynamicWindow; - byteOne=scsu->toUByteOne; - - /* - * conversion "loop" - * - * For performance, this is not a normal C loop. - * Instead, there are two code blocks for the two SCSU modes. - * The function branches to either one, and a change of the mode is done with a goto to - * the other branch. - * - * Each branch has two conventional loops: - * - a fast-path loop for the most common codes in the mode - * - a loop for all other codes in the mode - * When the fast-path runs into a code that it cannot handle, its loop ends and it - * runs into the following loop to handle the other codes. - * The end of the input or output buffer is also handled by the slower loop. - * The slow loop jumps (goto) to the fast-path loop again as soon as possible. 
- * - * The callback handling is done by returning with an error code. - * The conversion framework actually calls the callback function. - */ - if(isSingleByteMode) { - /* fast path for single-byte mode */ - if(state==readCommand) { -fastSingle: - while(source=0x20) { - ++source; - if(b<=0x7f) { - /* write US-ASCII graphic character or DEL */ - *target++=(UChar)b; - } else { - /* write from dynamic window */ - uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); - if(c<=0xffff) { - *target++=(UChar)c; - } else { - /* output surrogate pair */ - *target++=(UChar)(0xd7c0+(c>>10)); - if(targetUCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); - cnv->UCharErrorBufferLength=1; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - goto endloop; - } - } - } - } - } - - /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ -singleByteMode: - while(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - b=*source++; - switch(state) { - case readCommand: - /* redundant conditions are commented out */ - /* here: b<0x20 because otherwise we would be in fastSingle */ - if((1UL<toUBytes[0]=b; - cnv->toULength=1; - goto endloop; - } - - /* store the first byte of a multibyte sequence in toUBytes[] */ - cnv->toUBytes[0]=b; - cnv->toULength=1; - break; - case quotePairOne: - byteOne=b; - cnv->toUBytes[1]=b; - cnv->toULength=2; - state=quotePairTwo; - break; - case quotePairTwo: - *target++=(UChar)((byteOne<<8)|b); - state=readCommand; - goto fastSingle; - case quoteOne: - if(b<0x80) { - /* all static offsets are in the BMP */ - *target++=(UChar)(staticOffsets[quoteWindow]+b); - } else { - /* write from dynamic window */ - uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); - if(c<=0xffff) { - *target++=(UChar)c; - } else { - /* output surrogate pair */ - *target++=(UChar)(0xd7c0+(c>>10)); - if(targetUCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); - cnv->UCharErrorBufferLength=1; - 
*pErrorCode=U_BUFFER_OVERFLOW_ERROR; - goto endloop; - } - } - } - state=readCommand; - goto fastSingle; - case definePairOne: - dynamicWindow=(int8_t)((b>>5)&7); - byteOne=(uint8_t)(b&0x1f); - cnv->toUBytes[1]=b; - cnv->toULength=2; - state=definePairTwo; - break; - case definePairTwo: - scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); - state=readCommand; - goto fastSingle; - case defineOne: - if(b==0) { - /* callback(illegal): Reserved window offset value 0 */ - cnv->toUBytes[1]=b; - cnv->toULength=2; - goto endloop; - } else if(btoUDynamicOffsets[dynamicWindow]=b<<7UL; - } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { - scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; - } else if(b>=fixedThreshold) { - scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; - } else { - /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ - cnv->toUBytes[1]=b; - cnv->toULength=2; - goto endloop; - } - state=readCommand; - goto fastSingle; - } - } - } else { - /* fast path for Unicode mode */ - if(state==readCommand) { -fastUnicode: - while(source+1(Urs-UC0)) { - *target++=(UChar)((b<<8)|source[1]); - source+=2; - } - } - - /* normal state machine for Unicode mode */ -/* unicodeByteMode: */ - while(source=targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - b=*source++; - switch(state) { - case readCommand: - if((uint8_t)(b-UC0)>(Urs-UC0)) { - byteOne=b; - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=quotePairTwo; - } else if(/* UC0<=b && */ b<=UC7) { - dynamicWindow=(int8_t)(b-UC0); - isSingleByteMode=TRUE; - goto fastSingle; - } else if(/* UD0<=b && */ b<=UD7) { - dynamicWindow=(int8_t)(b-UD0); - isSingleByteMode=TRUE; - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=defineOne; - goto singleByteMode; - } else if(b==UDX) { - isSingleByteMode=TRUE; - cnv->toUBytes[0]=b; - cnv->toULength=1; - state=definePairOne; - goto singleByteMode; - } else if(b==UQU) { 
- cnv->toUBytes[0]=b; - cnv->toULength=1; - state=quotePairOne; - } else /* Urs */ { - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - cnv->toUBytes[0]=b; - cnv->toULength=1; - goto endloop; - } - break; - case quotePairOne: - byteOne=b; - cnv->toUBytes[1]=b; - cnv->toULength=2; - state=quotePairTwo; - break; - case quotePairTwo: - *target++=(UChar)((byteOne<<8)|b); - state=readCommand; - goto fastUnicode; - } - } - } -endloop: - - /* set the converter state back into UConverter */ - if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { - /* reset to deal with the next character */ - state=readCommand; - } else if(state==readCommand) { - /* not in a multi-byte sequence, reset toULength */ - cnv->toULength=0; - } - scsu->toUIsSingleByteMode=isSingleByteMode; - scsu->toUState=state; - scsu->toUQuoteWindow=quoteWindow; - scsu->toUDynamicWindow=dynamicWindow; - scsu->toUByteOne=byteOne; - - /* write back the updated pointers */ - pArgs->source=(const char *)source; - pArgs->target=target; - return; -} -U_CDECL_END -/* SCSU-from-Unicode conversion functions ----------------------------------- */ - -/* - * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve - * reasonable results. The lookahead is minimal. - * Many cases are simple: - * A character fits directly into the current mode, a dynamic or static window, - * or is not compressible. These cases are tested first. - * Real compression heuristics are applied to the rest, in code branches for - * single/Unicode mode and BMP/supplementary code points. - * The heuristics used here are extremely simple. - */ - -/* get the number of the window that this character is in, or -1 */ -static int8_t -getWindow(const uint32_t offsets[8], uint32_t c) { - int i; - for(i=0; i<8; ++i) { - if((uint32_t)(c-offsets[i])<=0x7f) { - return (int8_t)(i); - } - } - return -1; -} - -/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? 
*/ -static UBool -isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) { - return (UBool)(c<=offset+0x7f && - (c>=offset || (c<=0x7f && - (c>=0x20 || (1UL<windowUse[scsu->nextWindowUseIndex]; - if(++scsu->nextWindowUseIndex==8) { - scsu->nextWindowUseIndex=0; - } - return window; -} - -/* - * useDynamicWindow() adjusts - * windowUse[] and nextWindowUseIndex for the algorithm to choose - * the next dynamic window to be defined; - * a subclass may override it and provide its own algorithm. - */ -static void -useDynamicWindow(SCSUData *scsu, int8_t window) { - /* - * move the existing window, which just became the most recently used one, - * up in windowUse[] to nextWindowUseIndex-1 - */ - - /* first, find the index of the window - backwards to favor the more recently used windows */ - int i, j; - - i=scsu->nextWindowUseIndex; - do { - if(--i<0) { - i=7; - } - } while(scsu->windowUse[i]!=window); - - /* now copy each windowUse[i+1] to [i] */ - j=i+1; - if(j==8) { - j=0; - } - while(j!=scsu->nextWindowUseIndex) { - scsu->windowUse[i]=scsu->windowUse[j]; - i=j; - if(++j==8) { j=0; } - } - - /* finally, set the window into the most recently used index */ - scsu->windowUse[i]=window; -} - -/* - * calculate the offset and the code for a dynamic window that contains the character - * takes fixed offsets into account - * the offset of the window is stored in the offset variable, - * the code is returned - * - * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code - */ -static int -getDynamicOffset(uint32_t c, uint32_t *pOffset) { - int i; - - for(i=0; i<7; ++i) { - if((uint32_t)(c-fixedOffsets[i])<=0x7f) { - *pOffset=fixedOffsets[i]; - return 0xf9+i; - } - } - - if(c<0x80) { - /* No dynamic window for US-ASCII. 
*/ - return -1; - } else if(c<0x3400 || - (uint32_t)(c-0x10000)<(0x14000-0x10000) || - (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000) - ) { - /* This character is in a code range for a "small", i.e., reasonably windowable, script. */ - *pOffset=c&0x7fffff80; - return (int)(c>>7); - } else if(0xe000<=c && c!=0xfeff && c<0xfff0) { - /* For these characters we need to take the gapOffset into account. */ - *pOffset=c&0x7fffff80; - return (int)((c-gapOffset)>>7); - } else { - return -1; - } -} -U_CDECL_BEGIN -/* - * Idea for compression: - * - save SCSUData and other state before really starting work - * - at endloop, see if compression could be better with just unicode mode - * - don't do this if a callback has been called - * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning - * - different buffer handling! - * - * Drawback or need for corrective handling: - * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and - * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible - * not only for compression but also for HTML/XML documents with following charset/encoding announcers. - * - * How to achieve both? - * - Only replace the result after an SDX or SCU? 
- */ - -static void U_CALLCONV -_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - SCSUData *scsu; - const UChar *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - int32_t *offsets; - - UBool isSingleByteMode; - uint8_t dynamicWindow; - uint32_t currentOffset; - - uint32_t c, delta; - - int32_t sourceIndex, nextSourceIndex; - - int32_t length; - - /* variables for compression heuristics */ - uint32_t offset; - UChar lead, trail; - int code; - int8_t window; - - /* set up the local pointers */ - cnv=pArgs->converter; - scsu=(SCSUData *)cnv->extraInfo; - - /* set up the local pointers */ - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - offsets=pArgs->offsets; - - /* get the state machine state */ - isSingleByteMode=scsu->fromUIsSingleByteMode; - dynamicWindow=scsu->fromUDynamicWindow; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - - c=cnv->fromUChar32; - - /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex= c==0 ? 
0 : -1; - nextSourceIndex=0; - - /* similar conversion "loop" as in toUnicode */ -loop: - if(isSingleByteMode) { - if(c!=0 && targetCapacity>0) { - goto getTrailSingle; - } - - /* state machine for single-byte mode */ -/* singleByteMode: */ - while(sourcefromUDynamicOffsets, c))>=0) { - /* there is a dynamic window that contains this character, change to it */ - dynamicWindow=window; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else if((code=getDynamicOffset(c, &offset))>=0) { - /* might check if there are more characters in this window to come */ - /* define an extended window with this character */ - code-=0x200; - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=4; - goto outputBytes; - } else { - /* change to Unicode mode and output this (lead, trail) pair */ - isSingleByteMode=FALSE; - *target++=(uint8_t)SCU; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - --targetCapacity; - c=((uint32_t)lead<<16)|trail; - length=4; - goto outputBytes; - } - } else if(c<0xa0) { - /* quote C1 control character */ - c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ - length=2; - goto outputBytes; - } else if(c==0xfeff || c>=0xfff0) { - /* quote signature character=byte order mark and specials */ - c|=SQU<<16; - length=3; - goto outputBytes; - } else { - /* compress all other BMP characters */ - if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { - /* there is a window defined that contains this character - switch to it or quote from it? 
*/ - if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { - /* change to dynamic window */ - dynamicWindow=window; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else { - /* quote from dynamic window */ - c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; - length=2; - goto outputBytes; - } - } else if((window=getWindow(staticOffsets, c))>=0) { - /* quote from static window */ - c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); - length=2; - goto outputBytes; - } else if((code=getDynamicOffset(c, &offset))>=0) { - /* define a dynamic window with this character */ - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=3; - goto outputBytes; - } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && - (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) - ) { - /* - * this character is not compressible (a BMP ideograph or similar); - * switch to Unicode mode if this is the last character in the block - * or there is at least one more ideograph following immediately - */ - isSingleByteMode=FALSE; - c|=SCU<<16; - length=3; - goto outputBytes; - } else { - /* quote Unicode */ - c|=SQU<<16; - length=3; - goto outputBytes; - } - } - - /* normal end of conversion: prepare for a new character */ - c=0; - sourceIndex=nextSourceIndex; - } - } else { - if(c!=0 && targetCapacity>0) { - goto getTrailUnicode; - } - - /* state machine for Unicode mode */ -/* unicodeByteMode: */ - while(source=2) { - *target++=(uint8_t)(c>>8); - *target++=(uint8_t)c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - *offsets++=sourceIndex; - } - targetCapacity-=2; - } else { - 
length=2; - goto outputBytes; - } - } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { - /* compress BMP character if the following one is not an uncompressible ideograph */ - if(!(sourcefromUDynamicOffsets, c))>=0) { - /* there is a dynamic window that contains this character, change to it */ - isSingleByteMode=TRUE; - dynamicWindow=window; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else if((code=getDynamicOffset(c, &offset))>=0) { - /* define a dynamic window with this character */ - isSingleByteMode=TRUE; - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=3; - goto outputBytes; - } - } - - /* don't know how to compress this character, just write it directly */ - length=2; - goto outputBytes; - } else if(c<0xe000) { - /* c is a surrogate */ - if(U16_IS_SURROGATE_LEAD(c)) { -getTrailUnicode: - lead=(UChar)c; - if(sourcefromUDynamicOffsets, c))>=0 && - !(sourcefromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else if(source=0 - ) { - /* two supplementary characters in (probably) the same window - define an extended one */ - isSingleByteMode=TRUE; - code-=0x200; - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=4; - goto outputBytes; - } else { - /* don't know how to compress this character, just write it directly */ - c=((uint32_t)lead<<16)|trail; - length=4; - goto 
outputBytes; - } - } else /* 0xe000<=c<0xf300 */ { - /* quote to avoid SCSU tags */ - c|=UQU<<16; - length=3; - goto outputBytes; - } - - /* normal end of conversion: prepare for a new character */ - c=0; - sourceIndex=nextSourceIndex; - } - } -endloop: - - /* set the converter state back into UConverter */ - scsu->fromUIsSingleByteMode=isSingleByteMode; - scsu->fromUDynamicWindow=dynamicWindow; - - cnv->fromUChar32=c; - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; - return; - -outputBytes: - /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ - /* from the first if in the loop we know that targetCapacity>0 */ - if(length<=targetCapacity) { - if(offsets==NULL) { - switch(length) { - /* each branch falls through to the next one */ - case 4: - *target++=(uint8_t)(c>>24); - U_FALLTHROUGH; - case 3: - *target++=(uint8_t)(c>>16); - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(c>>8); - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)c; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - } else { - switch(length) { - /* each branch falls through to the next one */ - case 4: - *target++=(uint8_t)(c>>24); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 3: - *target++=(uint8_t)(c>>16); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(c>>8); - *offsets++=sourceIndex; - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)c; - *offsets++=sourceIndex; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - } - targetCapacity-=length; - - /* normal end of conversion: prepare for a new character */ - c=0; - sourceIndex=nextSourceIndex; - goto loop; - } else { - uint8_t *p; - - /* - * We actually do this backwards here: - * In order to save an intermediate variable, we output - * first to the overflow buffer what does not fit into the - * regular target. 
- */ - /* we know that 0<=targetCapacitycharErrorBuffer; - switch(length) { - /* each branch falls through to the next one */ - case 4: - *p++=(uint8_t)(c>>24); - U_FALLTHROUGH; - case 3: - *p++=(uint8_t)(c>>16); - U_FALLTHROUGH; - case 2: - *p++=(uint8_t)(c>>8); - U_FALLTHROUGH; - case 1: - *p=(uint8_t)c; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - cnv->charErrorBufferLength=(int8_t)length; - - /* now output what fits into the regular target */ - c>>=8*length; /* length was reduced by targetCapacity */ - switch(targetCapacity) { - /* each branch falls through to the next one */ - case 3: - *target++=(uint8_t)(c>>16); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(c>>8); - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)c; - if(offsets!=NULL) { - *offsets++=sourceIndex; - } - U_FALLTHROUGH; - default: - break; - } - - /* target overflow */ - targetCapacity=0; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - c=0; - goto endloop; - } -} - -/* - * Identical to _SCSUFromUnicodeWithOffsets but without offset handling. - * If a change is made in the original function, then either - * change this function the same way or - * re-copy the original function and remove the variables - * offsets, sourceIndex, and nextSourceIndex. 
- */ -static void U_CALLCONV -_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - UConverter *cnv; - SCSUData *scsu; - const UChar *source, *sourceLimit; - uint8_t *target; - int32_t targetCapacity; - - UBool isSingleByteMode; - uint8_t dynamicWindow; - uint32_t currentOffset; - - uint32_t c, delta; - - int32_t length; - - /* variables for compression heuristics */ - uint32_t offset; - UChar lead, trail; - int code; - int8_t window; - - /* set up the local pointers */ - cnv=pArgs->converter; - scsu=(SCSUData *)cnv->extraInfo; - - /* set up the local pointers */ - source=pArgs->source; - sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); - - /* get the state machine state */ - isSingleByteMode=scsu->fromUIsSingleByteMode; - dynamicWindow=scsu->fromUDynamicWindow; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - - c=cnv->fromUChar32; - - /* similar conversion "loop" as in toUnicode */ -loop: - if(isSingleByteMode) { - if(c!=0 && targetCapacity>0) { - goto getTrailSingle; - } - - /* state machine for single-byte mode */ -/* singleByteMode: */ - while(sourcefromUDynamicOffsets, c))>=0) { - /* there is a dynamic window that contains this character, change to it */ - dynamicWindow=window; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else if((code=getDynamicOffset(c, &offset))>=0) { - /* might check if there are more characters in this window to come */ - /* define an extended window with this character */ - code-=0x200; - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=4; - goto outputBytes; - } else { 
- /* change to Unicode mode and output this (lead, trail) pair */ - isSingleByteMode=FALSE; - *target++=(uint8_t)SCU; - --targetCapacity; - c=((uint32_t)lead<<16)|trail; - length=4; - goto outputBytes; - } - } else if(c<0xa0) { - /* quote C1 control character */ - c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ - length=2; - goto outputBytes; - } else if(c==0xfeff || c>=0xfff0) { - /* quote signature character=byte order mark and specials */ - c|=SQU<<16; - length=3; - goto outputBytes; - } else { - /* compress all other BMP characters */ - if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { - /* there is a window defined that contains this character - switch to it or quote from it? */ - if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { - /* change to dynamic window */ - dynamicWindow=window; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else { - /* quote from dynamic window */ - c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; - length=2; - goto outputBytes; - } - } else if((window=getWindow(staticOffsets, c))>=0) { - /* quote from static window */ - c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); - length=2; - goto outputBytes; - } else if((code=getDynamicOffset(c, &offset))>=0) { - /* define a dynamic window with this character */ - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=3; - goto outputBytes; - } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && - (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) - ) { - /* - * this character is not compressible (a BMP ideograph or similar); - * switch to Unicode mode if this is the 
last character in the block - * or there is at least one more ideograph following immediately - */ - isSingleByteMode=FALSE; - c|=SCU<<16; - length=3; - goto outputBytes; - } else { - /* quote Unicode */ - c|=SQU<<16; - length=3; - goto outputBytes; - } - } - - /* normal end of conversion: prepare for a new character */ - c=0; - } - } else { - if(c!=0 && targetCapacity>0) { - goto getTrailUnicode; - } - - /* state machine for Unicode mode */ -/* unicodeByteMode: */ - while(source=2) { - *target++=(uint8_t)(c>>8); - *target++=(uint8_t)c; - targetCapacity-=2; - } else { - length=2; - goto outputBytes; - } - } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { - /* compress BMP character if the following one is not an uncompressible ideograph */ - if(!(sourcefromUDynamicOffsets, c))>=0) { - /* there is a dynamic window that contains this character, change to it */ - isSingleByteMode=TRUE; - dynamicWindow=window; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - } else if((code=getDynamicOffset(c, &offset))>=0) { - /* define a dynamic window with this character */ - isSingleByteMode=TRUE; - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=3; - goto outputBytes; - } - } - - /* don't know how to compress this character, just write it directly */ - length=2; - goto outputBytes; - } else if(c<0xe000) { - /* c is a surrogate */ - if(U16_IS_SURROGATE_LEAD(c)) { -getTrailUnicode: - lead=(UChar)c; - if(sourcefromUDynamicOffsets, c))>=0 && - !(sourcefromUDynamicOffsets[dynamicWindow]; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; - length=2; - goto outputBytes; - 
} else if(source=0 - ) { - /* two supplementary characters in (probably) the same window - define an extended one */ - isSingleByteMode=TRUE; - code-=0x200; - dynamicWindow=getNextDynamicWindow(scsu); - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; - useDynamicWindow(scsu, dynamicWindow); - c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; - length=4; - goto outputBytes; - } else { - /* don't know how to compress this character, just write it directly */ - c=((uint32_t)lead<<16)|trail; - length=4; - goto outputBytes; - } - } else /* 0xe000<=c<0xf300 */ { - /* quote to avoid SCSU tags */ - c|=UQU<<16; - length=3; - goto outputBytes; - } - - /* normal end of conversion: prepare for a new character */ - c=0; - } - } -endloop: - - /* set the converter state back into UConverter */ - scsu->fromUIsSingleByteMode=isSingleByteMode; - scsu->fromUDynamicWindow=dynamicWindow; - - cnv->fromUChar32=c; - - /* write back the updated pointers */ - pArgs->source=source; - pArgs->target=(char *)target; - return; - -outputBytes: - /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ - /* from the first if in the loop we know that targetCapacity>0 */ - if(length<=targetCapacity) { - switch(length) { - /* each branch falls through to the next one */ - case 4: - *target++=(uint8_t)(c>>24); - U_FALLTHROUGH; - case 3: - *target++=(uint8_t)(c>>16); - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(c>>8); - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)c; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - targetCapacity-=length; - - /* normal end of conversion: prepare for a new character */ - c=0; - goto loop; - } else { - uint8_t *p; - - /* - * We actually do this backwards here: - * In order to save an intermediate variable, we output - * first to the overflow buffer what does not fit into the - * regular target. 
- */ - /* we know that 0<=targetCapacitycharErrorBuffer; - switch(length) { - /* each branch falls through to the next one */ - case 4: - *p++=(uint8_t)(c>>24); - U_FALLTHROUGH; - case 3: - *p++=(uint8_t)(c>>16); - U_FALLTHROUGH; - case 2: - *p++=(uint8_t)(c>>8); - U_FALLTHROUGH; - case 1: - *p=(uint8_t)c; - U_FALLTHROUGH; - default: - /* will never occur */ - break; - } - cnv->charErrorBufferLength=(int8_t)length; - - /* now output what fits into the regular target */ - c>>=8*length; /* length was reduced by targetCapacity */ - switch(targetCapacity) { - /* each branch falls through to the next one */ - case 3: - *target++=(uint8_t)(c>>16); - U_FALLTHROUGH; - case 2: - *target++=(uint8_t)(c>>8); - U_FALLTHROUGH; - case 1: - *target++=(uint8_t)c; - U_FALLTHROUGH; - default: - break; - } - - /* target overflow */ - targetCapacity=0; - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - c=0; - goto endloop; - } -} - -/* miscellaneous ------------------------------------------------------------ */ - -static const char * U_CALLCONV -_SCSUGetName(const UConverter *cnv) { - SCSUData *scsu=(SCSUData *)cnv->extraInfo; - - switch(scsu->locale) { - case l_ja: - return "SCSU,locale=ja"; - default: - return "SCSU"; - } -} - -/* structure for SafeClone calculations */ -struct cloneSCSUStruct -{ - UConverter cnv; - SCSUData mydata; -}; - -static UConverter * U_CALLCONV -_SCSUSafeClone(const UConverter *cnv, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status) -{ - struct cloneSCSUStruct * localClone; - int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct); - - if (U_FAILURE(*status)){ - return 0; - } - - if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ - *pBufferSize = bufferSizeNeeded; - return 0; - } - - localClone = (struct cloneSCSUStruct *)stackBuffer; - /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ - - uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData)); - localClone->cnv.extraInfo = 
&localClone->mydata; - localClone->cnv.isExtraLocal = TRUE; - - return &localClone->cnv; -} -U_CDECL_END - -static const UConverterImpl _SCSUImpl={ - UCNV_SCSU, - - NULL, - NULL, - - _SCSUOpen, - _SCSUClose, - _SCSUReset, - - _SCSUToUnicode, - _SCSUToUnicodeWithOffsets, - _SCSUFromUnicode, - _SCSUFromUnicodeWithOffsets, - NULL, - - NULL, - _SCSUGetName, - NULL, - _SCSUSafeClone, - ucnv_getCompleteUnicodeSet, - NULL, - NULL -}; - -static const UConverterStaticData _SCSUStaticData={ - sizeof(UConverterStaticData), - "SCSU", - 1212, /* CCSID for SCSU */ - UCNV_IBM, UCNV_SCSU, - 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */ - /* - * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode - * substitution string. - */ - { 0x0e, 0xff, 0xfd, 0 }, 3, - FALSE, FALSE, - 0, - 0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ -}; - -const UConverterSharedData _SCSUData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl); - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucnvsel.cpp b/deps/node/deps/icu-small/source/common/ucnvsel.cpp deleted file mode 100644 index 6ccee1ae..00000000 --- a/deps/node/deps/icu-small/source/common/ucnvsel.cpp +++ /dev/null @@ -1,823 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2008-2011, International Business Machines -* Corporation, Google and others. All Rights Reserved. -* -******************************************************************************* -*/ -// Author : eldawy@google.com (Mohamed Eldawy) -// ucnvsel.cpp -// -// Purpose: To generate a list of encodings capable of handling -// a given Unicode text -// -// Started 09-April-2008 - -/** - * \file - * - * This is an implementation of an encoding selector. 
- * The goal is, given a unicode string, find the encodings - * this string can be mapped to. To make processing faster - * a trie is built when you call ucnvsel_open() that - * stores all encodings a codepoint can map to - */ - -#include "unicode/ucnvsel.h" - -#if !UCONFIG_NO_CONVERSION - -#include - -#include "unicode/uchar.h" -#include "unicode/uniset.h" -#include "unicode/ucnv.h" -#include "unicode/ustring.h" -#include "unicode/uchriter.h" -#include "utrie2.h" -#include "propsvec.h" -#include "uassert.h" -#include "ucmndata.h" -#include "udataswp.h" -#include "uenumimp.h" -#include "cmemory.h" -#include "cstring.h" - -U_NAMESPACE_USE - -struct UConverterSelector { - UTrie2 *trie; // 16 bit trie containing offsets into pv - uint32_t* pv; // table of bits! - int32_t pvCount; - char** encodings; // which encodings did user ask to use? - int32_t encodingsCount; - int32_t encodingStrLength; - uint8_t* swapped; - UBool ownPv, ownEncodingStrings; -}; - -static void generateSelectorData(UConverterSelector* result, - UPropsVectors *upvec, - const USet* excludedCodePoints, - const UConverterUnicodeSet whichSet, - UErrorCode* status) { - if (U_FAILURE(*status)) { - return; - } - - int32_t columns = (result->encodingsCount+31)/32; - - // set errorValue to all-ones - for (int32_t col = 0; col < columns; col++) { - upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP, - col, static_cast(~0), static_cast(~0), status); - } - - for (int32_t i = 0; i < result->encodingsCount; ++i) { - uint32_t mask; - uint32_t column; - int32_t item_count; - int32_t j; - UConverter* test_converter = ucnv_open(result->encodings[i], status); - if (U_FAILURE(*status)) { - return; - } - USet* unicode_point_set; - unicode_point_set = uset_open(1, 0); // empty set - - ucnv_getUnicodeSet(test_converter, unicode_point_set, - whichSet, status); - if (U_FAILURE(*status)) { - ucnv_close(test_converter); - return; - } - - column = i / 32; - mask = 1 << (i%32); - // now iterate over intervals on 
set i! - item_count = uset_getItemCount(unicode_point_set); - - for (j = 0; j < item_count; ++j) { - UChar32 start_char; - UChar32 end_char; - UErrorCode smallStatus = U_ZERO_ERROR; - uset_getItem(unicode_point_set, j, &start_char, &end_char, NULL, 0, - &smallStatus); - if (U_FAILURE(smallStatus)) { - // this will be reached for the converters that fill the set with - // strings. Those should be ignored by our system - } else { - upvec_setValue(upvec, start_char, end_char, column, static_cast(~0), mask, - status); - } - } - ucnv_close(test_converter); - uset_close(unicode_point_set); - if (U_FAILURE(*status)) { - return; - } - } - - // handle excluded encodings! Simply set their values to all 1's in the upvec - if (excludedCodePoints) { - int32_t item_count = uset_getItemCount(excludedCodePoints); - for (int32_t j = 0; j < item_count; ++j) { - UChar32 start_char; - UChar32 end_char; - - uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0, - status); - for (int32_t col = 0; col < columns; col++) { - upvec_setValue(upvec, start_char, end_char, col, static_cast(~0), static_cast(~0), - status); - } - } - } - - // alright. Now, let's put things in the same exact form you'd get when you - // unserialize things. - result->trie = upvec_compactToUTrie2WithRowIndexes(upvec, status); - result->pv = upvec_cloneArray(upvec, &result->pvCount, NULL, status); - result->pvCount *= columns; // number of uint32_t = rows * columns - result->ownPv = TRUE; -} - -/* open a selector. If converterListSize is 0, build for all converters. - If excludedCodePoints is NULL, don't exclude any codepoints */ -U_CAPI UConverterSelector* U_EXPORT2 -ucnvsel_open(const char* const* converterList, int32_t converterListSize, - const USet* excludedCodePoints, - const UConverterUnicodeSet whichSet, UErrorCode* status) { - // check if already failed - if (U_FAILURE(*status)) { - return NULL; - } - // ensure args make sense! 
- if (converterListSize < 0 || (converterList == NULL && converterListSize != 0)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - // allocate a new converter - LocalUConverterSelectorPointer newSelector( - (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector))); - if (newSelector.isNull()) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memset(newSelector.getAlias(), 0, sizeof(UConverterSelector)); - - if (converterListSize == 0) { - converterList = NULL; - converterListSize = ucnv_countAvailable(); - } - newSelector->encodings = - (char**)uprv_malloc(converterListSize * sizeof(char*)); - if (!newSelector->encodings) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - newSelector->encodings[0] = NULL; // now we can call ucnvsel_close() - - // make a backup copy of the list of converters - int32_t totalSize = 0; - int32_t i; - for (i = 0; i < converterListSize; i++) { - totalSize += - (int32_t)uprv_strlen(converterList != NULL ? converterList[i] : ucnv_getAvailableName(i)) + 1; - } - // 4-align the totalSize to 4-align the size of the serialized form - int32_t encodingStrPadding = totalSize & 3; - if (encodingStrPadding != 0) { - encodingStrPadding = 4 - encodingStrPadding; - } - newSelector->encodingStrLength = totalSize += encodingStrPadding; - char* allStrings = (char*) uprv_malloc(totalSize); - if (!allStrings) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - for (i = 0; i < converterListSize; i++) { - newSelector->encodings[i] = allStrings; - uprv_strcpy(newSelector->encodings[i], - converterList != NULL ? 
converterList[i] : ucnv_getAvailableName(i)); - allStrings += uprv_strlen(newSelector->encodings[i]) + 1; - } - while (encodingStrPadding > 0) { - *allStrings++ = 0; - --encodingStrPadding; - } - - newSelector->ownEncodingStrings = TRUE; - newSelector->encodingsCount = converterListSize; - UPropsVectors *upvec = upvec_open((converterListSize+31)/32, status); - generateSelectorData(newSelector.getAlias(), upvec, excludedCodePoints, whichSet, status); - upvec_close(upvec); - - if (U_FAILURE(*status)) { - return NULL; - } - - return newSelector.orphan(); -} - -/* close opened selector */ -U_CAPI void U_EXPORT2 -ucnvsel_close(UConverterSelector *sel) { - if (!sel) { - return; - } - if (sel->ownEncodingStrings) { - uprv_free(sel->encodings[0]); - } - uprv_free(sel->encodings); - if (sel->ownPv) { - uprv_free(sel->pv); - } - utrie2_close(sel->trie); - uprv_free(sel->swapped); - uprv_free(sel); -} - -static const UDataInfo dataInfo = { - sizeof(UDataInfo), - 0, - - U_IS_BIG_ENDIAN, - U_CHARSET_FAMILY, - U_SIZEOF_UCHAR, - 0, - - { 0x43, 0x53, 0x65, 0x6c }, /* dataFormat="CSel" */ - { 1, 0, 0, 0 }, /* formatVersion */ - { 0, 0, 0, 0 } /* dataVersion */ -}; - -enum { - UCNVSEL_INDEX_TRIE_SIZE, // trie size in bytes - UCNVSEL_INDEX_PV_COUNT, // number of uint32_t in the bit vectors - UCNVSEL_INDEX_NAMES_COUNT, // number of encoding names - UCNVSEL_INDEX_NAMES_LENGTH, // number of encoding name bytes including padding - UCNVSEL_INDEX_SIZE = 15, // bytes following the DataHeader - UCNVSEL_INDEX_COUNT = 16 -}; - -/* - * Serialized form of a UConverterSelector, formatVersion 1: - * - * The serialized form begins with a standard ICU DataHeader with a UDataInfo - * as the template above. 
- * This is followed by: - * int32_t indexes[UCNVSEL_INDEX_COUNT]; // see index entry constants above - * serialized UTrie2; // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes - * uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]]; // bit vectors - * char* encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]]; // NUL-terminated strings + padding - */ - -/* serialize a selector */ -U_CAPI int32_t U_EXPORT2 -ucnvsel_serialize(const UConverterSelector* sel, - void* buffer, int32_t bufferCapacity, UErrorCode* status) { - // check if already failed - if (U_FAILURE(*status)) { - return 0; - } - // ensure args make sense! - uint8_t *p = (uint8_t *)buffer; - if (bufferCapacity < 0 || - (bufferCapacity > 0 && (p == NULL || (U_POINTER_MASK_LSB(p, 3) != 0))) - ) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - // add up the size of the serialized form - int32_t serializedTrieSize = utrie2_serialize(sel->trie, NULL, 0, status); - if (*status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*status)) { - return 0; - } - *status = U_ZERO_ERROR; - - DataHeader header; - uprv_memset(&header, 0, sizeof(header)); - header.dataHeader.headerSize = (uint16_t)((sizeof(header) + 15) & ~15); - header.dataHeader.magic1 = 0xda; - header.dataHeader.magic2 = 0x27; - uprv_memcpy(&header.info, &dataInfo, sizeof(dataInfo)); - - int32_t indexes[UCNVSEL_INDEX_COUNT] = { - serializedTrieSize, - sel->pvCount, - sel->encodingsCount, - sel->encodingStrLength - }; - - int32_t totalSize = - header.dataHeader.headerSize + - (int32_t)sizeof(indexes) + - serializedTrieSize + - sel->pvCount * 4 + - sel->encodingStrLength; - indexes[UCNVSEL_INDEX_SIZE] = totalSize - header.dataHeader.headerSize; - if (totalSize > bufferCapacity) { - *status = U_BUFFER_OVERFLOW_ERROR; - return totalSize; - } - // ok, save! 
- int32_t length = header.dataHeader.headerSize; - uprv_memcpy(p, &header, sizeof(header)); - uprv_memset(p + sizeof(header), 0, length - sizeof(header)); - p += length; - - length = (int32_t)sizeof(indexes); - uprv_memcpy(p, indexes, length); - p += length; - - utrie2_serialize(sel->trie, p, serializedTrieSize, status); - p += serializedTrieSize; - - length = sel->pvCount * 4; - uprv_memcpy(p, sel->pv, length); - p += length; - - uprv_memcpy(p, sel->encodings[0], sel->encodingStrLength); - p += sel->encodingStrLength; - - return totalSize; -} - -/** - * swap a selector into the desired Endianness and Asciiness of - * the system. Just as FYI, selectors are always saved in the format - * of the system that created them. They are only converted if used - * on another system. In other words, selectors created on different - * system can be different even if the params are identical (endianness - * and Asciiness differences only) - * - * @param ds pointer to data swapper containing swapping info - * @param inData pointer to incoming data - * @param length length of inData in bytes - * @param outData pointer to output data. 
Capacity should - * be at least equal to capacity of inData - * @param status an in/out ICU UErrorCode - * @return 0 on failure, number of bytes swapped on success - * number of bytes swapped can be smaller than length - */ -static int32_t -ucnvsel_swap(const UDataSwapper *ds, - const void *inData, int32_t length, - void *outData, UErrorCode *status) { - /* udata_swapDataHeader checks the arguments */ - int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status); - if(U_FAILURE(*status)) { - return 0; - } - - /* check data format and format version */ - const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4); - if(!( - pInfo->dataFormat[0] == 0x43 && /* dataFormat="CSel" */ - pInfo->dataFormat[1] == 0x53 && - pInfo->dataFormat[2] == 0x65 && - pInfo->dataFormat[3] == 0x6c - )) { - udata_printError(ds, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3]); - *status = U_INVALID_FORMAT_ERROR; - return 0; - } - if(pInfo->formatVersion[0] != 1) { - udata_printError(ds, "ucnvsel_swap(): format version %02x is not supported\n", - pInfo->formatVersion[0]); - *status = U_UNSUPPORTED_ERROR; - return 0; - } - - if(length >= 0) { - length -= headerSize; - if(length < 16*4) { - udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n", - length); - *status = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - - const uint8_t *inBytes = (const uint8_t *)inData + headerSize; - uint8_t *outBytes = (uint8_t *)outData + headerSize; - - /* read the indexes */ - const int32_t *inIndexes = (const int32_t *)inBytes; - int32_t indexes[16]; - int32_t i; - for(i = 0; i < 16; ++i) { - indexes[i] = udata_readInt32(ds, inIndexes[i]); - } - - /* get the total length of the data */ - int32_t size = indexes[UCNVSEL_INDEX_SIZE]; - if(length >= 0) { - if(length < size) { - 
udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n", - length); - *status = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - /* copy the data for inaccessible bytes */ - if(inBytes != outBytes) { - uprv_memcpy(outBytes, inBytes, size); - } - - int32_t offset = 0, count; - - /* swap the int32_t indexes[] */ - count = UCNVSEL_INDEX_COUNT*4; - ds->swapArray32(ds, inBytes, count, outBytes, status); - offset += count; - - /* swap the UTrie2 */ - count = indexes[UCNVSEL_INDEX_TRIE_SIZE]; - utrie2_swap(ds, inBytes + offset, count, outBytes + offset, status); - offset += count; - - /* swap the uint32_t pv[] */ - count = indexes[UCNVSEL_INDEX_PV_COUNT]*4; - ds->swapArray32(ds, inBytes + offset, count, outBytes + offset, status); - offset += count; - - /* swap the encoding names */ - count = indexes[UCNVSEL_INDEX_NAMES_LENGTH]; - ds->swapInvChars(ds, inBytes + offset, count, outBytes + offset, status); - offset += count; - - U_ASSERT(offset == size); - } - - return headerSize + size; -} - -/* unserialize a selector */ -U_CAPI UConverterSelector* U_EXPORT2 -ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status) { - // check if already failed - if (U_FAILURE(*status)) { - return NULL; - } - // ensure args make sense! 
- const uint8_t *p = (const uint8_t *)buffer; - if (length <= 0 || - (length > 0 && (p == NULL || (U_POINTER_MASK_LSB(p, 3) != 0))) - ) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - // header - if (length < 32) { - // not even enough space for a minimal header - *status = U_INDEX_OUTOFBOUNDS_ERROR; - return NULL; - } - const DataHeader *pHeader = (const DataHeader *)p; - if (!( - pHeader->dataHeader.magic1==0xda && - pHeader->dataHeader.magic2==0x27 && - pHeader->info.dataFormat[0] == 0x43 && - pHeader->info.dataFormat[1] == 0x53 && - pHeader->info.dataFormat[2] == 0x65 && - pHeader->info.dataFormat[3] == 0x6c - )) { - /* header not valid or dataFormat not recognized */ - *status = U_INVALID_FORMAT_ERROR; - return NULL; - } - if (pHeader->info.formatVersion[0] != 1) { - *status = U_UNSUPPORTED_ERROR; - return NULL; - } - uint8_t* swapped = NULL; - if (pHeader->info.isBigEndian != U_IS_BIG_ENDIAN || - pHeader->info.charsetFamily != U_CHARSET_FAMILY - ) { - // swap the data - UDataSwapper *ds = - udata_openSwapperForInputData(p, length, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, status); - int32_t totalSize = ucnvsel_swap(ds, p, -1, NULL, status); - if (U_FAILURE(*status)) { - udata_closeSwapper(ds); - return NULL; - } - if (length < totalSize) { - udata_closeSwapper(ds); - *status = U_INDEX_OUTOFBOUNDS_ERROR; - return NULL; - } - swapped = (uint8_t*)uprv_malloc(totalSize); - if (swapped == NULL) { - udata_closeSwapper(ds); - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - ucnvsel_swap(ds, p, length, swapped, status); - udata_closeSwapper(ds); - if (U_FAILURE(*status)) { - uprv_free(swapped); - return NULL; - } - p = swapped; - pHeader = (const DataHeader *)p; - } - if (length < (pHeader->dataHeader.headerSize + 16 * 4)) { - // not even enough space for the header and the indexes - uprv_free(swapped); - *status = U_INDEX_OUTOFBOUNDS_ERROR; - return NULL; - } - p += pHeader->dataHeader.headerSize; - length -= pHeader->dataHeader.headerSize; - // 
indexes - const int32_t *indexes = (const int32_t *)p; - if (length < indexes[UCNVSEL_INDEX_SIZE]) { - uprv_free(swapped); - *status = U_INDEX_OUTOFBOUNDS_ERROR; - return NULL; - } - p += UCNVSEL_INDEX_COUNT * 4; - // create and populate the selector object - UConverterSelector* sel = (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector)); - char **encodings = - (char **)uprv_malloc( - indexes[UCNVSEL_INDEX_NAMES_COUNT] * sizeof(char *)); - if (sel == NULL || encodings == NULL) { - uprv_free(swapped); - uprv_free(sel); - uprv_free(encodings); - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memset(sel, 0, sizeof(UConverterSelector)); - sel->pvCount = indexes[UCNVSEL_INDEX_PV_COUNT]; - sel->encodings = encodings; - sel->encodingsCount = indexes[UCNVSEL_INDEX_NAMES_COUNT]; - sel->encodingStrLength = indexes[UCNVSEL_INDEX_NAMES_LENGTH]; - sel->swapped = swapped; - // trie - sel->trie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, - p, indexes[UCNVSEL_INDEX_TRIE_SIZE], NULL, - status); - p += indexes[UCNVSEL_INDEX_TRIE_SIZE]; - if (U_FAILURE(*status)) { - ucnvsel_close(sel); - return NULL; - } - // bit vectors - sel->pv = (uint32_t *)p; - p += sel->pvCount * 4; - // encoding names - char* s = (char*)p; - for (int32_t i = 0; i < sel->encodingsCount; ++i) { - sel->encodings[i] = s; - s += uprv_strlen(s) + 1; - } - p += sel->encodingStrLength; - - return sel; -} - -// a bunch of functions for the enumeration thingie! Nothing fancy here. 
Just -// iterate over the selected encodings -struct Enumerator { - int16_t* index; - int16_t length; - int16_t cur; - const UConverterSelector* sel; -}; - -U_CDECL_BEGIN - -static void U_CALLCONV -ucnvsel_close_selector_iterator(UEnumeration *enumerator) { - uprv_free(((Enumerator*)(enumerator->context))->index); - uprv_free(enumerator->context); - uprv_free(enumerator); -} - - -static int32_t U_CALLCONV -ucnvsel_count_encodings(UEnumeration *enumerator, UErrorCode *status) { - // check if already failed - if (U_FAILURE(*status)) { - return 0; - } - return ((Enumerator*)(enumerator->context))->length; -} - - -static const char* U_CALLCONV ucnvsel_next_encoding(UEnumeration* enumerator, - int32_t* resultLength, - UErrorCode* status) { - // check if already failed - if (U_FAILURE(*status)) { - return NULL; - } - - int16_t cur = ((Enumerator*)(enumerator->context))->cur; - const UConverterSelector* sel; - const char* result; - if (cur >= ((Enumerator*)(enumerator->context))->length) { - return NULL; - } - sel = ((Enumerator*)(enumerator->context))->sel; - result = sel->encodings[((Enumerator*)(enumerator->context))->index[cur] ]; - ((Enumerator*)(enumerator->context))->cur++; - if (resultLength) { - *resultLength = (int32_t)uprv_strlen(result); - } - return result; -} - -static void U_CALLCONV ucnvsel_reset_iterator(UEnumeration* enumerator, - UErrorCode* status) { - // check if already failed - if (U_FAILURE(*status)) { - return ; - } - ((Enumerator*)(enumerator->context))->cur = 0; -} - -U_CDECL_END - - -static const UEnumeration defaultEncodings = { - NULL, - NULL, - ucnvsel_close_selector_iterator, - ucnvsel_count_encodings, - uenum_unextDefault, - ucnvsel_next_encoding, - ucnvsel_reset_iterator -}; - - -// internal fn to intersect two sets of masks -// returns whether the mask has reduced to all zeros -static UBool intersectMasks(uint32_t* dest, const uint32_t* source1, int32_t len) { - int32_t i; - uint32_t oredDest = 0; - for (i = 0 ; i < len ; ++i) { - 
oredDest |= (dest[i] &= source1[i]); - } - return oredDest == 0; -} - -// internal fn to count how many 1's are there in a mask -// algorithm taken from http://graphics.stanford.edu/~seander/bithacks.html -static int16_t countOnes(uint32_t* mask, int32_t len) { - int32_t i, totalOnes = 0; - for (i = 0 ; i < len ; ++i) { - uint32_t ent = mask[i]; - for (; ent; totalOnes++) - { - ent &= ent - 1; // clear the least significant bit set - } - } - return static_cast(totalOnes); -} - - -/* internal function! */ -static UEnumeration *selectForMask(const UConverterSelector* sel, - uint32_t *mask, UErrorCode *status) { - // this is the context we will use. Store a table of indices to which - // encodings are legit. - struct Enumerator* result = (Enumerator*)uprv_malloc(sizeof(Enumerator)); - if (result == NULL) { - uprv_free(mask); - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - result->index = NULL; // this will be allocated later! - result->length = result->cur = 0; - result->sel = sel; - - UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); - if (en == NULL) { - // TODO(markus): Combine Enumerator and UEnumeration into one struct. 
- uprv_free(mask); - uprv_free(result); - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - memcpy(en, &defaultEncodings, sizeof(UEnumeration)); - en->context = result; - - int32_t columns = (sel->encodingsCount+31)/32; - int16_t numOnes = countOnes(mask, columns); - // now, we know the exact space we need for index - if (numOnes > 0) { - result->index = (int16_t*) uprv_malloc(numOnes * sizeof(int16_t)); - - int32_t i, j; - int16_t k = 0; - for (j = 0 ; j < columns; j++) { - uint32_t v = mask[j]; - for (i = 0 ; i < 32 && k < sel->encodingsCount; i++, k++) { - if ((v & 1) != 0) { - result->index[result->length++] = k; - } - v >>= 1; - } - } - } //otherwise, index will remain NULL (and will never be touched by - //the enumerator code anyway) - uprv_free(mask); - return en; -} - -/* check a string against the selector - UTF16 version */ -U_CAPI UEnumeration * U_EXPORT2 -ucnvsel_selectForString(const UConverterSelector* sel, - const UChar *s, int32_t length, UErrorCode *status) { - // check if already failed - if (U_FAILURE(*status)) { - return NULL; - } - // ensure args make sense! - if (sel == NULL || (s == NULL && length != 0)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - int32_t columns = (sel->encodingsCount+31)/32; - uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4); - if (mask == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memset(mask, ~0, columns *4); - - if(s!=NULL) { - const UChar *limit; - if (length >= 0) { - limit = s + length; - } else { - limit = NULL; - } - - while (limit == NULL ? 
*s != 0 : s != limit) { - UChar32 c; - uint16_t pvIndex; - UTRIE2_U16_NEXT16(sel->trie, s, limit, c, pvIndex); - if (intersectMasks(mask, sel->pv+pvIndex, columns)) { - break; - } - } - } - return selectForMask(sel, mask, status); -} - -/* check a string against the selector - UTF8 version */ -U_CAPI UEnumeration * U_EXPORT2 -ucnvsel_selectForUTF8(const UConverterSelector* sel, - const char *s, int32_t length, UErrorCode *status) { - // check if already failed - if (U_FAILURE(*status)) { - return NULL; - } - // ensure args make sense! - if (sel == NULL || (s == NULL && length != 0)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - int32_t columns = (sel->encodingsCount+31)/32; - uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4); - if (mask == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memset(mask, ~0, columns *4); - - if (length < 0) { - length = (int32_t)uprv_strlen(s); - } - - if(s!=NULL) { - const char *limit = s + length; - - while (s != limit) { - uint16_t pvIndex; - UTRIE2_U8_NEXT16(sel->trie, s, limit, pvIndex); - if (intersectMasks(mask, sel->pv+pvIndex, columns)) { - break; - } - } - } - return selectForMask(sel, mask, status); -} - -#endif // !UCONFIG_NO_CONVERSION diff --git a/deps/node/deps/icu-small/source/common/ucol_data.h b/deps/node/deps/icu-small/source/common/ucol_data.h deleted file mode 100644 index 83f54abb..00000000 --- a/deps/node/deps/icu-small/source/common/ucol_data.h +++ /dev/null @@ -1,89 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2000-2011, International Business Machines -* Corporation and others. All Rights Reserved. 
-******************************************************************************* -* file name: ucol_data.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011jul02 -* created by: Markus Scherer -* -* Private implementation header for C/C++ collation. -* Some file data structure definitions were moved here from i18n/ucol_imp.h -* so that the common library (via ucol_swp.cpp) need not depend on the i18n library at all. -* -* We do not want to move the collation swapper to the i18n library because -* a) the resource bundle swapper depends on it and would have to move too, and -* b) we might want to eventually implement runtime data swapping, -* which might (or might not) be easier if all swappers are in the common library. -*/ - -#ifndef __UCOL_DATA_H__ -#define __UCOL_DATA_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_COLLATION - -/* let us know whether reserved fields are reset to zero or junked */ -#define UCOL_HEADER_MAGIC 0x20030618 - -typedef struct { - int32_t size; - /* all the offsets are in bytes */ - /* to get the address add to the header address and cast properly */ - uint32_t options; /* these are the default options for the collator */ - uint32_t UCAConsts; /* structure which holds values for indirect positioning and implicit ranges */ - uint32_t contractionUCACombos; /* this one is needed only for UCA, to copy the appropriate contractions */ - uint32_t magic; /* magic number - lets us know whether reserved data is reset or junked */ - uint32_t mappingPosition; /* const uint8_t *mappingPosition; */ - uint32_t expansion; /* uint32_t *expansion; */ - uint32_t contractionIndex; /* UChar *contractionIndex; */ - uint32_t contractionCEs; /* uint32_t *contractionCEs; */ - uint32_t contractionSize; /* needed for various closures */ - /*int32_t latinOneMapping;*/ /* this is now handled in the trie itself *//* fast track to latin1 chars */ - - uint32_t endExpansionCE; /* array of last collation element in - expansion */ - 
uint32_t expansionCESize; /* array of maximum expansion size - corresponding to the expansion - collation elements with last element - in endExpansionCE*/ - int32_t endExpansionCECount; /* size of endExpansionCE */ - uint32_t unsafeCP; /* hash table of unsafe code points */ - uint32_t contrEndCP; /* hash table of final code points */ - /* in contractions. */ - - int32_t contractionUCACombosSize; /* number of UCA contraction items. */ - /*Length is contractionUCACombosSize*contractionUCACombosWidth*sizeof(UChar) */ - UBool jamoSpecial; /* is jamoSpecial */ - UBool isBigEndian; /* is this data big endian? from the UDataInfo header*/ - uint8_t charSetFamily; /* what is the charset family of this data from the UDataInfo header*/ - uint8_t contractionUCACombosWidth; /* width of UCA combos field */ - UVersionInfo version; - UVersionInfo UCAVersion; /* version of the UCA, read from file */ - UVersionInfo UCDVersion; /* UCD version, obtained by u_getUnicodeVersion */ - UVersionInfo formatVersion; /* format version from the UDataInfo header */ - uint32_t scriptToLeadByte; /* offset to script to lead collation byte mapping data */ - uint32_t leadByteToScript; /* offset to lead collation byte to script mapping data */ - uint8_t reserved[76]; /* for future use */ -} UCATableHeader; - -typedef struct { - uint32_t byteSize; - uint32_t tableSize; - uint32_t contsSize; - uint32_t table; - uint32_t conts; - UVersionInfo UCAVersion; /* version of the UCA, read from file */ - uint8_t padding[8]; -} InverseUCATableHeader; - -#endif /* !UCONFIG_NO_COLLATION */ - -#endif /* __UCOL_DATA_H__ */ diff --git a/deps/node/deps/icu-small/source/common/ucol_swp.cpp b/deps/node/deps/icu-small/source/common/ucol_swp.cpp deleted file mode 100644 index 97b5c4af..00000000 --- a/deps/node/deps/icu-small/source/common/ucol_swp.cpp +++ /dev/null @@ -1,615 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ucol_swp.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003sep10 -* created by: Markus W. Scherer -* -* Swap collation binaries. -*/ - -#include "unicode/udata.h" /* UDataInfo */ -#include "utrie.h" -#include "utrie2.h" -#include "udataswp.h" -#include "cmemory.h" -#include "ucol_data.h" -#include "ucol_swp.h" - -/* swapping ----------------------------------------------------------------- */ - -#if !UCONFIG_NO_COLLATION - -U_CAPI UBool U_EXPORT2 -ucol_looksLikeCollationBinary(const UDataSwapper *ds, - const void *inData, int32_t length) { - if(ds==NULL || inData==NULL || length<-1) { - return FALSE; - } - - // First check for format version 4+ which has a standard data header. - UErrorCode errorCode=U_ZERO_ERROR; - (void)udata_swapDataHeader(ds, inData, -1, NULL, &errorCode); - if(U_SUCCESS(errorCode)) { - const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4); - if(info.dataFormat[0]==0x55 && // dataFormat="UCol" - info.dataFormat[1]==0x43 && - info.dataFormat[2]==0x6f && - info.dataFormat[3]==0x6c) { - return TRUE; - } - } - - // Else check for format version 3. - const UCATableHeader *inHeader=(const UCATableHeader *)inData; - - /* - * The collation binary must contain at least the UCATableHeader, - * starting with its size field. 
- * sizeof(UCATableHeader)==42*4 in ICU 2.8 - * check the length against the header size before reading the size field - */ - UCATableHeader header; - uprv_memset(&header, 0, sizeof(header)); - if(length<0) { - header.size=udata_readInt32(ds, inHeader->size); - } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) { - return FALSE; - } - - header.magic=ds->readUInt32(inHeader->magic); - if(!( - header.magic==UCOL_HEADER_MAGIC && - inHeader->formatVersion[0]==3 /*&& - inHeader->formatVersion[1]>=0*/ - )) { - return FALSE; - } - - if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) { - return FALSE; - } - - return TRUE; -} - -namespace { - -/* swap a header-less collation formatVersion=3 binary, inside a resource bundle or ucadata.icu */ -int32_t -swapFormatVersion3(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint8_t *inBytes; - uint8_t *outBytes; - - const UCATableHeader *inHeader; - UCATableHeader *outHeader; - UCATableHeader header; - - uint32_t count; - - /* argument checking in case we were not called from ucol_swap() */ - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - inBytes=(const uint8_t *)inData; - outBytes=(uint8_t *)outData; - - inHeader=(const UCATableHeader *)inData; - outHeader=(UCATableHeader *)outData; - - /* - * The collation binary must contain at least the UCATableHeader, - * starting with its size field. 
- * sizeof(UCATableHeader)==42*4 in ICU 2.8 - * check the length against the header size before reading the size field - */ - uprv_memset(&header, 0, sizeof(header)); - if(length<0) { - header.size=udata_readInt32(ds, inHeader->size); - } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) { - udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - header.magic=ds->readUInt32(inHeader->magic); - if(!( - header.magic==UCOL_HEADER_MAGIC && - inHeader->formatVersion[0]==3 /*&& - inHeader->formatVersion[1]>=0*/ - )) { - udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n", - header.magic, - inHeader->formatVersion[0], inHeader->formatVersion[1]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) { - udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n", - inHeader->isBigEndian, inHeader->charSetFamily); - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } - - if(length>=0) { - /* copy everything, takes care of data that needs no swapping */ - if(inBytes!=outBytes) { - uprv_memcpy(outBytes, inBytes, header.size); - } - - /* swap the necessary pieces in the order of their occurrence in the data */ - - /* read more of the UCATableHeader (the size field was read above) */ - header.options= ds->readUInt32(inHeader->options); - header.UCAConsts= ds->readUInt32(inHeader->UCAConsts); - header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos); - header.mappingPosition= ds->readUInt32(inHeader->mappingPosition); - header.expansion= ds->readUInt32(inHeader->expansion); - header.contractionIndex= ds->readUInt32(inHeader->contractionIndex); - header.contractionCEs= 
ds->readUInt32(inHeader->contractionCEs); - header.contractionSize= ds->readUInt32(inHeader->contractionSize); - header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE); - header.expansionCESize= ds->readUInt32(inHeader->expansionCESize); - header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount); - header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize); - header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte); - header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript); - - /* swap the 32-bit integers in the header */ - ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader), - outHeader, pErrorCode); - ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript), - &(outHeader->scriptToLeadByte), pErrorCode); - /* set the output platform properties */ - outHeader->isBigEndian=ds->outIsBigEndian; - outHeader->charSetFamily=ds->outCharset; - - /* swap the options */ - if(header.options!=0) { - ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options, - outBytes+header.options, pErrorCode); - } - - /* swap the expansions */ - if(header.mappingPosition!=0 && header.expansion!=0) { - if(header.contractionIndex!=0) { - /* expansions bounded by contractions */ - count=header.contractionIndex-header.expansion; - } else { - /* no contractions: expansions bounded by the main trie */ - count=header.mappingPosition-header.expansion; - } - ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count, - outBytes+header.expansion, pErrorCode); - } - - /* swap the contractions */ - if(header.contractionSize!=0) { - /* contractionIndex: UChar[] */ - ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2, - outBytes+header.contractionIndex, pErrorCode); - - /* contractionCEs: CEs[] */ - ds->swapArray32(ds, inBytes+header.contractionCEs, 
header.contractionSize*4, - outBytes+header.contractionCEs, pErrorCode); - } - - /* swap the main trie */ - if(header.mappingPosition!=0) { - count=header.endExpansionCE-header.mappingPosition; - utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count, - outBytes+header.mappingPosition, pErrorCode); - } - - /* swap the max expansion table */ - if(header.endExpansionCECount!=0) { - ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4, - outBytes+header.endExpansionCE, pErrorCode); - } - - /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */ - - /* swap UCA constants */ - if(header.UCAConsts!=0) { - /* - * if UCAConsts!=0 then contractionUCACombos because we are swapping - * the UCA data file, and we know that the UCA contains contractions - */ - ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts, - outBytes+header.UCAConsts, pErrorCode); - } - - /* swap UCA contractions */ - if(header.contractionUCACombosSize!=0) { - count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR; - ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count, - outBytes+header.contractionUCACombos, pErrorCode); - } - - /* swap the script to lead bytes */ - if(header.scriptToLeadByte!=0) { - int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16 - int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16 - ds->swapArray16(ds, inBytes+header.scriptToLeadByte, - 4 + (4 * indexCount) + (2 * dataCount), - outBytes+header.scriptToLeadByte, pErrorCode); - } - - /* swap the lead byte to scripts */ - if(header.leadByteToScript!=0) { - int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16 - int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16 - 
ds->swapArray16(ds, inBytes+header.leadByteToScript, - 4 + (2 * indexCount) + (2 * dataCount), - outBytes+header.leadByteToScript, pErrorCode); - } - } - - return header.size; -} - -// swap formatVersion 4 or 5 ----------------------------------------------- *** - -// The following are copied from CollationDataReader, trading an awkward copy of constants -// for an awkward relocation of the i18n collationdatareader.h file into the common library. -// Keep them in sync! - -enum { - IX_INDEXES_LENGTH, // 0 - IX_OPTIONS, - IX_RESERVED2, - IX_RESERVED3, - - IX_JAMO_CE32S_START, // 4 - IX_REORDER_CODES_OFFSET, - IX_REORDER_TABLE_OFFSET, - IX_TRIE_OFFSET, - - IX_RESERVED8_OFFSET, // 8 - IX_CES_OFFSET, - IX_RESERVED10_OFFSET, - IX_CE32S_OFFSET, - - IX_ROOT_ELEMENTS_OFFSET, // 12 - IX_CONTEXTS_OFFSET, - IX_UNSAFE_BWD_OFFSET, - IX_FAST_LATIN_TABLE_OFFSET, - - IX_SCRIPTS_OFFSET, // 16 - IX_COMPRESSIBLE_BYTES_OFFSET, - IX_RESERVED18_OFFSET, - IX_TOTAL_SIZE -}; - -int32_t -swapFormatVersion4(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return 0; } - - const uint8_t *inBytes=(const uint8_t *)inData; - uint8_t *outBytes=(uint8_t *)outData; - - const int32_t *inIndexes=(const int32_t *)inBytes; - int32_t indexes[IX_TOTAL_SIZE+1]; - - // Need at least IX_INDEXES_LENGTH and IX_OPTIONS. 
- if(0<=length && length<8) { - udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes " - "(%d after header) for collation data\n", - length); - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - int32_t indexesLength=indexes[0]=udata_readInt32(ds, inIndexes[0]); - if(0<=length && length<(indexesLength*4)) { - udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes " - "(%d after header) for collation data\n", - length); - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - for(int32_t i=1; i<=IX_TOTAL_SIZE && iIX_TOTAL_SIZE) { - size=indexes[IX_TOTAL_SIZE]; - } else if(indexesLength>IX_REORDER_CODES_OFFSET) { - size=indexes[indexesLength-1]; - } else { - size=indexesLength*4; - } - if(length<0) { return size; } - - if(lengthswapArray32(ds, inBytes, indexesLength * 4, outBytes, &errorCode); - - // The following is a modified version of CollationDataReader::read(). - // Here we use indexes[] not inIndexes[] because - // the inIndexes[] may not be in this machine's endianness. - int32_t index; // one of the indexes[] slots - int32_t offset; // byte offset for the index part - // int32_t length; // number of bytes in the index part - - index = IX_REORDER_CODES_OFFSET; - offset = indexes[index]; - length = indexes[index + 1] - offset; - if(length > 0) { - ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode); - } - - // Skip the IX_REORDER_TABLE_OFFSET byte array. 
- - index = IX_TRIE_OFFSET; - offset = indexes[index]; - length = indexes[index + 1] - offset; - if(length > 0) { - utrie2_swap(ds, inBytes + offset, length, outBytes + offset, &errorCode); - } - - index = IX_RESERVED8_OFFSET; - offset = indexes[index]; - length = indexes[index + 1] - offset; - if(length > 0) { - udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED8_OFFSET\n", length); - errorCode = U_UNSUPPORTED_ERROR; - return 0; - } - - index = IX_CES_OFFSET; - offset = indexes[index]; - length = indexes[index + 1] - offset; - if(length > 0) { - ds->swapArray64(ds, inBytes + offset, length, outBytes + offset, &errorCode); - } - - index = IX_RESERVED10_OFFSET; - offset = indexes[index]; - length = indexes[index + 1] - offset; - if(length > 0) { - udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED10_OFFSET\n", length); - errorCode = U_UNSUPPORTED_ERROR; - return 0; - } - - index = IX_CE32S_OFFSET; - offset = indexes[index]; - length = indexes[index + 1] - offset; - if(length > 0) { - ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode); - } - - index = IX_ROOT_ELEMENTS_OFFSET; - offset = indexes[index]; - length = indexes[index + 1] - offset; - if(length > 0) { - ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode); - } - - index = IX_CONTEXTS_OFFSET; - offset = indexes[index]; - length = indexes[index + 1] - offset; - if(length > 0) { - ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode); - } - - index = IX_UNSAFE_BWD_OFFSET; - offset = indexes[index]; - length = indexes[index + 1] - offset; - if(length > 0) { - ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode); - } - - index = IX_FAST_LATIN_TABLE_OFFSET; - offset = indexes[index]; - length = indexes[index + 1] - offset; - if(length > 0) { - ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode); - } - - index = IX_SCRIPTS_OFFSET; - offset = 
indexes[index]; - length = indexes[index + 1] - offset; - if(length > 0) { - ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode); - } - - // Skip the IX_COMPRESSIBLE_BYTES_OFFSET byte array. - - index = IX_RESERVED18_OFFSET; - offset = indexes[index]; - length = indexes[index + 1] - offset; - if(length > 0) { - udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED18_OFFSET\n", length); - errorCode = U_UNSUPPORTED_ERROR; - return 0; - } - - return size; -} - -} // namespace - -/* swap ICU collation data like ucadata.icu */ -U_CAPI int32_t U_EXPORT2 -ucol_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { return 0; } - - /* udata_swapDataHeader checks the arguments */ - int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - // Try to swap the old format version which did not have a standard data header. 
- *pErrorCode=U_ZERO_ERROR; - return swapFormatVersion3(ds, inData, length, outData, pErrorCode); - } - - /* check data format and format version */ - const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4); - if(!( - info.dataFormat[0]==0x55 && // dataFormat="UCol" - info.dataFormat[1]==0x43 && - info.dataFormat[2]==0x6f && - info.dataFormat[3]==0x6c && - (3<=info.formatVersion[0] && info.formatVersion[0]<=5) - )) { - udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x " - "(format version %02x.%02x) is not recognized as collation data\n", - info.dataFormat[0], info.dataFormat[1], - info.dataFormat[2], info.dataFormat[3], - info.formatVersion[0], info.formatVersion[1]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - inData=(const char *)inData+headerSize; - if(length>=0) { length-=headerSize; } - outData=(char *)outData+headerSize; - int32_t collationSize; - if(info.formatVersion[0]>=4) { - collationSize=swapFormatVersion4(ds, inData, length, outData, *pErrorCode); - } else { - collationSize=swapFormatVersion3(ds, inData, length, outData, pErrorCode); - } - if(U_SUCCESS(*pErrorCode)) { - return headerSize+collationSize; - } else { - return 0; - } -} - -/* swap inverse UCA collation data (invuca.icu) */ -U_CAPI int32_t U_EXPORT2 -ucol_swapInverseUCA(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t headerSize; - - const uint8_t *inBytes; - uint8_t *outBytes; - - const InverseUCATableHeader *inHeader; - InverseUCATableHeader *outHeader; - InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} }; - - /* udata_swapDataHeader checks the arguments */ - headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* check data format and format version */ - pInfo=(const UDataInfo *)((const char *)inData+4); - if(!( - pInfo->dataFormat[0]==0x49 && /* 
dataFormat="InvC" */ - pInfo->dataFormat[1]==0x6e && - pInfo->dataFormat[2]==0x76 && - pInfo->dataFormat[3]==0x43 && - pInfo->formatVersion[0]==2 && - pInfo->formatVersion[1]>=1 - )) { - udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0], pInfo->formatVersion[1]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - inBytes=(const uint8_t *)inData+headerSize; - outBytes=(uint8_t *)outData+headerSize; - - inHeader=(const InverseUCATableHeader *)inBytes; - outHeader=(InverseUCATableHeader *)outBytes; - - /* - * The inverse UCA collation binary must contain at least the InverseUCATableHeader, - * starting with its size field. - * sizeof(UCATableHeader)==8*4 in ICU 2.8 - * check the length against the header size before reading the size field - */ - if(length<0) { - header.byteSize=udata_readInt32(ds, inHeader->byteSize); - } else if( - ((length-headerSize)<(8*4) || - (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize))) - ) { - udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - if(length>=0) { - /* copy everything, takes care of data that needs no swapping */ - if(inBytes!=outBytes) { - uprv_memcpy(outBytes, inBytes, header.byteSize); - } - - /* swap the necessary pieces in the order of their occurrence in the data */ - - /* read more of the InverseUCATableHeader (the byteSize field was read above) */ - header.tableSize= ds->readUInt32(inHeader->tableSize); - header.contsSize= ds->readUInt32(inHeader->contsSize); - header.table= ds->readUInt32(inHeader->table); - header.conts= ds->readUInt32(inHeader->conts); - - /* swap the 32-bit integers in the header */ - ds->swapArray32(ds, inHeader, 5*4, 
outHeader, pErrorCode); - - /* swap the inverse table; tableSize counts uint32_t[3] rows */ - ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4, - outBytes+header.table, pErrorCode); - - /* swap the continuation table; contsSize counts UChars */ - ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR, - outBytes+header.conts, pErrorCode); - } - - return headerSize+header.byteSize; -} - -#endif /* #if !UCONFIG_NO_COLLATION */ diff --git a/deps/node/deps/icu-small/source/common/ucol_swp.h b/deps/node/deps/icu-small/source/common/ucol_swp.h deleted file mode 100644 index fd8be9aa..00000000 --- a/deps/node/deps/icu-small/source/common/ucol_swp.h +++ /dev/null @@ -1,58 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ucol_swp.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003sep10 -* created by: Markus W. Scherer -* -* Swap collation binaries. -*/ - -#ifndef __UCOL_SWP_H__ -#define __UCOL_SWP_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_COLLATION - -#include "udataswp.h" - -/* - * Does the data look like a collation binary? - * @internal - */ -U_INTERNAL UBool U_EXPORT2 -ucol_looksLikeCollationBinary(const UDataSwapper *ds, - const void *inData, int32_t length); - -/** - * Swap ICU collation data like ucadata.icu. See udataswp.h. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -ucol_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Swap inverse UCA collation data (invuca.icu). See udataswp.h. 
- * @internal - */ -U_CAPI int32_t U_EXPORT2 -ucol_swapInverseUCA(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -#endif /* #if !UCONFIG_NO_COLLATION */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucptrie.cpp b/deps/node/deps/icu-small/source/common/ucptrie.cpp deleted file mode 100644 index 13496ad5..00000000 --- a/deps/node/deps/icu-small/source/common/ucptrie.cpp +++ /dev/null @@ -1,590 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// ucptrie.cpp (modified from utrie2.cpp) -// created: 2017dec29 Markus W. Scherer - -// #define UCPTRIE_DEBUG -#ifdef UCPTRIE_DEBUG -# include -#endif - -#include "unicode/utypes.h" -#include "unicode/ucptrie.h" -#include "unicode/utf.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "uassert.h" -#include "ucptrie_impl.h" - -U_CAPI UCPTrie * U_EXPORT2 -ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, - const void *data, int32_t length, int32_t *pActualLength, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return nullptr; - } - - if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) || - type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type || - valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - - // Enough data for a trie header? - if (length < (int32_t)sizeof(UCPTrieHeader)) { - *pErrorCode = U_INVALID_FORMAT_ERROR; - return nullptr; - } - - // Check the signature. 
- const UCPTrieHeader *header = (const UCPTrieHeader *)data; - if (header->signature != UCPTRIE_SIG) { - *pErrorCode = U_INVALID_FORMAT_ERROR; - return nullptr; - } - - int32_t options = header->options; - int32_t typeInt = (options >> 6) & 3; - int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK; - if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 || - (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) { - *pErrorCode = U_INVALID_FORMAT_ERROR; - return nullptr; - } - UCPTrieType actualType = (UCPTrieType)typeInt; - UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt; - if (type < 0) { - type = actualType; - } - if (valueWidth < 0) { - valueWidth = actualValueWidth; - } - if (type != actualType || valueWidth != actualValueWidth) { - *pErrorCode = U_INVALID_FORMAT_ERROR; - return nullptr; - } - - // Get the length values and offsets. - UCPTrie tempTrie; - uprv_memset(&tempTrie, 0, sizeof(tempTrie)); - tempTrie.indexLength = header->indexLength; - tempTrie.dataLength = - ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength; - tempTrie.index3NullOffset = header->index3NullOffset; - tempTrie.dataNullOffset = - ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset; - - tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2; - tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12; - tempTrie.type = type; - tempTrie.valueWidth = valueWidth; - - // Calculate the actual length. - int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2; - if (valueWidth == UCPTRIE_VALUE_BITS_16) { - actualLength += tempTrie.dataLength * 2; - } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { - actualLength += tempTrie.dataLength * 4; - } else { - actualLength += tempTrie.dataLength; - } - if (length < actualLength) { - *pErrorCode = U_INVALID_FORMAT_ERROR; // Not enough bytes. - return nullptr; - } - - // Allocate the trie. 
- UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie)); - if (trie == nullptr) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); -#ifdef UCPTRIE_DEBUG - trie->name = "fromSerialized"; -#endif - - // Set the pointers to its index and data arrays. - const uint16_t *p16 = (const uint16_t *)(header + 1); - trie->index = p16; - p16 += trie->indexLength; - - // Get the data. - int32_t nullValueOffset = trie->dataNullOffset; - if (nullValueOffset >= trie->dataLength) { - nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; - } - switch (valueWidth) { - case UCPTRIE_VALUE_BITS_16: - trie->data.ptr16 = p16; - trie->nullValue = trie->data.ptr16[nullValueOffset]; - break; - case UCPTRIE_VALUE_BITS_32: - trie->data.ptr32 = (const uint32_t *)p16; - trie->nullValue = trie->data.ptr32[nullValueOffset]; - break; - case UCPTRIE_VALUE_BITS_8: - trie->data.ptr8 = (const uint8_t *)p16; - trie->nullValue = trie->data.ptr8[nullValueOffset]; - break; - default: - // Unreachable because valueWidth was checked above. 
- *pErrorCode = U_INVALID_FORMAT_ERROR; - return nullptr; - } - - if (pActualLength != nullptr) { - *pActualLength = actualLength; - } - return trie; -} - -U_CAPI void U_EXPORT2 -ucptrie_close(UCPTrie *trie) { - uprv_free(trie); -} - -U_CAPI UCPTrieType U_EXPORT2 -ucptrie_getType(const UCPTrie *trie) { - return (UCPTrieType)trie->type; -} - -U_CAPI UCPTrieValueWidth U_EXPORT2 -ucptrie_getValueWidth(const UCPTrie *trie) { - return (UCPTrieValueWidth)trie->valueWidth; -} - -U_CAPI int32_t U_EXPORT2 -ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) { - int32_t i1 = c >> UCPTRIE_SHIFT_1; - if (trie->type == UCPTRIE_TYPE_FAST) { - U_ASSERT(0xffff < c && c < trie->highStart); - i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; - } else { - U_ASSERT((uint32_t)c < (uint32_t)trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT); - i1 += UCPTRIE_SMALL_INDEX_LENGTH; - } - int32_t i3Block = trie->index[ - (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; - int32_t i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; - int32_t dataBlock; - if ((i3Block & 0x8000) == 0) { - // 16-bit indexes - dataBlock = trie->index[i3Block + i3]; - } else { - // 18-bit indexes stored in groups of 9 entries per 8 indexes. - i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); - i3 &= 7; - dataBlock = ((int32_t)trie->index[i3Block++] << (2 + (2 * i3))) & 0x30000; - dataBlock |= trie->index[i3Block + i3]; - } - return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK); -} - -U_CAPI int32_t U_EXPORT2 -ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3) { - UChar32 c = (lt1 << 12) | (t2 << 6) | t3; - if (c >= trie->highStart) { - // Possible because the UTF-8 macro compares with shifted12HighStart which may be higher. 
- return trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; - } - return ucptrie_internalSmallIndex(trie, c); -} - -U_CAPI int32_t U_EXPORT2 -ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c, - const uint8_t *start, const uint8_t *src) { - int32_t i, length; - // Support 64-bit pointers by avoiding cast of arbitrary difference. - if ((src - start) <= 7) { - i = length = (int32_t)(src - start); - } else { - i = length = 7; - start = src - 7; - } - c = utf8_prevCharSafeBody(start, 0, &i, c, -1); - i = length - i; // Number of bytes read backward from src. - int32_t idx = _UCPTRIE_CP_INDEX(trie, 0xffff, c); - return (idx << 3) | i; -} - -namespace { - -inline uint32_t getValue(UCPTrieData data, UCPTrieValueWidth valueWidth, int32_t dataIndex) { - switch (valueWidth) { - case UCPTRIE_VALUE_BITS_16: - return data.ptr16[dataIndex]; - case UCPTRIE_VALUE_BITS_32: - return data.ptr32[dataIndex]; - case UCPTRIE_VALUE_BITS_8: - return data.ptr8[dataIndex]; - default: - // Unreachable if the trie is properly initialized. - return 0xffffffff; - } -} - -} // namespace - -U_CAPI uint32_t U_EXPORT2 -ucptrie_get(const UCPTrie *trie, UChar32 c) { - int32_t dataIndex; - if ((uint32_t)c <= 0x7f) { - // linear ASCII - dataIndex = c; - } else { - UChar32 fastMax = trie->type == UCPTRIE_TYPE_FAST ? 
0xffff : UCPTRIE_SMALL_MAX; - dataIndex = _UCPTRIE_CP_INDEX(trie, fastMax, c); - } - return getValue(trie->data, (UCPTrieValueWidth)trie->valueWidth, dataIndex); -} - -namespace { - -constexpr int32_t MAX_UNICODE = 0x10ffff; - -inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue, - UCPMapValueFilter *filter, const void *context) { - if (value == trieNullValue) { - value = nullValue; - } else if (filter != nullptr) { - value = filter(context, value); - } - return value; -} - -UChar32 getRange(const void *t, UChar32 start, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { - if ((uint32_t)start > MAX_UNICODE) { - return U_SENTINEL; - } - const UCPTrie *trie = reinterpret_cast(t); - UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth; - if (start >= trie->highStart) { - if (pValue != nullptr) { - int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; - uint32_t value = getValue(trie->data, valueWidth, di); - if (filter != nullptr) { value = filter(context, value); } - *pValue = value; - } - return MAX_UNICODE; - } - - uint32_t nullValue = trie->nullValue; - if (filter != nullptr) { nullValue = filter(context, nullValue); } - const uint16_t *index = trie->index; - - int32_t prevI3Block = -1; - int32_t prevBlock = -1; - UChar32 c = start; - uint32_t value; - bool haveValue = false; - do { - int32_t i3Block; - int32_t i3; - int32_t i3BlockLength; - int32_t dataBlockLength; - if (c <= 0xffff && (trie->type == UCPTRIE_TYPE_FAST || c <= UCPTRIE_SMALL_MAX)) { - i3Block = 0; - i3 = c >> UCPTRIE_FAST_SHIFT; - i3BlockLength = trie->type == UCPTRIE_TYPE_FAST ? - UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; - dataBlockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; - } else { - // Use the multi-stage index. 
- int32_t i1 = c >> UCPTRIE_SHIFT_1; - if (trie->type == UCPTRIE_TYPE_FAST) { - U_ASSERT(0xffff < c && c < trie->highStart); - i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; - } else { - U_ASSERT(c < trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT); - i1 += UCPTRIE_SMALL_INDEX_LENGTH; - } - i3Block = trie->index[ - (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; - if (i3Block == prevI3Block && (c - start) >= UCPTRIE_CP_PER_INDEX_2_ENTRY) { - // The index-3 block is the same as the previous one, and filled with value. - U_ASSERT((c & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0); - c += UCPTRIE_CP_PER_INDEX_2_ENTRY; - continue; - } - prevI3Block = i3Block; - if (i3Block == trie->index3NullOffset) { - // This is the index-3 null block. - if (haveValue) { - if (nullValue != value) { - return c - 1; - } - } else { - value = nullValue; - if (pValue != nullptr) { *pValue = nullValue; } - haveValue = true; - } - prevBlock = trie->dataNullOffset; - c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); - continue; - } - i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; - i3BlockLength = UCPTRIE_INDEX_3_BLOCK_LENGTH; - dataBlockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; - } - // Enumerate data blocks for one index-3 block. - do { - int32_t block; - if ((i3Block & 0x8000) == 0) { - block = index[i3Block + i3]; - } else { - // 18-bit indexes stored in groups of 9 entries per 8 indexes. - int32_t group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); - int32_t gi = i3 & 7; - block = ((int32_t)index[group++] << (2 + (2 * gi))) & 0x30000; - block |= index[group + gi]; - } - if (block == prevBlock && (c - start) >= dataBlockLength) { - // The block is the same as the previous one, and filled with value. 
- U_ASSERT((c & (dataBlockLength - 1)) == 0); - c += dataBlockLength; - } else { - int32_t dataMask = dataBlockLength - 1; - prevBlock = block; - if (block == trie->dataNullOffset) { - // This is the data null block. - if (haveValue) { - if (nullValue != value) { - return c - 1; - } - } else { - value = nullValue; - if (pValue != nullptr) { *pValue = nullValue; } - haveValue = true; - } - c = (c + dataBlockLength) & ~dataMask; - } else { - int32_t di = block + (c & dataMask); - uint32_t value2 = getValue(trie->data, valueWidth, di); - value2 = maybeFilterValue(value2, trie->nullValue, nullValue, - filter, context); - if (haveValue) { - if (value2 != value) { - return c - 1; - } - } else { - value = value2; - if (pValue != nullptr) { *pValue = value; } - haveValue = true; - } - while ((++c & dataMask) != 0) { - if (maybeFilterValue(getValue(trie->data, valueWidth, ++di), - trie->nullValue, nullValue, - filter, context) != value) { - return c - 1; - } - } - } - } - } while (++i3 < i3BlockLength); - } while (c < trie->highStart); - U_ASSERT(haveValue); - int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; - uint32_t highValue = getValue(trie->data, valueWidth, di); - if (maybeFilterValue(highValue, trie->nullValue, nullValue, - filter, context) != value) { - return c - 1; - } else { - return MAX_UNICODE; - } -} - -} // namespace - -U_CFUNC UChar32 -ucptrie_internalGetRange(UCPTrieGetRange *getRange, - const void *trie, UChar32 start, - UCPMapRangeOption option, uint32_t surrogateValue, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { - if (option == UCPMAP_RANGE_NORMAL) { - return getRange(trie, start, filter, context, pValue); - } - uint32_t value; - if (pValue == nullptr) { - // We need to examine the range value even if the caller does not want it. - pValue = &value; - } - UChar32 surrEnd = option == UCPMAP_RANGE_FIXED_ALL_SURROGATES ? 
0xdfff : 0xdbff; - UChar32 end = getRange(trie, start, filter, context, pValue); - if (end < 0xd7ff || start > surrEnd) { - return end; - } - // The range overlaps with surrogates, or ends just before the first one. - if (*pValue == surrogateValue) { - if (end >= surrEnd) { - // Surrogates followed by a non-surrogateValue range, - // or surrogates are part of a larger surrogateValue range. - return end; - } - } else { - if (start <= 0xd7ff) { - return 0xd7ff; // Non-surrogateValue range ends before surrogateValue surrogates. - } - // Start is a surrogate with a non-surrogateValue code *unit* value. - // Return a surrogateValue code *point* range. - *pValue = surrogateValue; - if (end > surrEnd) { - return surrEnd; // Surrogate range ends before non-surrogateValue rest of range. - } - } - // See if the surrogateValue surrogate range can be merged with - // an immediately following range. - uint32_t value2; - UChar32 end2 = getRange(trie, surrEnd + 1, filter, context, &value2); - if (value2 == surrogateValue) { - return end2; - } - return surrEnd; -} - -U_CAPI UChar32 U_EXPORT2 -ucptrie_getRange(const UCPTrie *trie, UChar32 start, - UCPMapRangeOption option, uint32_t surrogateValue, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { - return ucptrie_internalGetRange(getRange, trie, start, - option, surrogateValue, - filter, context, pValue); -} - -U_CAPI int32_t U_EXPORT2 -ucptrie_toBinary(const UCPTrie *trie, - void *data, int32_t capacity, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - - UCPTrieType type = (UCPTrieType)trie->type; - UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth; - if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type || - valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth || - capacity < 0 || - (capacity > 0 && (data == nullptr || (U_POINTER_MASK_LSB(data, 3) != 0)))) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - int32_t length = 
(int32_t)sizeof(UCPTrieHeader) + trie->indexLength * 2; - switch (valueWidth) { - case UCPTRIE_VALUE_BITS_16: - length += trie->dataLength * 2; - break; - case UCPTRIE_VALUE_BITS_32: - length += trie->dataLength * 4; - break; - case UCPTRIE_VALUE_BITS_8: - length += trie->dataLength; - break; - default: - // unreachable - break; - } - if (capacity < length) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - return length; - } - - char *bytes = (char *)data; - UCPTrieHeader *header = (UCPTrieHeader *)bytes; - header->signature = UCPTRIE_SIG; // "Tri3" - header->options = (uint16_t)( - ((trie->dataLength & 0xf0000) >> 4) | - ((trie->dataNullOffset & 0xf0000) >> 8) | - (trie->type << 6) | - valueWidth); - header->indexLength = (uint16_t)trie->indexLength; - header->dataLength = (uint16_t)trie->dataLength; - header->index3NullOffset = trie->index3NullOffset; - header->dataNullOffset = (uint16_t)trie->dataNullOffset; - header->shiftedHighStart = trie->highStart >> UCPTRIE_SHIFT_2; - bytes += sizeof(UCPTrieHeader); - - uprv_memcpy(bytes, trie->index, trie->indexLength * 2); - bytes += trie->indexLength * 2; - - switch (valueWidth) { - case UCPTRIE_VALUE_BITS_16: - uprv_memcpy(bytes, trie->data.ptr16, trie->dataLength * 2); - break; - case UCPTRIE_VALUE_BITS_32: - uprv_memcpy(bytes, trie->data.ptr32, trie->dataLength * 4); - break; - case UCPTRIE_VALUE_BITS_8: - uprv_memcpy(bytes, trie->data.ptr8, trie->dataLength); - break; - default: - // unreachable - break; - } - return length; -} - -namespace { - -#ifdef UCPTRIE_DEBUG -long countNull(const UCPTrie *trie) { - uint32_t nullValue=trie->nullValue; - int32_t length=trie->dataLength; - long count=0; - switch (trie->valueWidth) { - case UCPTRIE_VALUE_BITS_16: - for(int32_t i=0; idata.ptr16[i]==nullValue) { ++count; } - } - break; - case UCPTRIE_VALUE_BITS_32: - for(int32_t i=0; idata.ptr32[i]==nullValue) { ++count; } - } - break; - case UCPTRIE_VALUE_BITS_8: - for(int32_t i=0; idata.ptr8[i]==nullValue) { ++count; } - } - break; - 
default: - // unreachable - break; - } - return count; -} - -U_CFUNC void -ucptrie_printLengths(const UCPTrie *trie, const char *which) { - long indexLength=trie->indexLength; - long dataLength=(long)trie->dataLength; - long totalLength=(long)sizeof(UCPTrieHeader)+indexLength*2+ - dataLength*(trie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 2 : - trie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 4 : 1); - printf("**UCPTrieLengths(%s %s)** index:%6ld data:%6ld countNull:%6ld serialized:%6ld\n", - which, trie->name, indexLength, dataLength, countNull(trie), totalLength); -} -#endif - -} // namespace - -// UCPMap ---- -// Initially, this is the same as UCPTrie. This may well change. - -U_CAPI uint32_t U_EXPORT2 -ucpmap_get(const UCPMap *map, UChar32 c) { - return ucptrie_get(reinterpret_cast(map), c); -} - -U_CAPI UChar32 U_EXPORT2 -ucpmap_getRange(const UCPMap *map, UChar32 start, - UCPMapRangeOption option, uint32_t surrogateValue, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { - return ucptrie_getRange(reinterpret_cast(map), start, - option, surrogateValue, - filter, context, pValue); -} diff --git a/deps/node/deps/icu-small/source/common/ucptrie_impl.h b/deps/node/deps/icu-small/source/common/ucptrie_impl.h deleted file mode 100644 index 1fe6a18a..00000000 --- a/deps/node/deps/icu-small/source/common/ucptrie_impl.h +++ /dev/null @@ -1,289 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// ucptrie_impl.h (modified from utrie2_impl.h) -// created: 2017dec29 Markus W. Scherer - -#ifndef __UCPTRIE_IMPL_H__ -#define __UCPTRIE_IMPL_H__ - -#include "unicode/ucptrie.h" -#ifdef UCPTRIE_DEBUG -#include "unicode/umutablecptrie.h" -#endif - -// UCPTrie signature values, in platform endianness and opposite endianness. -// The UCPTrie signature ASCII byte values spell "Tri3". 
-#define UCPTRIE_SIG 0x54726933 -#define UCPTRIE_OE_SIG 0x33697254 - -/** - * Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie. - * @internal - */ -struct UCPTrieHeader { - /** "Tri3" in big-endian US-ASCII (0x54726933) */ - uint32_t signature; - - /** - * Options bit field: - * Bits 15..12: Data length bits 19..16. - * Bits 11..8: Data null block offset bits 19..16. - * Bits 7..6: UCPTrieType - * Bits 5..3: Reserved (0). - * Bits 2..0: UCPTrieValueWidth - */ - uint16_t options; - - /** Total length of the index tables. */ - uint16_t indexLength; - - /** Data length bits 15..0. */ - uint16_t dataLength; - - /** Index-3 null block offset, 0x7fff or 0xffff if none. */ - uint16_t index3NullOffset; - - /** Data null block offset bits 15..0, 0xfffff if none. */ - uint16_t dataNullOffset; - - /** - * First code point of the single-value range ending with U+10ffff, - * rounded up and then shifted right by UCPTRIE_SHIFT_2. - */ - uint16_t shiftedHighStart; -}; - -/** - * Constants for use with UCPTrieHeader.options. - * @internal - */ -enum { - UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000, - UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00, - UCPTRIE_OPTIONS_RESERVED_MASK = 0x38, - UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7, - /** - * Value for index3NullOffset which indicates that there is no index-3 null block. - * Bit 15 is unused for this value because this bit is used if the index-3 contains - * 18-bit indexes. - */ - UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff, - UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff -}; - -// Internal constants. -enum { - /** The length of the BMP index table. 1024=0x400 */ - UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT, - - UCPTRIE_SMALL_LIMIT = 0x1000, - UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT, - - /** Shift size for getting the index-3 table offset. */ - UCPTRIE_SHIFT_3 = 4, - - /** Shift size for getting the index-2 table offset. 
*/ - UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3, - - /** Shift size for getting the index-1 table offset. */ - UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2, - - /** - * Difference between two shift sizes, - * for getting an index-2 offset from an index-3 offset. 5=9-4 - */ - UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3, - - /** - * Difference between two shift sizes, - * for getting an index-1 offset from an index-2 offset. 5=14-9 - */ - UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2, - - /** - * Number of index-1 entries for the BMP. (4) - * This part of the index-1 table is omitted from the serialized form. - */ - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1, - - /** Number of entries in an index-2 block. 32=0x20 */ - UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2, - - /** Mask for getting the lower bits for the in-index-2-block offset. */ - UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1, - - /** Number of code points per index-2 table entry. 512=0x200 */ - UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2, - - /** Number of entries in an index-3 block. 32=0x20 */ - UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3, - - /** Mask for getting the lower bits for the in-index-3-block offset. */ - UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1, - - /** Number of entries in a small data block. 16=0x10 */ - UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3, - - /** Mask for getting the lower bits for the in-small-data-block offset. 
*/ - UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1 -}; - -typedef UChar32 -UCPTrieGetRange(const void *trie, UChar32 start, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue); - -U_CFUNC UChar32 -ucptrie_internalGetRange(UCPTrieGetRange *getRange, - const void *trie, UChar32 start, - UCPMapRangeOption option, uint32_t surrogateValue, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue); - -#ifdef UCPTRIE_DEBUG -U_CFUNC void -ucptrie_printLengths(const UCPTrie *trie, const char *which); - -U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name); -#endif - -/* - * Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie. - * For overview information see http://site.icu-project.org/design/struct/utrie - * - * The binary trie data should be 32-bit-aligned. - * The overall layout is: - * - * UCPTrieHeader header; -- 16 bytes, see struct definition above - * uint16_t index[header.indexLength]; - * uintXY_t data[header.dataLength]; - * - * The trie data array is an array of uint16_t, uint32_t, or uint8_t, - * specified via the UCPTrieValueWidth when building the trie. - * The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned. - * The overall length of the trie data is a multiple of 4 bytes. - * (Padding is added at the end of the index array and/or near the end of the data array as needed.) - * - * The length of the data array (dataLength) is stored as an integer split across two fields - * of the header struct (high bits in header.options). - * - * The trie type can be "fast" or "small" which determines the index structure, - * specified via the UCPTrieType when building the trie. - * - * The type and valueWidth are stored in the header.options. - * There are reserved type and valueWidth values, and reserved header.options bits. - * They could be used in future format extensions. 
- * Code reading the trie structure must fail with an error when unknown values or options are set. - * - * Values for ASCII character (U+0000..U+007F) can always be found at the start of the data array. - * - * Values for code points below a type-specific fast-indexing limit are found via two-stage lookup. - * For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000. - * For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000. - * - * All code points in the range highStart..U+10FFFF map to a single highValue - * which is stored at the second-to-last position of the data array. - * (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.) - * The highStart value is header.shiftedHighStart<>UCPTRIE_SHIFT_1. - * (For 0x100000 supplementary code points U+10000..U+10ffff.) - * - * After this index-1 table follow the variable-length index-3 and index-2 tables. - * - * The supplementary index tables are omitted completely - * if there is only BMP data (highStart<=U+10000). - * - * For a "small" trie: - * - * The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF. - * - * The "supplementary" index tables are always stored. - * The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1. - * - * For both trie types: - * - * The last index-2 block may be a partial block, storing indexes only for code points - * below highStart. - * - * Lookup for ASCII code point c: - * - * Linear access from the start of the data array. - * - * value = data[c]; - * - * Lookup for fast-range code point c: - * - * Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits, - * fetch the index array value at that offset, - * add the lower code point bits, index into the data array. - * - * value = data[index[c>>6] + (c&0x3f)]; - * - * (This works for ASCII as well.) 
- * - * Lookup for small-range code point c below highStart: - * - * Split the code point into four bit fields using several sets of shifts & masks - * to read consecutive values from the index-1, index-2, index-3 and data tables. - * - * If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff), - * then the data block offsets are stored directly as uint16_t. - * - * Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block - * has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by - * an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored - * in the additional word. - * - * See ucptrie_internalSmallIndex() for details. - * - * (In a "small" trie, this works for ASCII and below-fast_limit code points as well.) - * - * Compaction: - * - * Multiple code point ranges ("blocks") that are aligned on certain boundaries - * (determined by the shifting/bit fields of code points) and - * map to the same data values normally share a single subsequence of the data array. - * Data blocks can also overlap partially. - * (Depending on the builder code finding duplicate and overlapping blocks.) - * - * Iteration over same-value ranges: - * - * Range iteration (ucptrie_getRange()) walks the structure from a start code point - * until some code point is found that maps to a different value; - * the end of the returned range is just before that. - * - * The header.dataNullOffset (split across two header fields, high bits in header.options) - * is the offset of a widely shared data block filled with one single value. - * It helps quickly skip over large ranges of data with that value. - * The builder must ensure that if the start of any data block (fast or small) - * matches the dataNullOffset, then the whole block must be filled with the null value. 
- * Special care must be taken if there is no fast null data block - * but a small one, which is shorter, and it matches the *start* of some fast data block. - * - * Similarly, the header.index3NullOffset is the index-array offset of an index-3 block - * where all index entries point to the dataNullOffset. - * If there is no such data or index-3 block, then these offsets are set to - * values that cannot be reached (data offset out of range/reserved index offset), - * normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively. - */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/ucurr.cpp b/deps/node/deps/icu-small/source/common/ucurr.cpp deleted file mode 100644 index 5c9bbef7..00000000 --- a/deps/node/deps/icu-small/source/common/ucurr.cpp +++ /dev/null @@ -1,2731 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/ucurr.h" -#include "unicode/locid.h" -#include "unicode/ures.h" -#include "unicode/ustring.h" -#include "unicode/parsepos.h" -#include "unicode/uniset.h" -#include "unicode/usetiter.h" -#include "unicode/utf16.h" -#include "ustr_imp.h" -#include "charstr.h" -#include "cmemory.h" -#include "cstring.h" -#include "static_unicode_sets.h" -#include "uassert.h" -#include "umutex.h" -#include "ucln_cmn.h" -#include "uenumimp.h" -#include "uhash.h" -#include "hash.h" -#include "uinvchar.h" -#include "uresimp.h" -#include "ulist.h" -#include "uresimp.h" -#include "ureslocs.h" -#include "ulocimp.h" - -using namespace icu; - -//#define UCURR_DEBUG_EQUIV 1 -#ifdef UCURR_DEBUG_EQUIV -#include "stdio.h" -#endif -//#define UCURR_DEBUG 1 -#ifdef UCURR_DEBUG -#include "stdio.h" -#endif - -typedef struct IsoCodeEntry { - const UChar *isoCode; /* const because it's a reference to a resource bundle string. */ - UDate from; - UDate to; -} IsoCodeEntry; - -//------------------------------------------------------------ -// Constants - -// Default currency meta data of last resort. We try to use the -// defaults encoded in the meta data resource bundle. If there is a -// configuration/build error and these are not available, we use these -// hard-coded defaults (which should be identical). -static const int32_t LAST_RESORT_DATA[] = { 2, 0, 2, 0 }; - -// POW10[i] = 10^i, i=0..MAX_POW10 -static const int32_t POW10[] = { 1, 10, 100, 1000, 10000, 100000, - 1000000, 10000000, 100000000, 1000000000 }; - -static const int32_t MAX_POW10 = UPRV_LENGTHOF(POW10) - 1; - -#define ISO_CURRENCY_CODE_LENGTH 3 - -//------------------------------------------------------------ -// Resource tags -// - -static const char CURRENCY_DATA[] = "supplementalData"; -// Tag for meta-data, in root. 
-static const char CURRENCY_META[] = "CurrencyMeta"; - -// Tag for map from countries to currencies, in root. -static const char CURRENCY_MAP[] = "CurrencyMap"; - -// Tag for default meta-data, in CURRENCY_META -static const char DEFAULT_META[] = "DEFAULT"; - -// Variant for legacy pre-euro mapping in CurrencyMap -static const char VAR_PRE_EURO[] = "PREEURO"; - -// Variant for legacy euro mapping in CurrencyMap -static const char VAR_EURO[] = "EURO"; - -// Variant delimiter -static const char VAR_DELIM = '_'; -static const char VAR_DELIM_STR[] = "_"; - -// Variant for legacy euro mapping in CurrencyMap -//static const char VAR_DELIM_EURO[] = "_EURO"; - -#define VARIANT_IS_EMPTY 0 -#define VARIANT_IS_EURO 0x1 -#define VARIANT_IS_PREEURO 0x2 - -// Tag for localized display names (symbols) of currencies -static const char CURRENCIES[] = "Currencies"; -static const char CURRENCIES_NARROW[] = "Currencies%narrow"; -static const char CURRENCYPLURALS[] = "CurrencyPlurals"; - -static const UChar EUR_STR[] = {0x0045,0x0055,0x0052,0}; - -// ISO codes mapping table -static const UHashtable* gIsoCodes = NULL; -static icu::UInitOnce gIsoCodesInitOnce = U_INITONCE_INITIALIZER; - -// Currency symbol equivalances -static const icu::Hashtable* gCurrSymbolsEquiv = NULL; -static icu::UInitOnce gCurrSymbolsEquivInitOnce = U_INITONCE_INITIALIZER; - -U_NAMESPACE_BEGIN - -// EquivIterator iterates over all strings that are equivalent to a given -// string, s. Note that EquivIterator will never yield s itself. -class EquivIterator : public icu::UMemory { -public: - // Constructor. hash stores the equivalence relationships; s is the string - // for which we find equivalent strings. - inline EquivIterator(const icu::Hashtable& hash, const icu::UnicodeString& s) - : _hash(hash) { - _start = _current = &s; - } - inline ~EquivIterator() { } - - // next returns the next equivalent string or NULL if there are no more. - // If s has no equivalent strings, next returns NULL on the first call. 
- const icu::UnicodeString *next(); -private: - const icu::Hashtable& _hash; - const icu::UnicodeString* _start; - const icu::UnicodeString* _current; -}; - -const icu::UnicodeString * -EquivIterator::next() { - const icu::UnicodeString* _next = (const icu::UnicodeString*) _hash.get(*_current); - if (_next == NULL) { - U_ASSERT(_current == _start); - return NULL; - } - if (*_next == *_start) { - return NULL; - } - _current = _next; - return _next; -} - -U_NAMESPACE_END - -// makeEquivalent makes lhs and rhs equivalent by updating the equivalence -// relations in hash accordingly. -static void makeEquivalent( - const icu::UnicodeString &lhs, - const icu::UnicodeString &rhs, - icu::Hashtable* hash, UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - if (lhs == rhs) { - // already equivalent - return; - } - icu::EquivIterator leftIter(*hash, lhs); - icu::EquivIterator rightIter(*hash, rhs); - const icu::UnicodeString *firstLeft = leftIter.next(); - const icu::UnicodeString *firstRight = rightIter.next(); - const icu::UnicodeString *nextLeft = firstLeft; - const icu::UnicodeString *nextRight = firstRight; - while (nextLeft != NULL && nextRight != NULL) { - if (*nextLeft == rhs || *nextRight == lhs) { - // Already equivalent - return; - } - nextLeft = leftIter.next(); - nextRight = rightIter.next(); - } - // Not equivalent. Must join. - icu::UnicodeString *newFirstLeft; - icu::UnicodeString *newFirstRight; - if (firstRight == NULL && firstLeft == NULL) { - // Neither lhs or rhs belong to an equivalence circle, so we form - // a new equivalnce circle of just lhs and rhs. - newFirstLeft = new icu::UnicodeString(rhs); - newFirstRight = new icu::UnicodeString(lhs); - } else if (firstRight == NULL) { - // lhs belongs to an equivalence circle, but rhs does not, so we link - // rhs into lhs' circle. 
- newFirstLeft = new icu::UnicodeString(rhs); - newFirstRight = new icu::UnicodeString(*firstLeft); - } else if (firstLeft == NULL) { - // rhs belongs to an equivlance circle, but lhs does not, so we link - // lhs into rhs' circle. - newFirstLeft = new icu::UnicodeString(*firstRight); - newFirstRight = new icu::UnicodeString(lhs); - } else { - // Both lhs and rhs belong to different equivalnce circles. We link - // them together to form one single, larger equivalnce circle. - newFirstLeft = new icu::UnicodeString(*firstRight); - newFirstRight = new icu::UnicodeString(*firstLeft); - } - if (newFirstLeft == NULL || newFirstRight == NULL) { - delete newFirstLeft; - delete newFirstRight; - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - hash->put(lhs, (void *) newFirstLeft, status); - hash->put(rhs, (void *) newFirstRight, status); -} - -// countEquivalent counts how many strings are equivalent to s. -// hash stores all the equivalnce relations. -// countEquivalent does not include s itself in the count. 
-static int32_t countEquivalent(const icu::Hashtable &hash, const icu::UnicodeString &s) { - int32_t result = 0; - icu::EquivIterator iter(hash, s); - while (iter.next() != NULL) { - ++result; - } -#ifdef UCURR_DEBUG_EQUIV - { - char tmp[200]; - s.extract(0,s.length(),tmp, "UTF-8"); - printf("CountEquivalent('%s') = %d\n", tmp, result); - } -#endif - return result; -} - -static const icu::Hashtable* getCurrSymbolsEquiv(); - -//------------------------------------------------------------ -// Code - -/** - * Cleanup callback func - */ -static UBool U_CALLCONV -isoCodes_cleanup(void) -{ - if (gIsoCodes != NULL) { - uhash_close(const_cast(gIsoCodes)); - gIsoCodes = NULL; - } - gIsoCodesInitOnce.reset(); - return TRUE; -} - -/** - * Cleanup callback func - */ -static UBool U_CALLCONV -currSymbolsEquiv_cleanup(void) -{ - delete const_cast(gCurrSymbolsEquiv); - gCurrSymbolsEquiv = NULL; - gCurrSymbolsEquivInitOnce.reset(); - return TRUE; -} - -/** - * Deleter for OlsonToMetaMappingEntry - */ -static void U_CALLCONV -deleteIsoCodeEntry(void *obj) { - IsoCodeEntry *entry = (IsoCodeEntry*)obj; - uprv_free(entry); -} - -/** - * Deleter for gCurrSymbolsEquiv. - */ -static void U_CALLCONV -deleteUnicode(void *obj) { - icu::UnicodeString *entry = (icu::UnicodeString*)obj; - delete entry; -} - -/** - * Unfortunately, we have to convert the UChar* currency code to char* - * to use it as a resource key. - */ -static inline char* -myUCharsToChars(char* resultOfLen4, const UChar* currency) { - u_UCharsToChars(currency, resultOfLen4, ISO_CURRENCY_CODE_LENGTH); - resultOfLen4[ISO_CURRENCY_CODE_LENGTH] = 0; - return resultOfLen4; -} - -/** - * Internal function to look up currency data. Result is an array of - * four integers. The first is the fraction digits. The second is the - * rounding increment, or 0 if none. The rounding increment is in - * units of 10^(-fraction_digits). 
The third and fourth are the same - * except that they are those used in cash transations ( cashDigits - * and cashRounding ). - */ -static const int32_t* -_findMetaData(const UChar* currency, UErrorCode& ec) { - - if (currency == 0 || *currency == 0) { - if (U_SUCCESS(ec)) { - ec = U_ILLEGAL_ARGUMENT_ERROR; - } - return LAST_RESORT_DATA; - } - - // Get CurrencyMeta resource out of root locale file. [This may - // move out of the root locale file later; if it does, update this - // code.] - UResourceBundle* currencyData = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &ec); - UResourceBundle* currencyMeta = ures_getByKey(currencyData, CURRENCY_META, currencyData, &ec); - - if (U_FAILURE(ec)) { - ures_close(currencyMeta); - // Config/build error; return hard-coded defaults - return LAST_RESORT_DATA; - } - - // Look up our currency, or if that's not available, then DEFAULT - char buf[ISO_CURRENCY_CODE_LENGTH+1]; - UErrorCode ec2 = U_ZERO_ERROR; // local error code: soft failure - UResourceBundle* rb = ures_getByKey(currencyMeta, myUCharsToChars(buf, currency), NULL, &ec2); - if (U_FAILURE(ec2)) { - ures_close(rb); - rb = ures_getByKey(currencyMeta,DEFAULT_META, NULL, &ec); - if (U_FAILURE(ec)) { - ures_close(currencyMeta); - ures_close(rb); - // Config/build error; return hard-coded defaults - return LAST_RESORT_DATA; - } - } - - int32_t len; - const int32_t *data = ures_getIntVector(rb, &len, &ec); - if (U_FAILURE(ec) || len != 4) { - // Config/build error; return hard-coded defaults - if (U_SUCCESS(ec)) { - ec = U_INVALID_FORMAT_ERROR; - } - ures_close(currencyMeta); - ures_close(rb); - return LAST_RESORT_DATA; - } - - ures_close(currencyMeta); - ures_close(rb); - return data; -} - -// ------------------------------------- - -/** - * @see VARIANT_IS_EURO - * @see VARIANT_IS_PREEURO - */ -static uint32_t -idForLocale(const char* locale, char* countryAndVariant, int capacity, UErrorCode* ec) -{ - uint32_t variantType = 0; - // !!! 
this is internal only, assumes buffer is not null and capacity is sufficient - // Extract the country name and variant name. We only - // recognize two variant names, EURO and PREEURO. - char variant[ULOC_FULLNAME_CAPACITY]; - ulocimp_getRegionForSupplementalData(locale, FALSE, countryAndVariant, capacity, ec); - uloc_getVariant(locale, variant, sizeof(variant), ec); - if (variant[0] != 0) { - variantType = (uint32_t)(0 == uprv_strcmp(variant, VAR_EURO)) - | ((uint32_t)(0 == uprv_strcmp(variant, VAR_PRE_EURO)) << 1); - if (variantType) - { - uprv_strcat(countryAndVariant, VAR_DELIM_STR); - uprv_strcat(countryAndVariant, variant); - } - } - return variantType; -} - -// ------------------------------------------ -// -// Registration -// -//------------------------------------------- - -// don't use ICUService since we don't need fallback - -U_CDECL_BEGIN -static UBool U_CALLCONV currency_cleanup(void); -U_CDECL_END - -#if !UCONFIG_NO_SERVICE -struct CReg; - -static UMutex gCRegLock = U_MUTEX_INITIALIZER; -static CReg* gCRegHead = 0; - -struct CReg : public icu::UMemory { - CReg *next; - UChar iso[ISO_CURRENCY_CODE_LENGTH+1]; - char id[ULOC_FULLNAME_CAPACITY]; - - CReg(const UChar* _iso, const char* _id) - : next(0) - { - int32_t len = (int32_t)uprv_strlen(_id); - if (len > (int32_t)(sizeof(id)-1)) { - len = (sizeof(id)-1); - } - uprv_strncpy(id, _id, len); - id[len] = 0; - u_memcpy(iso, _iso, ISO_CURRENCY_CODE_LENGTH); - iso[ISO_CURRENCY_CODE_LENGTH] = 0; - } - - static UCurrRegistryKey reg(const UChar* _iso, const char* _id, UErrorCode* status) - { - if (status && U_SUCCESS(*status) && _iso && _id) { - CReg* n = new CReg(_iso, _id); - if (n) { - umtx_lock(&gCRegLock); - if (!gCRegHead) { - /* register for the first time */ - ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup); - } - n->next = gCRegHead; - gCRegHead = n; - umtx_unlock(&gCRegLock); - return n; - } - *status = U_MEMORY_ALLOCATION_ERROR; - } - return 0; - } - - static UBool 
unreg(UCurrRegistryKey key) { - UBool found = FALSE; - umtx_lock(&gCRegLock); - - CReg** p = &gCRegHead; - while (*p) { - if (*p == key) { - *p = ((CReg*)key)->next; - delete (CReg*)key; - found = TRUE; - break; - } - p = &((*p)->next); - } - - umtx_unlock(&gCRegLock); - return found; - } - - static const UChar* get(const char* id) { - const UChar* result = NULL; - umtx_lock(&gCRegLock); - CReg* p = gCRegHead; - - /* register cleanup of the mutex */ - ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup); - while (p) { - if (uprv_strcmp(id, p->id) == 0) { - result = p->iso; - break; - } - p = p->next; - } - umtx_unlock(&gCRegLock); - return result; - } - - /* This doesn't need to be thread safe. It's for u_cleanup only. */ - static void cleanup(void) { - while (gCRegHead) { - CReg* n = gCRegHead; - gCRegHead = gCRegHead->next; - delete n; - } - } -}; - -// ------------------------------------- - -U_CAPI UCurrRegistryKey U_EXPORT2 -ucurr_register(const UChar* isoCode, const char* locale, UErrorCode *status) -{ - if (status && U_SUCCESS(*status)) { - char id[ULOC_FULLNAME_CAPACITY]; - idForLocale(locale, id, sizeof(id), status); - return CReg::reg(isoCode, id, status); - } - return NULL; -} - -// ------------------------------------- - -U_CAPI UBool U_EXPORT2 -ucurr_unregister(UCurrRegistryKey key, UErrorCode* status) -{ - if (status && U_SUCCESS(*status)) { - return CReg::unreg(key); - } - return FALSE; -} -#endif /* UCONFIG_NO_SERVICE */ - -// ------------------------------------- - -/** - * Release all static memory held by currency. - */ -/*The declaration here is needed so currency_cleanup(void) - * can call this function. - */ -static UBool U_CALLCONV -currency_cache_cleanup(void); - -U_CDECL_BEGIN -static UBool U_CALLCONV currency_cleanup(void) { -#if !UCONFIG_NO_SERVICE - CReg::cleanup(); -#endif - /* - * There might be some cached currency data or isoCodes data. 
- */ - currency_cache_cleanup(); - isoCodes_cleanup(); - currSymbolsEquiv_cleanup(); - - return TRUE; -} -U_CDECL_END - -// ------------------------------------- - -U_CAPI int32_t U_EXPORT2 -ucurr_forLocale(const char* locale, - UChar* buff, - int32_t buffCapacity, - UErrorCode* ec) { - if (U_FAILURE(*ec)) { return 0; } - if (buffCapacity < 0 || (buff == nullptr && buffCapacity > 0)) { - *ec = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - char currency[4]; // ISO currency codes are alpha3 codes. - UErrorCode localStatus = U_ZERO_ERROR; - int32_t resLen = uloc_getKeywordValue(locale, "currency", - currency, UPRV_LENGTHOF(currency), &localStatus); - if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency, resLen)) { - if (resLen < buffCapacity) { - T_CString_toUpperCase(currency); - u_charsToUChars(currency, buff, resLen); - } - return u_terminateUChars(buff, buffCapacity, resLen, ec); - } - - // get country or country_variant in `id' - char id[ULOC_FULLNAME_CAPACITY]; - uint32_t variantType = idForLocale(locale, id, UPRV_LENGTHOF(id), ec); - if (U_FAILURE(*ec)) { - return 0; - } - -#if !UCONFIG_NO_SERVICE - const UChar* result = CReg::get(id); - if (result) { - if(buffCapacity > u_strlen(result)) { - u_strcpy(buff, result); - } - resLen = u_strlen(result); - return u_terminateUChars(buff, buffCapacity, resLen, ec); - } -#endif - // Remove variants, which is only needed for registration. - char *idDelim = uprv_strchr(id, VAR_DELIM); - if (idDelim) { - idDelim[0] = 0; - } - - const UChar* s = NULL; // Currency code from data file. - if (id[0] == 0) { - // No point looking in the data for an empty string. - // This is what we would get. - localStatus = U_MISSING_RESOURCE_ERROR; - } else { - // Look up the CurrencyMap element in the root bundle. 
- localStatus = U_ZERO_ERROR; - UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus); - UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus); - UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus); - UResourceBundle *currencyReq = ures_getByIndex(countryArray, 0, NULL, &localStatus); - s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus); - - // Get the second item when PREEURO is requested, and this is a known Euro country. - // If the requested variant is PREEURO, and this isn't a Euro country, - // assume that the country changed over to the Euro in the future. - // This is probably an old version of ICU that hasn't been updated yet. - // The latest currency is probably correct. - if (U_SUCCESS(localStatus)) { - if ((variantType & VARIANT_IS_PREEURO) && u_strcmp(s, EUR_STR) == 0) { - currencyReq = ures_getByIndex(countryArray, 1, currencyReq, &localStatus); - s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus); - } else if ((variantType & VARIANT_IS_EURO)) { - s = EUR_STR; - } - } - ures_close(currencyReq); - ures_close(countryArray); - } - - if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0) { - // We don't know about it. Check to see if we support the variant. - uloc_getParent(locale, id, UPRV_LENGTHOF(id), ec); - *ec = U_USING_FALLBACK_WARNING; - // TODO: Loop over the shortened id rather than recursing and - // looking again for a currency keyword. - return ucurr_forLocale(id, buff, buffCapacity, ec); - } - if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) { - // There is nothing to fallback to. Report the failure/warning if possible. - *ec = localStatus; - } - if (U_SUCCESS(*ec)) { - if(buffCapacity > resLen) { - u_strcpy(buff, s); - } - } - return u_terminateUChars(buff, buffCapacity, resLen, ec); -} - -// end registration - -/** - * Modify the given locale name by removing the rightmost _-delimited - * element. 
If there is none, empty the string ("" == root). - * NOTE: The string "root" is not recognized; do not use it. - * @return TRUE if the fallback happened; FALSE if locale is already - * root (""). - */ -static UBool fallback(char *loc) { - if (!*loc) { - return FALSE; - } - UErrorCode status = U_ZERO_ERROR; - if (uprv_strcmp(loc, "en_GB") == 0) { - // HACK: See #13368. We need "en_GB" to fall back to "en_001" instead of "en" - // in order to consume the correct data strings. This hack will be removed - // when proper data sink loading is implemented here. - // NOTE: "001" adds 1 char over "GB". However, both call sites allocate - // arrays with length ULOC_FULLNAME_CAPACITY (plenty of room for en_001). - uprv_strcpy(loc + 3, "001"); - } else { - uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status); - } - /* - char *i = uprv_strrchr(loc, '_'); - if (i == NULL) { - i = loc; - } - *i = 0; - */ - return TRUE; -} - - -U_CAPI const UChar* U_EXPORT2 -ucurr_getName(const UChar* currency, - const char* locale, - UCurrNameStyle nameStyle, - UBool* isChoiceFormat, // fillin - int32_t* len, // fillin - UErrorCode* ec) { - - // Look up the Currencies resource for the given locale. The - // Currencies locale data looks like this: - //|en { - //| Currencies { - //| USD { "US$", "US Dollar" } - //| CHF { "Sw F", "Swiss Franc" } - //| INR { "=0#Rs|1#Re|1 2) { - *ec = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - // In the future, resource bundles may implement multi-level - // fallback. That is, if a currency is not found in the en_US - // Currencies data, then the en Currencies data will be searched. - // Currently, if a Currencies datum exists in en_US and en, the - // en_US entry hides that in en. - - // We want multi-level fallback for this resource, so we implement - // it manually. - - // Use a separate UErrorCode here that does not propagate out of - // this function. 
- UErrorCode ec2 = U_ZERO_ERROR; - - char loc[ULOC_FULLNAME_CAPACITY]; - uloc_getName(locale, loc, sizeof(loc), &ec2); - if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) { - *ec = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - char buf[ISO_CURRENCY_CODE_LENGTH+1]; - myUCharsToChars(buf, currency); - - /* Normalize the keyword value to uppercase */ - T_CString_toUpperCase(buf); - - const UChar* s = NULL; - ec2 = U_ZERO_ERROR; - LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc, &ec2)); - - if (nameStyle == UCURR_NARROW_SYMBOL_NAME) { - CharString key; - key.append(CURRENCIES_NARROW, ec2); - key.append("/", ec2); - key.append(buf, ec2); - s = ures_getStringByKeyWithFallback(rb.getAlias(), key.data(), len, &ec2); - } else { - ures_getByKey(rb.getAlias(), CURRENCIES, rb.getAlias(), &ec2); - ures_getByKeyWithFallback(rb.getAlias(), buf, rb.getAlias(), &ec2); - s = ures_getStringByIndex(rb.getAlias(), choice, len, &ec2); - } - - // If we've succeeded we're done. Otherwise, try to fallback. - // If that fails (because we are already at root) then exit. - if (U_SUCCESS(ec2)) { - if (ec2 == U_USING_DEFAULT_WARNING - || (ec2 == U_USING_FALLBACK_WARNING && *ec != U_USING_DEFAULT_WARNING)) { - *ec = ec2; - } - } - - // We no longer support choice format data in names. Data should not contain - // choice patterns. - *isChoiceFormat = FALSE; - if (U_SUCCESS(ec2)) { - U_ASSERT(s != NULL); - return s; - } - - // If we fail to find a match, use the ISO 4217 code - *len = u_strlen(currency); // Should == ISO_CURRENCY_CODE_LENGTH, but maybe not...? - *ec = U_USING_DEFAULT_WARNING; - return currency; -} - -U_CAPI const UChar* U_EXPORT2 -ucurr_getPluralName(const UChar* currency, - const char* locale, - UBool* isChoiceFormat, - const char* pluralCount, - int32_t* len, // fillin - UErrorCode* ec) { - // Look up the Currencies resource for the given locale. 
The - // Currencies locale data looks like this: - //|en { - //| CurrencyPlurals { - //| USD{ - //| one{"US dollar"} - //| other{"US dollars"} - //| } - //| } - //|} - - if (U_FAILURE(*ec)) { - return 0; - } - - // Use a separate UErrorCode here that does not propagate out of - // this function. - UErrorCode ec2 = U_ZERO_ERROR; - - char loc[ULOC_FULLNAME_CAPACITY]; - uloc_getName(locale, loc, sizeof(loc), &ec2); - if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) { - *ec = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - char buf[ISO_CURRENCY_CODE_LENGTH+1]; - myUCharsToChars(buf, currency); - - const UChar* s = NULL; - ec2 = U_ZERO_ERROR; - UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2); - - rb = ures_getByKey(rb, CURRENCYPLURALS, rb, &ec2); - - // Fetch resource with multi-level resource inheritance fallback - rb = ures_getByKeyWithFallback(rb, buf, rb, &ec2); - - s = ures_getStringByKeyWithFallback(rb, pluralCount, len, &ec2); - if (U_FAILURE(ec2)) { - // fall back to "other" - ec2 = U_ZERO_ERROR; - s = ures_getStringByKeyWithFallback(rb, "other", len, &ec2); - if (U_FAILURE(ec2)) { - ures_close(rb); - // fall back to long name in Currencies - return ucurr_getName(currency, locale, UCURR_LONG_NAME, - isChoiceFormat, len, ec); - } - } - ures_close(rb); - - // If we've succeeded we're done. Otherwise, try to fallback. - // If that fails (because we are already at root) then exit. - if (U_SUCCESS(ec2)) { - if (ec2 == U_USING_DEFAULT_WARNING - || (ec2 == U_USING_FALLBACK_WARNING && *ec != U_USING_DEFAULT_WARNING)) { - *ec = ec2; - } - U_ASSERT(s != NULL); - return s; - } - - // If we fail to find a match, use the ISO 4217 code - *len = u_strlen(currency); // Should == ISO_CURRENCY_CODE_LENGTH, but maybe not...? 
- *ec = U_USING_DEFAULT_WARNING; - return currency; -} - - -//======================================================================== -// Following are structure and function for parsing currency names - -#define NEED_TO_BE_DELETED 0x1 - -// TODO: a better way to define this? -#define MAX_CURRENCY_NAME_LEN 100 - -typedef struct { - const char* IsoCode; // key - UChar* currencyName; // value - int32_t currencyNameLen; // value length - int32_t flag; // flags -} CurrencyNameStruct; - - -#ifndef MIN -#define MIN(a,b) (((a)<(b)) ? (a) : (b)) -#endif - -#ifndef MAX -#define MAX(a,b) (((a)<(b)) ? (b) : (a)) -#endif - - -// Comparason function used in quick sort. -static int U_CALLCONV currencyNameComparator(const void* a, const void* b) { - const CurrencyNameStruct* currName_1 = (const CurrencyNameStruct*)a; - const CurrencyNameStruct* currName_2 = (const CurrencyNameStruct*)b; - for (int32_t i = 0; - i < MIN(currName_1->currencyNameLen, currName_2->currencyNameLen); - ++i) { - if (currName_1->currencyName[i] < currName_2->currencyName[i]) { - return -1; - } - if (currName_1->currencyName[i] > currName_2->currencyName[i]) { - return 1; - } - } - if (currName_1->currencyNameLen < currName_2->currencyNameLen) { - return -1; - } else if (currName_1->currencyNameLen > currName_2->currencyNameLen) { - return 1; - } - return 0; -} - - -// Give a locale, return the maximum number of currency names associated with -// this locale. -// It gets currency names from resource bundles using fallback. -// It is the maximum number because in the fallback chain, some of the -// currency names are duplicated. -// For example, given locale as "en_US", the currency names get from resource -// bundle in "en_US" and "en" are duplicated. The fallback mechanism will count -// all currency names in "en_US" and "en". 
-static void -getCurrencyNameCount(const char* loc, int32_t* total_currency_name_count, int32_t* total_currency_symbol_count) { - U_NAMESPACE_USE - *total_currency_name_count = 0; - *total_currency_symbol_count = 0; - const UChar* s = NULL; - char locale[ULOC_FULLNAME_CAPACITY]; - uprv_strcpy(locale, loc); - const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv(); - for (;;) { - UErrorCode ec2 = U_ZERO_ERROR; - // TODO: ures_openDirect? - UResourceBundle* rb = ures_open(U_ICUDATA_CURR, locale, &ec2); - UResourceBundle* curr = ures_getByKey(rb, CURRENCIES, NULL, &ec2); - int32_t n = ures_getSize(curr); - for (int32_t i=0; i(symbol->getBuffer()); - (*currencySymbols)[*total_currency_symbol_count].flag = 0; - (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = symbol->length(); - } - } - - // Add currency long name. - s = ures_getStringByIndex(names, UCURR_LONG_NAME, &len, &ec2); - (*currencyNames)[*total_currency_name_count].IsoCode = iso; - UChar* upperName = toUpperCase(s, len, locale); - (*currencyNames)[*total_currency_name_count].currencyName = upperName; - (*currencyNames)[*total_currency_name_count].flag = NEED_TO_BE_DELETED; - (*currencyNames)[(*total_currency_name_count)++].currencyNameLen = len; - - // put (iso, 3, and iso) in to array - // Add currency ISO code. 
- (*currencySymbols)[*total_currency_symbol_count].IsoCode = iso; - (*currencySymbols)[*total_currency_symbol_count].currencyName = (UChar*)uprv_malloc(sizeof(UChar)*3); - // Must convert iso[] into Unicode - u_charsToUChars(iso, (*currencySymbols)[*total_currency_symbol_count].currencyName, 3); - (*currencySymbols)[*total_currency_symbol_count].flag = NEED_TO_BE_DELETED; - (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = 3; - - ures_close(names); - } - - // currency plurals - UErrorCode ec5 = U_ZERO_ERROR; - UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec5); - n = ures_getSize(curr_p); - for (int32_t i=0; i= currencyNames[mid].currencyNameLen) { - first = mid + 1; - } else { - if (key > currencyNames[mid].currencyName[indexInCurrencyNames]) { - first = mid + 1; - } - else if (key < currencyNames[mid].currencyName[indexInCurrencyNames]) { - last = mid - 1; - } - else { - // Find a match, and looking for ranges - // Now do two more binary searches. First, on the left side for - // the greatest L such that CurrencyNameStruct[L] < key. - int32_t L = *begin; - int32_t R = mid; - -#ifdef UCURR_DEBUG - printf("mid = %d\n", mid); -#endif - while (L < R) { - int32_t M = (L + R) / 2; -#ifdef UCURR_DEBUG - printf("L = %d, R = %d, M = %d\n", L, R, M); -#endif - if (indexInCurrencyNames >= currencyNames[M].currencyNameLen) { - L = M + 1; - } else { - if (currencyNames[M].currencyName[indexInCurrencyNames] < key) { - L = M + 1; - } else { -#ifdef UCURR_DEBUG - U_ASSERT(currencyNames[M].currencyName[indexInCurrencyNames] == key); -#endif - R = M; - } - } - } -#ifdef UCURR_DEBUG - U_ASSERT(L == R); -#endif - *begin = L; -#ifdef UCURR_DEBUG - printf("begin = %d\n", *begin); - U_ASSERT(currencyNames[*begin].currencyName[indexInCurrencyNames] == key); -#endif - - // Now for the second search, finding the least R such that - // key < CurrencyNameStruct[R]. 
- L = mid; - R = *end; - while (L < R) { - int32_t M = (L + R) / 2; -#ifdef UCURR_DEBUG - printf("L = %d, R = %d, M = %d\n", L, R, M); -#endif - if (currencyNames[M].currencyNameLen < indexInCurrencyNames) { - L = M + 1; - } else { - if (currencyNames[M].currencyName[indexInCurrencyNames] > key) { - R = M; - } else { -#ifdef UCURR_DEBUG - U_ASSERT(currencyNames[M].currencyName[indexInCurrencyNames] == key); -#endif - L = M + 1; - } - } - } -#ifdef UCURR_DEBUG - U_ASSERT(L == R); -#endif - if (currencyNames[R].currencyName[indexInCurrencyNames] > key) { - *end = R - 1; - } else { - *end = R; - } -#ifdef UCURR_DEBUG - printf("end = %d\n", *end); -#endif - - // now, found the range. check whether there is exact match - if (currencyNames[*begin].currencyNameLen == indexInCurrencyNames + 1) { - return *begin; // find range and exact match. - } - return -1; // find range, but no exact match. - } - } - } - *begin = -1; - *end = -1; - return -1; // failed to find range. -} - - -// Linear search "text" in "currencyNames". -// @param begin, end: the begin and end index in currencyNames, within which -// range should the search be performed. -// @param textLen: the length of the text to be compared -// @param maxMatchLen(IN/OUT): passing in the computed max matching length -// pass out the new max matching length -// @param maxMatchIndex: the index in currencyName which has the longest -// match with input text. 
-static void -linearSearch(const CurrencyNameStruct* currencyNames, - int32_t begin, int32_t end, - const UChar* text, int32_t textLen, - int32_t *partialMatchLen, - int32_t *maxMatchLen, int32_t* maxMatchIndex) { - int32_t initialPartialMatchLen = *partialMatchLen; - for (int32_t index = begin; index <= end; ++index) { - int32_t len = currencyNames[index].currencyNameLen; - if (len > *maxMatchLen && len <= textLen && - uprv_memcmp(currencyNames[index].currencyName, text, len * sizeof(UChar)) == 0) { - *partialMatchLen = MAX(*partialMatchLen, len); - *maxMatchIndex = index; - *maxMatchLen = len; -#ifdef UCURR_DEBUG - printf("maxMatchIndex = %d, maxMatchLen = %d\n", - *maxMatchIndex, *maxMatchLen); -#endif - } else { - // Check for partial matches. - for (int32_t i=initialPartialMatchLen; icurrencyNames, entry->totalCurrencyNameCount); - deleteCurrencyNames(entry->currencySymbols, entry->totalCurrencySymbolCount); - uprv_free(entry); -} - - -// Cache clean up -static UBool U_CALLCONV -currency_cache_cleanup(void) { - for (int32_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) { - if (currCache[i]) { - deleteCacheEntry(currCache[i]); - currCache[i] = 0; - } - } - return TRUE; -} - - -/** - * Loads the currency name data from the cache, or from resource bundles if necessary. - * The refCount is automatically incremented. It is the caller's responsibility - * to decrement it when done! - */ -static CurrencyNameCacheEntry* -getCacheEntry(const char* locale, UErrorCode& ec) { - - int32_t total_currency_name_count = 0; - CurrencyNameStruct* currencyNames = NULL; - int32_t total_currency_symbol_count = 0; - CurrencyNameStruct* currencySymbols = NULL; - CurrencyNameCacheEntry* cacheEntry = NULL; - - umtx_lock(&gCurrencyCacheMutex); - // in order to handle racing correctly, - // not putting 'search' in a separate function. 
- int8_t found = -1; - for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) { - if (currCache[i]!= NULL && - uprv_strcmp(locale, currCache[i]->locale) == 0) { - found = i; - break; - } - } - if (found != -1) { - cacheEntry = currCache[found]; - ++(cacheEntry->refCount); - } - umtx_unlock(&gCurrencyCacheMutex); - if (found == -1) { - collectCurrencyNames(locale, ¤cyNames, &total_currency_name_count, ¤cySymbols, &total_currency_symbol_count, ec); - if (U_FAILURE(ec)) { - return NULL; - } - umtx_lock(&gCurrencyCacheMutex); - // check again. - for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) { - if (currCache[i]!= NULL && - uprv_strcmp(locale, currCache[i]->locale) == 0) { - found = i; - break; - } - } - if (found == -1) { - // insert new entry to - // currentCacheEntryIndex % CURRENCY_NAME_CACHE_NUM - // and remove the existing entry - // currentCacheEntryIndex % CURRENCY_NAME_CACHE_NUM - // from cache. - cacheEntry = currCache[currentCacheEntryIndex]; - if (cacheEntry) { - --(cacheEntry->refCount); - // delete if the ref count is zero - if (cacheEntry->refCount == 0) { - deleteCacheEntry(cacheEntry); - } - } - cacheEntry = (CurrencyNameCacheEntry*)uprv_malloc(sizeof(CurrencyNameCacheEntry)); - currCache[currentCacheEntryIndex] = cacheEntry; - uprv_strcpy(cacheEntry->locale, locale); - cacheEntry->currencyNames = currencyNames; - cacheEntry->totalCurrencyNameCount = total_currency_name_count; - cacheEntry->currencySymbols = currencySymbols; - cacheEntry->totalCurrencySymbolCount = total_currency_symbol_count; - cacheEntry->refCount = 2; // one for cache, one for reference - currentCacheEntryIndex = (currentCacheEntryIndex + 1) % CURRENCY_NAME_CACHE_NUM; - ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup); - } else { - deleteCurrencyNames(currencyNames, total_currency_name_count); - deleteCurrencyNames(currencySymbols, total_currency_symbol_count); - cacheEntry = currCache[found]; - ++(cacheEntry->refCount); - } - 
umtx_unlock(&gCurrencyCacheMutex); - } - - return cacheEntry; -} - -static void releaseCacheEntry(CurrencyNameCacheEntry* cacheEntry) { - umtx_lock(&gCurrencyCacheMutex); - --(cacheEntry->refCount); - if (cacheEntry->refCount == 0) { // remove - deleteCacheEntry(cacheEntry); - } - umtx_unlock(&gCurrencyCacheMutex); -} - -U_CAPI void -uprv_parseCurrency(const char* locale, - const icu::UnicodeString& text, - icu::ParsePosition& pos, - int8_t type, - int32_t* partialMatchLen, - UChar* result, - UErrorCode& ec) { - U_NAMESPACE_USE - if (U_FAILURE(ec)) { - return; - } - CurrencyNameCacheEntry* cacheEntry = getCacheEntry(locale, ec); - if (U_FAILURE(ec)) { - return; - } - - int32_t total_currency_name_count = cacheEntry->totalCurrencyNameCount; - CurrencyNameStruct* currencyNames = cacheEntry->currencyNames; - int32_t total_currency_symbol_count = cacheEntry->totalCurrencySymbolCount; - CurrencyNameStruct* currencySymbols = cacheEntry->currencySymbols; - - int32_t start = pos.getIndex(); - - UChar inputText[MAX_CURRENCY_NAME_LEN]; - UChar upperText[MAX_CURRENCY_NAME_LEN]; - int32_t textLen = MIN(MAX_CURRENCY_NAME_LEN, text.length() - start); - text.extract(start, textLen, inputText); - UErrorCode ec1 = U_ZERO_ERROR; - textLen = u_strToUpper(upperText, MAX_CURRENCY_NAME_LEN, inputText, textLen, locale, &ec1); - - // Make sure partialMatchLen is initialized - *partialMatchLen = 0; - - int32_t max = 0; - int32_t matchIndex = -1; - // case in-sensitive comparision against currency names - searchCurrencyName(currencyNames, total_currency_name_count, - upperText, textLen, partialMatchLen, &max, &matchIndex); - -#ifdef UCURR_DEBUG - printf("search in names, max = %d, matchIndex = %d\n", max, matchIndex); -#endif - - int32_t maxInSymbol = 0; - int32_t matchIndexInSymbol = -1; - if (type != UCURR_LONG_NAME) { // not name only - // case sensitive comparison against currency symbols and ISO code. 
- searchCurrencyName(currencySymbols, total_currency_symbol_count, - inputText, textLen, - partialMatchLen, - &maxInSymbol, &matchIndexInSymbol); - } - -#ifdef UCURR_DEBUG - printf("search in symbols, maxInSymbol = %d, matchIndexInSymbol = %d\n", maxInSymbol, matchIndexInSymbol); - if(matchIndexInSymbol != -1) { - printf("== ISO=%s\n", currencySymbols[matchIndexInSymbol].IsoCode); - } -#endif - - if (max >= maxInSymbol && matchIndex != -1) { - u_charsToUChars(currencyNames[matchIndex].IsoCode, result, 4); - pos.setIndex(start + max); - } else if (maxInSymbol >= max && matchIndexInSymbol != -1) { - u_charsToUChars(currencySymbols[matchIndexInSymbol].IsoCode, result, 4); - pos.setIndex(start + maxInSymbol); - } - - // decrease reference count - releaseCacheEntry(cacheEntry); -} - -void uprv_currencyLeads(const char* locale, icu::UnicodeSet& result, UErrorCode& ec) { - U_NAMESPACE_USE - if (U_FAILURE(ec)) { - return; - } - CurrencyNameCacheEntry* cacheEntry = getCacheEntry(locale, ec); - if (U_FAILURE(ec)) { - return; - } - - for (int32_t i=0; itotalCurrencySymbolCount; i++) { - const CurrencyNameStruct& info = cacheEntry->currencySymbols[i]; - UChar32 cp; - U16_GET(info.currencyName, 0, 0, info.currencyNameLen, cp); - result.add(cp); - } - - for (int32_t i=0; itotalCurrencyNameCount; i++) { - const CurrencyNameStruct& info = cacheEntry->currencyNames[i]; - UChar32 cp; - U16_GET(info.currencyName, 0, 0, info.currencyNameLen, cp); - result.add(cp); - } - - // decrease reference count - releaseCacheEntry(cacheEntry); -} - - -/** - * Internal method. Given a currency ISO code and a locale, return - * the "static" currency name. This is usually the same as the - * UCURR_SYMBOL_NAME, but if the latter is a choice format, then the - * format is applied to the number 2.0 (to yield the more common - * plural) to return a static name. - * - * This is used for backward compatibility with old currency logic in - * DecimalFormat and DecimalFormatSymbols. 
- */ -U_CAPI void -uprv_getStaticCurrencyName(const UChar* iso, const char* loc, - icu::UnicodeString& result, UErrorCode& ec) -{ - U_NAMESPACE_USE - - UBool isChoiceFormat; - int32_t len; - const UChar* currname = ucurr_getName(iso, loc, UCURR_SYMBOL_NAME, - &isChoiceFormat, &len, &ec); - if (U_SUCCESS(ec)) { - result.setTo(currname, len); - } -} - -U_CAPI int32_t U_EXPORT2 -ucurr_getDefaultFractionDigits(const UChar* currency, UErrorCode* ec) { - return ucurr_getDefaultFractionDigitsForUsage(currency,UCURR_USAGE_STANDARD,ec); -} - -U_DRAFT int32_t U_EXPORT2 -ucurr_getDefaultFractionDigitsForUsage(const UChar* currency, const UCurrencyUsage usage, UErrorCode* ec) { - int32_t fracDigits = 0; - if (U_SUCCESS(*ec)) { - switch (usage) { - case UCURR_USAGE_STANDARD: - fracDigits = (_findMetaData(currency, *ec))[0]; - break; - case UCURR_USAGE_CASH: - fracDigits = (_findMetaData(currency, *ec))[2]; - break; - default: - *ec = U_UNSUPPORTED_ERROR; - } - } - return fracDigits; -} - -U_CAPI double U_EXPORT2 -ucurr_getRoundingIncrement(const UChar* currency, UErrorCode* ec) { - return ucurr_getRoundingIncrementForUsage(currency, UCURR_USAGE_STANDARD, ec); -} - -U_DRAFT double U_EXPORT2 -ucurr_getRoundingIncrementForUsage(const UChar* currency, const UCurrencyUsage usage, UErrorCode* ec) { - double result = 0.0; - - const int32_t *data = _findMetaData(currency, *ec); - if (U_SUCCESS(*ec)) { - int32_t fracDigits; - int32_t increment; - switch (usage) { - case UCURR_USAGE_STANDARD: - fracDigits = data[0]; - increment = data[1]; - break; - case UCURR_USAGE_CASH: - fracDigits = data[2]; - increment = data[3]; - break; - default: - *ec = U_UNSUPPORTED_ERROR; - return result; - } - - // If the meta data is invalid, return 0.0 - if (fracDigits < 0 || fracDigits > MAX_POW10) { - *ec = U_INVALID_FORMAT_ERROR; - } else { - // A rounding value of 0 or 1 indicates no rounding. - if (increment >= 2) { - // Return (increment) / 10^(fracDigits). 
The only actual rounding data, - // as of this writing, is CHF { 2, 5 }. - result = double(increment) / POW10[fracDigits]; - } - } - } - - return result; -} - -U_CDECL_BEGIN - -typedef struct UCurrencyContext { - uint32_t currType; /* UCurrCurrencyType */ - uint32_t listIdx; -} UCurrencyContext; - -/* -Please keep this list in alphabetical order. -You can look at the CLDR supplemental data or ISO-4217 for the meaning of some -of these items. -ISO-4217: http://www.iso.org/iso/en/prods-services/popstds/currencycodeslist.html -*/ -static const struct CurrencyList { - const char *currency; - uint32_t currType; -} gCurrencyList[] = { - {"ADP", UCURR_COMMON|UCURR_DEPRECATED}, - {"AED", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"AFA", UCURR_COMMON|UCURR_DEPRECATED}, - {"AFN", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"ALK", UCURR_COMMON|UCURR_DEPRECATED}, - {"ALL", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"AMD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"ANG", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"AOA", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"AOK", UCURR_COMMON|UCURR_DEPRECATED}, - {"AON", UCURR_COMMON|UCURR_DEPRECATED}, - {"AOR", UCURR_COMMON|UCURR_DEPRECATED}, - {"ARA", UCURR_COMMON|UCURR_DEPRECATED}, - {"ARL", UCURR_COMMON|UCURR_DEPRECATED}, - {"ARM", UCURR_COMMON|UCURR_DEPRECATED}, - {"ARP", UCURR_COMMON|UCURR_DEPRECATED}, - {"ARS", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"ATS", UCURR_COMMON|UCURR_DEPRECATED}, - {"AUD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"AWG", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"AZM", UCURR_COMMON|UCURR_DEPRECATED}, - {"AZN", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BAD", UCURR_COMMON|UCURR_DEPRECATED}, - {"BAM", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BAN", UCURR_COMMON|UCURR_DEPRECATED}, - {"BBD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BDT", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BEC", UCURR_UNCOMMON|UCURR_DEPRECATED}, - {"BEF", UCURR_COMMON|UCURR_DEPRECATED}, - {"BEL", UCURR_UNCOMMON|UCURR_DEPRECATED}, - {"BGL", UCURR_COMMON|UCURR_DEPRECATED}, - 
{"BGM", UCURR_COMMON|UCURR_DEPRECATED}, - {"BGN", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BGO", UCURR_COMMON|UCURR_DEPRECATED}, - {"BHD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BIF", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BMD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BND", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BOB", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BOL", UCURR_COMMON|UCURR_DEPRECATED}, - {"BOP", UCURR_COMMON|UCURR_DEPRECATED}, - {"BOV", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"BRB", UCURR_COMMON|UCURR_DEPRECATED}, - {"BRC", UCURR_COMMON|UCURR_DEPRECATED}, - {"BRE", UCURR_COMMON|UCURR_DEPRECATED}, - {"BRL", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BRN", UCURR_COMMON|UCURR_DEPRECATED}, - {"BRR", UCURR_COMMON|UCURR_DEPRECATED}, - {"BRZ", UCURR_COMMON|UCURR_DEPRECATED}, - {"BSD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BTN", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BUK", UCURR_COMMON|UCURR_DEPRECATED}, - {"BWP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BYB", UCURR_COMMON|UCURR_DEPRECATED}, - {"BYN", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"BYR", UCURR_COMMON|UCURR_DEPRECATED}, - {"BZD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"CAD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"CDF", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"CHE", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"CHF", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"CHW", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"CLE", UCURR_COMMON|UCURR_DEPRECATED}, - {"CLF", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"CLP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"CNH", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"CNX", UCURR_UNCOMMON|UCURR_DEPRECATED}, - {"CNY", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"COP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"COU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"CRC", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"CSD", UCURR_COMMON|UCURR_DEPRECATED}, - {"CSK", UCURR_COMMON|UCURR_DEPRECATED}, - {"CUC", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"CUP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"CVE", 
UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"CYP", UCURR_COMMON|UCURR_DEPRECATED}, - {"CZK", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"DDM", UCURR_COMMON|UCURR_DEPRECATED}, - {"DEM", UCURR_COMMON|UCURR_DEPRECATED}, - {"DJF", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"DKK", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"DOP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"DZD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"ECS", UCURR_COMMON|UCURR_DEPRECATED}, - {"ECV", UCURR_UNCOMMON|UCURR_DEPRECATED}, - {"EEK", UCURR_COMMON|UCURR_DEPRECATED}, - {"EGP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"EQE", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove? - {"ERN", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"ESA", UCURR_UNCOMMON|UCURR_DEPRECATED}, - {"ESB", UCURR_UNCOMMON|UCURR_DEPRECATED}, - {"ESP", UCURR_COMMON|UCURR_DEPRECATED}, - {"ETB", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"EUR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"FIM", UCURR_COMMON|UCURR_DEPRECATED}, - {"FJD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"FKP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"FRF", UCURR_COMMON|UCURR_DEPRECATED}, - {"GBP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"GEK", UCURR_COMMON|UCURR_DEPRECATED}, - {"GEL", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"GHC", UCURR_COMMON|UCURR_DEPRECATED}, - {"GHS", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"GIP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"GMD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"GNF", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"GNS", UCURR_COMMON|UCURR_DEPRECATED}, - {"GQE", UCURR_COMMON|UCURR_DEPRECATED}, - {"GRD", UCURR_COMMON|UCURR_DEPRECATED}, - {"GTQ", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"GWE", UCURR_COMMON|UCURR_DEPRECATED}, - {"GWP", UCURR_COMMON|UCURR_DEPRECATED}, - {"GYD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"HKD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"HNL", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"HRD", UCURR_COMMON|UCURR_DEPRECATED}, - {"HRK", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"HTG", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"HUF", 
UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"IDR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"IEP", UCURR_COMMON|UCURR_DEPRECATED}, - {"ILP", UCURR_COMMON|UCURR_DEPRECATED}, - {"ILR", UCURR_COMMON|UCURR_DEPRECATED}, - {"ILS", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"INR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"IQD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"IRR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"ISJ", UCURR_COMMON|UCURR_DEPRECATED}, - {"ISK", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"ITL", UCURR_COMMON|UCURR_DEPRECATED}, - {"JMD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"JOD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"JPY", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"KES", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"KGS", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"KHR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"KMF", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"KPW", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"KRH", UCURR_COMMON|UCURR_DEPRECATED}, - {"KRO", UCURR_COMMON|UCURR_DEPRECATED}, - {"KRW", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"KWD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"KYD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"KZT", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"LAK", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"LBP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"LKR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"LRD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"LSL", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"LSM", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove? 
- {"LTL", UCURR_COMMON|UCURR_DEPRECATED}, - {"LTT", UCURR_COMMON|UCURR_DEPRECATED}, - {"LUC", UCURR_UNCOMMON|UCURR_DEPRECATED}, - {"LUF", UCURR_COMMON|UCURR_DEPRECATED}, - {"LUL", UCURR_UNCOMMON|UCURR_DEPRECATED}, - {"LVL", UCURR_COMMON|UCURR_DEPRECATED}, - {"LVR", UCURR_COMMON|UCURR_DEPRECATED}, - {"LYD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MAD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MAF", UCURR_COMMON|UCURR_DEPRECATED}, - {"MCF", UCURR_COMMON|UCURR_DEPRECATED}, - {"MDC", UCURR_COMMON|UCURR_DEPRECATED}, - {"MDL", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MGA", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MGF", UCURR_COMMON|UCURR_DEPRECATED}, - {"MKD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MKN", UCURR_COMMON|UCURR_DEPRECATED}, - {"MLF", UCURR_COMMON|UCURR_DEPRECATED}, - {"MMK", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MNT", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MOP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MRO", UCURR_COMMON|UCURR_DEPRECATED}, - {"MRU", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MTL", UCURR_COMMON|UCURR_DEPRECATED}, - {"MTP", UCURR_COMMON|UCURR_DEPRECATED}, - {"MUR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MVP", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove? 
- {"MVR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MWK", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MXN", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MXP", UCURR_COMMON|UCURR_DEPRECATED}, - {"MXV", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"MYR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"MZE", UCURR_COMMON|UCURR_DEPRECATED}, - {"MZM", UCURR_COMMON|UCURR_DEPRECATED}, - {"MZN", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"NAD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"NGN", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"NIC", UCURR_COMMON|UCURR_DEPRECATED}, - {"NIO", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"NLG", UCURR_COMMON|UCURR_DEPRECATED}, - {"NOK", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"NPR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"NZD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"OMR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"PAB", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"PEI", UCURR_COMMON|UCURR_DEPRECATED}, - {"PEN", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"PES", UCURR_COMMON|UCURR_DEPRECATED}, - {"PGK", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"PHP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"PKR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"PLN", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"PLZ", UCURR_COMMON|UCURR_DEPRECATED}, - {"PTE", UCURR_COMMON|UCURR_DEPRECATED}, - {"PYG", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"QAR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"RHD", UCURR_COMMON|UCURR_DEPRECATED}, - {"ROL", UCURR_COMMON|UCURR_DEPRECATED}, - {"RON", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"RSD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"RUB", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"RUR", UCURR_COMMON|UCURR_DEPRECATED}, - {"RWF", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SAR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SBD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SCR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SDD", UCURR_COMMON|UCURR_DEPRECATED}, - {"SDG", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SDP", UCURR_COMMON|UCURR_DEPRECATED}, - {"SEK", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SGD", 
UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SHP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SIT", UCURR_COMMON|UCURR_DEPRECATED}, - {"SKK", UCURR_COMMON|UCURR_DEPRECATED}, - {"SLL", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SOS", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SRD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SRG", UCURR_COMMON|UCURR_DEPRECATED}, - {"SSP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"STD", UCURR_COMMON|UCURR_DEPRECATED}, - {"STN", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SUR", UCURR_COMMON|UCURR_DEPRECATED}, - {"SVC", UCURR_COMMON|UCURR_DEPRECATED}, - {"SYP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"SZL", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"THB", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"TJR", UCURR_COMMON|UCURR_DEPRECATED}, - {"TJS", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"TMM", UCURR_COMMON|UCURR_DEPRECATED}, - {"TMT", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"TND", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"TOP", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"TPE", UCURR_COMMON|UCURR_DEPRECATED}, - {"TRL", UCURR_COMMON|UCURR_DEPRECATED}, - {"TRY", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"TTD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"TWD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"TZS", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"UAH", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"UAK", UCURR_COMMON|UCURR_DEPRECATED}, - {"UGS", UCURR_COMMON|UCURR_DEPRECATED}, - {"UGX", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"USD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"USN", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"USS", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"UYI", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"UYP", UCURR_COMMON|UCURR_DEPRECATED}, - {"UYU", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"UZS", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"VEB", UCURR_COMMON|UCURR_DEPRECATED}, - {"VEF", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"VND", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"VNN", UCURR_COMMON|UCURR_DEPRECATED}, - {"VUV", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"WST", 
UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"XAF", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"XAG", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XAU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XBA", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XBB", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XBC", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XBD", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XCD", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"XDR", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XEU", UCURR_UNCOMMON|UCURR_DEPRECATED}, - {"XFO", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XFU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XOF", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"XPD", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XPF", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"XPT", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XRE", UCURR_UNCOMMON|UCURR_DEPRECATED}, - {"XSU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XTS", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XUA", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"XXX", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, - {"YDD", UCURR_COMMON|UCURR_DEPRECATED}, - {"YER", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"YUD", UCURR_COMMON|UCURR_DEPRECATED}, - {"YUM", UCURR_COMMON|UCURR_DEPRECATED}, - {"YUN", UCURR_COMMON|UCURR_DEPRECATED}, - {"YUR", UCURR_COMMON|UCURR_DEPRECATED}, - {"ZAL", UCURR_UNCOMMON|UCURR_DEPRECATED}, - {"ZAR", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"ZMK", UCURR_COMMON|UCURR_DEPRECATED}, - {"ZMW", UCURR_COMMON|UCURR_NON_DEPRECATED}, - {"ZRN", UCURR_COMMON|UCURR_DEPRECATED}, - {"ZRZ", UCURR_COMMON|UCURR_DEPRECATED}, - {"ZWD", UCURR_COMMON|UCURR_DEPRECATED}, - {"ZWL", UCURR_COMMON|UCURR_DEPRECATED}, - {"ZWR", UCURR_COMMON|UCURR_DEPRECATED}, - { NULL, 0 } // Leave here to denote the end of the list. 
-}; - -#define UCURR_MATCHES_BITMASK(variable, typeToMatch) \ - ((typeToMatch) == UCURR_ALL || ((variable) & (typeToMatch)) == (typeToMatch)) - -static int32_t U_CALLCONV -ucurr_countCurrencyList(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { - UCurrencyContext *myContext = (UCurrencyContext *)(enumerator->context); - uint32_t currType = myContext->currType; - int32_t count = 0; - - /* Count the number of items matching the type we are looking for. */ - for (int32_t idx = 0; gCurrencyList[idx].currency != NULL; idx++) { - if (UCURR_MATCHES_BITMASK(gCurrencyList[idx].currType, currType)) { - count++; - } - } - return count; -} - -static const char* U_CALLCONV -ucurr_nextCurrencyList(UEnumeration *enumerator, - int32_t* resultLength, - UErrorCode * /*pErrorCode*/) -{ - UCurrencyContext *myContext = (UCurrencyContext *)(enumerator->context); - - /* Find the next in the list that matches the type we are looking for. */ - while (myContext->listIdx < UPRV_LENGTHOF(gCurrencyList)-1) { - const struct CurrencyList *currItem = &gCurrencyList[myContext->listIdx++]; - if (UCURR_MATCHES_BITMASK(currItem->currType, myContext->currType)) - { - if (resultLength) { - *resultLength = 3; /* Currency codes are only 3 chars long */ - } - return currItem->currency; - } - } - /* We enumerated too far. */ - if (resultLength) { - *resultLength = 0; - } - return NULL; -} - -static void U_CALLCONV -ucurr_resetCurrencyList(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { - ((UCurrencyContext *)(enumerator->context))->listIdx = 0; -} - -static void U_CALLCONV -ucurr_closeCurrencyList(UEnumeration *enumerator) { - uprv_free(enumerator->context); - uprv_free(enumerator); -} - -static void U_CALLCONV -ucurr_createCurrencyList(UHashtable *isoCodes, UErrorCode* status){ - UErrorCode localStatus = U_ZERO_ERROR; - - // Look up the CurrencyMap element in the root bundle. 
- UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus); - UResourceBundle *currencyMapArray = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus); - - if (U_SUCCESS(localStatus)) { - // process each entry in currency map - for (int32_t i=0; iisoCode = isoCode; - entry->from = fromDate; - entry->to = toDate; - - localStatus = U_ZERO_ERROR; - uhash_put(isoCodes, (UChar *)isoCode, entry, &localStatus); - } - } else { - *status = localStatus; - } - ures_close(currencyArray); - } - } else { - *status = localStatus; - } - - ures_close(currencyMapArray); -} - -static const UEnumeration gEnumCurrencyList = { - NULL, - NULL, - ucurr_closeCurrencyList, - ucurr_countCurrencyList, - uenum_unextDefault, - ucurr_nextCurrencyList, - ucurr_resetCurrencyList -}; -U_CDECL_END - - -static void U_CALLCONV initIsoCodes(UErrorCode &status) { - U_ASSERT(gIsoCodes == NULL); - ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup); - - UHashtable *isoCodes = uhash_open(uhash_hashUChars, uhash_compareUChars, NULL, &status); - if (U_FAILURE(status)) { - return; - } - uhash_setValueDeleter(isoCodes, deleteIsoCodeEntry); - - ucurr_createCurrencyList(isoCodes, &status); - if (U_FAILURE(status)) { - uhash_close(isoCodes); - return; - } - gIsoCodes = isoCodes; // Note: gIsoCodes is const. Once set up here it is never altered, - // and read only access is safe without synchronization. 
-} - -static void populateCurrSymbolsEquiv(icu::Hashtable *hash, UErrorCode &status) { - if (U_FAILURE(status)) { return; } - for (auto& entry : unisets::kCurrencyEntries) { - UnicodeString exemplar(entry.exemplar); - const UnicodeSet* set = unisets::get(entry.key); - if (set == nullptr) { return; } - UnicodeSetIterator it(*set); - while (it.next()) { - UnicodeString value = it.getString(); - if (value == exemplar) { - // No need to mark the exemplar character as an equivalent - continue; - } - makeEquivalent(exemplar, value, hash, status); - if (U_FAILURE(status)) { return; } - } - } -} - -static void U_CALLCONV initCurrSymbolsEquiv() { - U_ASSERT(gCurrSymbolsEquiv == NULL); - UErrorCode status = U_ZERO_ERROR; - ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup); - icu::Hashtable *temp = new icu::Hashtable(status); - if (temp == NULL) { - return; - } - if (U_FAILURE(status)) { - delete temp; - return; - } - temp->setValueDeleter(deleteUnicode); - populateCurrSymbolsEquiv(temp, status); - if (U_FAILURE(status)) { - delete temp; - return; - } - gCurrSymbolsEquiv = temp; -} - -U_CAPI UBool U_EXPORT2 -ucurr_isAvailable(const UChar* isoCode, UDate from, UDate to, UErrorCode* eErrorCode) { - umtx_initOnce(gIsoCodesInitOnce, &initIsoCodes, *eErrorCode); - if (U_FAILURE(*eErrorCode)) { - return FALSE; - } - - IsoCodeEntry* result = (IsoCodeEntry *) uhash_get(gIsoCodes, isoCode); - if (result == NULL) { - return FALSE; - } else if (from > to) { - *eErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } else if ((from > result->to) || (to < result->from)) { - return FALSE; - } - return TRUE; -} - -static const icu::Hashtable* getCurrSymbolsEquiv() { - umtx_initOnce(gCurrSymbolsEquivInitOnce, &initCurrSymbolsEquiv); - return gCurrSymbolsEquiv; -} - -U_CAPI UEnumeration * U_EXPORT2 -ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode) { - UEnumeration *myEnum = NULL; - UCurrencyContext *myContext; - - myEnum = 
(UEnumeration*)uprv_malloc(sizeof(UEnumeration)); - if (myEnum == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(myEnum, &gEnumCurrencyList, sizeof(UEnumeration)); - myContext = (UCurrencyContext*)uprv_malloc(sizeof(UCurrencyContext)); - if (myContext == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - uprv_free(myEnum); - return NULL; - } - myContext->currType = currType; - myContext->listIdx = 0; - myEnum->context = myContext; - return myEnum; -} - -U_CAPI int32_t U_EXPORT2 -ucurr_countCurrencies(const char* locale, - UDate date, - UErrorCode* ec) -{ - int32_t currCount = 0; - - if (ec != NULL && U_SUCCESS(*ec)) - { - // local variables - UErrorCode localStatus = U_ZERO_ERROR; - char id[ULOC_FULLNAME_CAPACITY]; - uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus); - - // get country or country_variant in `id' - /*uint32_t variantType =*/ idForLocale(locale, id, sizeof(id), ec); - - if (U_FAILURE(*ec)) - { - return 0; - } - - // Remove variants, which is only needed for registration. - char *idDelim = strchr(id, VAR_DELIM); - if (idDelim) - { - idDelim[0] = 0; - } - - // Look up the CurrencyMap element in the root bundle. 
- UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus); - UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus); - - // Using the id derived from the local, get the currency data - UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus); - - // process each currency to see which one is valid for the given date - if (U_SUCCESS(localStatus)) - { - for (int32_t i=0; i 2) - { - int32_t toLength = 0; - UResourceBundle *toRes = ures_getByKey(currencyRes, "to", NULL, &localStatus); - const int32_t *toArray = ures_getIntVector(toRes, &toLength, &localStatus); - - currDate64 = (int64_t)toArray[0] << 32; - currDate64 |= ((int64_t)toArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF)); - UDate toDate = (UDate)currDate64; - - if ((fromDate <= date) && (date < toDate)) - { - currCount++; - } - - ures_close(toRes); - } - else - { - if (fromDate <= date) - { - currCount++; - } - } - - // close open resources - ures_close(currencyRes); - ures_close(fromRes); - - } // end For loop - } // end if (U_SUCCESS(localStatus)) - - ures_close(countryArray); - - // Check for errors - if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) - { - // There is nothing to fallback to. - // Report the failure/warning if possible. - *ec = localStatus; - } - - if (U_SUCCESS(*ec)) - { - // no errors - return currCount; - } - - } - - // If we got here, either error code is invalid or - // some argument passed is no good. 
- return 0; -} - -U_CAPI int32_t U_EXPORT2 -ucurr_forLocaleAndDate(const char* locale, - UDate date, - int32_t index, - UChar* buff, - int32_t buffCapacity, - UErrorCode* ec) -{ - int32_t resLen = 0; - int32_t currIndex = 0; - const UChar* s = NULL; - - if (ec != NULL && U_SUCCESS(*ec)) - { - // check the arguments passed - if ((buff && buffCapacity) || !buffCapacity ) - { - // local variables - UErrorCode localStatus = U_ZERO_ERROR; - char id[ULOC_FULLNAME_CAPACITY]; - resLen = uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus); - - // get country or country_variant in `id' - /*uint32_t variantType =*/ idForLocale(locale, id, sizeof(id), ec); - if (U_FAILURE(*ec)) - { - return 0; - } - - // Remove variants, which is only needed for registration. - char *idDelim = strchr(id, VAR_DELIM); - if (idDelim) - { - idDelim[0] = 0; - } - - // Look up the CurrencyMap element in the root bundle. - UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus); - UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus); - - // Using the id derived from the local, get the currency data - UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus); - - // process each currency to see which one is valid for the given date - bool matchFound = false; - if (U_SUCCESS(localStatus)) - { - if ((index <= 0) || (index> ures_getSize(countryArray))) - { - // requested index is out of bounds - ures_close(countryArray); - return 0; - } - - for (int32_t i=0; i 2) - { - int32_t toLength = 0; - UResourceBundle *toRes = ures_getByKey(currencyRes, "to", NULL, &localStatus); - const int32_t *toArray = ures_getIntVector(toRes, &toLength, &localStatus); - - currDate64 = (int64_t)toArray[0] << 32; - currDate64 |= ((int64_t)toArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF)); - UDate toDate = (UDate)currDate64; - - if ((fromDate <= date) && (date < toDate)) - { - currIndex++; - if (currIndex == index) - { - matchFound = 
true; - } - } - - ures_close(toRes); - } - else - { - if (fromDate <= date) - { - currIndex++; - if (currIndex == index) - { - matchFound = true; - } - } - } - - // close open resources - ures_close(currencyRes); - ures_close(fromRes); - - // check for loop exit - if (matchFound) - { - break; - } - - } // end For loop - } - - ures_close(countryArray); - - // Check for errors - if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) - { - // There is nothing to fallback to. - // Report the failure/warning if possible. - *ec = localStatus; - } - - if (U_SUCCESS(*ec)) - { - // no errors - if((buffCapacity> resLen) && matchFound) - { - // write out the currency value - u_strcpy(buff, s); - } - else - { - return 0; - } - } - - // return null terminated currency string - return u_terminateUChars(buff, buffCapacity, resLen, ec); - } - else - { - // illegal argument encountered - *ec = U_ILLEGAL_ARGUMENT_ERROR; - } - - } - - // If we got here, either error code is invalid or - // some argument passed is no good. 
- return resLen; -} - -static const UEnumeration defaultKeywordValues = { - NULL, - NULL, - ulist_close_keyword_values_iterator, - ulist_count_keyword_values, - uenum_unextDefault, - ulist_next_keyword_value, - ulist_reset_keyword_values_iterator -}; - -U_CAPI UEnumeration *U_EXPORT2 ucurr_getKeywordValuesForLocale(const char *key, const char *locale, UBool commonlyUsed, UErrorCode* status) { - // Resolve region - char prefRegion[ULOC_COUNTRY_CAPACITY]; - ulocimp_getRegionForSupplementalData(locale, TRUE, prefRegion, sizeof(prefRegion), status); - - // Read value from supplementalData - UList *values = ulist_createEmptyList(status); - UList *otherValues = ulist_createEmptyList(status); - UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); - if (U_FAILURE(*status) || en == NULL) { - if (en == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - } else { - uprv_free(en); - } - ulist_deleteList(values); - ulist_deleteList(otherValues); - return NULL; - } - memcpy(en, &defaultKeywordValues, sizeof(UEnumeration)); - en->context = values; - - UResourceBundle *bundle = ures_openDirect(U_ICUDATA_CURR, "supplementalData", status); - ures_getByKey(bundle, "CurrencyMap", bundle, status); - UResourceBundle bundlekey, regbndl, curbndl, to; - ures_initStackObject(&bundlekey); - ures_initStackObject(®bndl); - ures_initStackObject(&curbndl); - ures_initStackObject(&to); - - while (U_SUCCESS(*status) && ures_hasNext(bundle)) { - ures_getNextResource(bundle, &bundlekey, status); - if (U_FAILURE(*status)) { - break; - } - const char *region = ures_getKey(&bundlekey); - UBool isPrefRegion = uprv_strcmp(region, prefRegion) == 0 ? TRUE : FALSE; - if (!isPrefRegion && commonlyUsed) { - // With commonlyUsed=true, we do not put - // currencies for other regions in the - // result list. 
- continue; - } - ures_getByKey(bundle, region, ®bndl, status); - if (U_FAILURE(*status)) { - break; - } - while (U_SUCCESS(*status) && ures_hasNext(®bndl)) { - ures_getNextResource(®bndl, &curbndl, status); - if (ures_getType(&curbndl) != URES_TABLE) { - // Currently, an empty ARRAY is mixed in. - continue; - } - char *curID = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY); - int32_t curIDLength = ULOC_KEYWORDS_CAPACITY; - if (curID == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - -#if U_CHARSET_FAMILY==U_ASCII_FAMILY - ures_getUTF8StringByKey(&curbndl, "id", curID, &curIDLength, TRUE, status); - /* optimize - use the utf-8 string */ -#else - { - const UChar* defString = ures_getStringByKey(&curbndl, "id", &curIDLength, status); - if(U_SUCCESS(*status)) { - if(curIDLength+1 > ULOC_KEYWORDS_CAPACITY) { - *status = U_BUFFER_OVERFLOW_ERROR; - } else { - u_UCharsToChars(defString, curID, curIDLength+1); - } - } - } -#endif - - if (U_FAILURE(*status)) { - break; - } - UBool hasTo = FALSE; - ures_getByKey(&curbndl, "to", &to, status); - if (U_FAILURE(*status)) { - // Do nothing here... - *status = U_ZERO_ERROR; - } else { - hasTo = TRUE; - } - if (isPrefRegion && !hasTo && !ulist_containsString(values, curID, (int32_t)uprv_strlen(curID))) { - // Currently active currency for the target country - ulist_addItemEndList(values, curID, TRUE, status); - } else if (!ulist_containsString(otherValues, curID, (int32_t)uprv_strlen(curID)) && !commonlyUsed) { - ulist_addItemEndList(otherValues, curID, TRUE, status); - } else { - uprv_free(curID); - } - } - - } - if (U_SUCCESS(*status)) { - if (commonlyUsed) { - if (ulist_getListSize(values) == 0) { - // This could happen if no valid region is supplied in the input - // locale. In this case, we use the CLDR's default. 
- uenum_close(en); - en = ucurr_getKeywordValuesForLocale(key, "und", TRUE, status); - } - } else { - // Consolidate the list - char *value = NULL; - ulist_resetList(otherValues); - while ((value = (char *)ulist_getNext(otherValues)) != NULL) { - if (!ulist_containsString(values, value, (int32_t)uprv_strlen(value))) { - char *tmpValue = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY); - uprv_memcpy(tmpValue, value, uprv_strlen(value) + 1); - ulist_addItemEndList(values, tmpValue, TRUE, status); - if (U_FAILURE(*status)) { - break; - } - } - } - } - - ulist_resetList((UList *)(en->context)); - } else { - ulist_deleteList(values); - uprv_free(en); - values = NULL; - en = NULL; - } - ures_close(&to); - ures_close(&curbndl); - ures_close(®bndl); - ures_close(&bundlekey); - ures_close(bundle); - - ulist_deleteList(otherValues); - - return en; -} - - -U_CAPI int32_t U_EXPORT2 -ucurr_getNumericCode(const UChar* currency) { - int32_t code = 0; - if (currency && u_strlen(currency) == ISO_CURRENCY_CODE_LENGTH) { - UErrorCode status = U_ZERO_ERROR; - - UResourceBundle *bundle = ures_openDirect(0, "currencyNumericCodes", &status); - ures_getByKey(bundle, "codeMap", bundle, &status); - if (U_SUCCESS(status)) { - char alphaCode[ISO_CURRENCY_CODE_LENGTH+1]; - myUCharsToChars(alphaCode, currency); - T_CString_toUpperCase(alphaCode); - ures_getByKey(bundle, alphaCode, bundle, &status); - int tmpCode = ures_getInt(bundle, &status); - if (U_SUCCESS(status)) { - code = tmpCode; - } - } - ures_close(bundle); - } - return code; -} -#endif /* #if !UCONFIG_NO_FORMATTING */ - -//eof diff --git a/deps/node/deps/icu-small/source/common/ucurrimp.h b/deps/node/deps/icu-small/source/common/ucurrimp.h deleted file mode 100644 index 6d958829..00000000 --- a/deps/node/deps/icu-small/source/common/ucurrimp.h +++ /dev/null @@ -1,78 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ - -#ifndef _UCURR_IMP_H_ -#define _UCURR_IMP_H_ - -#include "unicode/utypes.h" -#include "unicode/unistr.h" -#include "unicode/parsepos.h" -#include "unicode/uniset.h" - -/** - * Internal method. Given a currency ISO code and a locale, return - * the "static" currency name. This is usually the same as the - * UCURR_SYMBOL_NAME, but if the latter is a choice format, then the - * format is applied to the number 2.0 (to yield the more common - * plural) to return a static name. - * - * This is used for backward compatibility with old currency logic in - * DecimalFormat and DecimalFormatSymbols. - */ -U_CAPI void -uprv_getStaticCurrencyName(const UChar* iso, const char* loc, - icu::UnicodeString& result, UErrorCode& ec); - -/** - * Attempt to parse the given string as a currency, either as a - * display name in the given locale, or as a 3-letter ISO 4217 - * code. If multiple display names match, then the longest one is - * selected. If both a display name and a 3-letter ISO code - * match, then the display name is preferred, unless it's length - * is less than 3. - * - * The parameters must not be NULL. - * - * @param locale the locale of the display names to match - * @param text the text to parse - * @param pos input-output position; on input, the position within - * text to match; must have 0 <= pos.getIndex() < text.length(); - * on output, the position after the last matched character. If - * the parse fails, the position in unchanged upon output. 
- * @param type currency type to parse against, LONG_NAME only or not - * @param partialMatchLen The length of the longest matching prefix; - * this may be nonzero even if no full currency was matched. - * @return the ISO 4217 code, as a string, of the best match, or - * null if there is no match - * - * @internal - */ -U_CAPI void -uprv_parseCurrency(const char* locale, - const icu::UnicodeString& text, - icu::ParsePosition& pos, - int8_t type, - int32_t* partialMatchLen, - UChar* result, - UErrorCode& ec); - -/** - * Puts all possible first-characters of a currency into the - * specified UnicodeSet. - * - * @param locale the locale of the display names of interest - * @param result the UnicodeSet to which to add the starting characters - */ -void uprv_currencyLeads(const char* locale, icu::UnicodeSet& result, UErrorCode& ec); - - - -#endif /* #ifndef _UCURR_IMP_H_ */ - -//eof diff --git a/deps/node/deps/icu-small/source/common/udata.cpp b/deps/node/deps/icu-small/source/common/udata.cpp deleted file mode 100644 index 99efbc97..00000000 --- a/deps/node/deps/icu-small/source/common/udata.cpp +++ /dev/null @@ -1,1453 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: udata.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999oct25 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" /* U_PLATFORM etc. 
*/ - -#ifdef __GNUC__ -/* if gcc -#define ATTRIBUTE_WEAK __attribute__ ((weak)) -might have to #include some other header -*/ -#endif - -#include "unicode/putil.h" -#include "unicode/udata.h" -#include "unicode/uversion.h" -#include "charstr.h" -#include "cmemory.h" -#include "cstring.h" -#include "mutex.h" -#include "putilimp.h" -#include "uassert.h" -#include "ucln_cmn.h" -#include "ucmndata.h" -#include "udatamem.h" -#include "uhash.h" -#include "umapfile.h" -#include "umutex.h" - -/*********************************************************************** -* -* Notes on the organization of the ICU data implementation -* -* All of the public API is defined in udata.h -* -* The implementation is split into several files... -* -* - udata.c (this file) contains higher level code that knows about -* the search paths for locating data, caching opened data, etc. -* -* - umapfile.c contains the low level platform-specific code for actually loading -* (memory mapping, file reading, whatever) data into memory. -* -* - ucmndata.c deals with the tables of contents of ICU data items within -* an ICU common format data file. The implementation includes -* an abstract interface and support for multiple TOC formats. -* All knowledge of any specific TOC format is encapsulated here. -* -* - udatamem.c has code for managing UDataMemory structs. These are little -* descriptor objects for blocks of memory holding ICU data of -* various types. -*/ - -/* configuration ---------------------------------------------------------- */ - -/* If you are excruciatingly bored turn this on .. 
*/ -/* #define UDATA_DEBUG 1 */ - -#if defined(UDATA_DEBUG) -# include -#endif - -U_NAMESPACE_USE - -/* - * Forward declarations - */ -static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err); - -/*********************************************************************** -* -* static (Global) data -* -************************************************************************/ - -/* - * Pointers to the common ICU data. - * - * We store multiple pointers to ICU data packages and iterate through them - * when looking for a data item. - * - * It is possible to combine this with dependency inversion: - * One or more data package libraries may export - * functions that each return a pointer to their piece of the ICU data, - * and this file would import them as weak functions, without a - * strong linker dependency from the common library on the data library. - * - * Then we can have applications depend on only that part of ICU's data - * that they really need, reducing the size of binaries that take advantage - * of this. - */ -static UDataMemory *gCommonICUDataArray[10] = { NULL }; // Access protected by icu global mutex. - -static u_atomic_int32_t gHaveTriedToLoadCommonData = ATOMIC_INT32_T_INITIALIZER(0); // See extendICUData(). - -static UHashtable *gCommonDataCache = NULL; /* Global hash table of opened ICU data files. */ -static icu::UInitOnce gCommonDataCacheInitOnce = U_INITONCE_INITIALIZER; - -#if U_PLATFORM_HAS_WINUWP_API == 0 -static UDataFileAccess gDataFileAccess = UDATA_DEFAULT_ACCESS; // Access not synchronized. - // Modifying is documented as thread-unsafe. -#else -static UDataFileAccess gDataFileAccess = UDATA_NO_FILES; // Windows UWP looks in one spot explicitly -#endif - -static UBool U_CALLCONV -udata_cleanup(void) -{ - int32_t i; - - if (gCommonDataCache) { /* Delete the cache of user data mappings. */ - uhash_close(gCommonDataCache); /* Table owns the contents, and will delete them. 
*/ - gCommonDataCache = NULL; /* Cleanup is not thread safe. */ - } - gCommonDataCacheInitOnce.reset(); - - for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray) && gCommonICUDataArray[i] != NULL; ++i) { - udata_close(gCommonICUDataArray[i]); - gCommonICUDataArray[i] = NULL; - } - gHaveTriedToLoadCommonData = 0; - - return TRUE; /* Everything was cleaned up */ -} - -static UBool U_CALLCONV -findCommonICUDataByName(const char *inBasename, UErrorCode &err) -{ - UBool found = FALSE; - int32_t i; - - UDataMemory *pData = udata_findCachedData(inBasename, err); - if (U_FAILURE(err) || pData == NULL) - return FALSE; - - { - Mutex lock; - for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) { - if ((gCommonICUDataArray[i] != NULL) && (gCommonICUDataArray[i]->pHeader == pData->pHeader)) { - /* The data pointer is already in the array. */ - found = TRUE; - break; - } - } - } - return found; -} - - -/* - * setCommonICUData. Set a UDataMemory to be the global ICU Data - */ -static UBool -setCommonICUData(UDataMemory *pData, /* The new common data. Belongs to caller, we copy it. */ - UBool warn, /* If true, set USING_DEFAULT warning if ICUData was */ - /* changed by another thread before we got to it. */ - UErrorCode *pErr) -{ - UDataMemory *newCommonData = UDataMemory_createNewInstance(pErr); - int32_t i; - UBool didUpdate = FALSE; - if (U_FAILURE(*pErr)) { - return FALSE; - } - - /* For the assignment, other threads must cleanly see either the old */ - /* or the new, not some partially initialized new. The old can not be */ - /* deleted - someone may still have a pointer to it lying around in */ - /* their locals. */ - UDatamemory_assign(newCommonData, pData); - umtx_lock(NULL); - for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) { - if (gCommonICUDataArray[i] == NULL) { - gCommonICUDataArray[i] = newCommonData; - didUpdate = TRUE; - break; - } else if (gCommonICUDataArray[i]->pHeader == pData->pHeader) { - /* The same data pointer is already in the array. 
*/ - break; - } - } - umtx_unlock(NULL); - - if (i == UPRV_LENGTHOF(gCommonICUDataArray) && warn) { - *pErr = U_USING_DEFAULT_WARNING; - } - if (didUpdate) { - ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup); - } else { - uprv_free(newCommonData); - } - return didUpdate; -} - -#if U_PLATFORM_HAS_WINUWP_API == 0 - -static UBool -setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) { - UDataMemory tData; - UDataMemory_init(&tData); - UDataMemory_setData(&tData, pData); - udata_checkCommonData(&tData, pErrorCode); - return setCommonICUData(&tData, FALSE, pErrorCode); -} - -#endif - -static const char * -findBasename(const char *path) { - const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR); - if(basename==NULL) { - return path; - } else { - return basename+1; - } -} - -#ifdef UDATA_DEBUG -static const char * -packageNameFromPath(const char *path) -{ - if((path == NULL) || (*path == 0)) { - return U_ICUDATA_NAME; - } - - path = findBasename(path); - - if((path == NULL) || (*path == 0)) { - return U_ICUDATA_NAME; - } - - return path; -} -#endif - -/*----------------------------------------------------------------------* - * * - * Cache for common data * - * Functions for looking up or adding entries to a cache of * - * data that has been previously opened. Avoids a potentially * - * expensive operation of re-opening the data for subsequent * - * uses. * - * * - * Data remains cached for the duration of the process. * - * * - *----------------------------------------------------------------------*/ - -typedef struct DataCacheElement { - char *name; - UDataMemory *item; -} DataCacheElement; - - - -/* - * Deleter function for DataCacheElements. - * udata cleanup function closes the hash table; hash table in turn calls back to - * here for each entry. 
- */ -static void U_CALLCONV DataCacheElement_deleter(void *pDCEl) { - DataCacheElement *p = (DataCacheElement *)pDCEl; - udata_close(p->item); /* unmaps storage */ - uprv_free(p->name); /* delete the hash key string. */ - uprv_free(pDCEl); /* delete 'this' */ -} - -static void U_CALLCONV udata_initHashTable(UErrorCode &err) { - U_ASSERT(gCommonDataCache == NULL); - gCommonDataCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &err); - if (U_FAILURE(err)) { - return; - } - U_ASSERT(gCommonDataCache != NULL); - uhash_setValueDeleter(gCommonDataCache, DataCacheElement_deleter); - ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup); -} - - /* udata_getCacheHashTable() - * Get the hash table used to store the data cache entries. - * Lazy create it if it doesn't yet exist. - */ -static UHashtable *udata_getHashTable(UErrorCode &err) { - umtx_initOnce(gCommonDataCacheInitOnce, &udata_initHashTable, err); - return gCommonDataCache; -} - - - -static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err) -{ - UHashtable *htable; - UDataMemory *retVal = NULL; - DataCacheElement *el; - const char *baseName; - - htable = udata_getHashTable(err); - if (U_FAILURE(err)) { - return NULL; - } - - baseName = findBasename(path); /* Cache remembers only the base name, not the full path. 
*/ - umtx_lock(NULL); - el = (DataCacheElement *)uhash_get(htable, baseName); - umtx_unlock(NULL); - if (el != NULL) { - retVal = el->item; - } -#ifdef UDATA_DEBUG - fprintf(stderr, "Cache: [%s] -> %p\n", baseName, retVal); -#endif - return retVal; -} - - -static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UErrorCode *pErr) { - DataCacheElement *newElement; - const char *baseName; - int32_t nameLen; - UHashtable *htable; - DataCacheElement *oldValue = NULL; - UErrorCode subErr = U_ZERO_ERROR; - - htable = udata_getHashTable(*pErr); - if (U_FAILURE(*pErr)) { - return NULL; - } - - /* Create a new DataCacheElement - the thingy we store in the hash table - - * and copy the supplied path and UDataMemoryItems into it. - */ - newElement = (DataCacheElement *)uprv_malloc(sizeof(DataCacheElement)); - if (newElement == NULL) { - *pErr = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - newElement->item = UDataMemory_createNewInstance(pErr); - if (U_FAILURE(*pErr)) { - uprv_free(newElement); - return NULL; - } - UDatamemory_assign(newElement->item, item); - - baseName = findBasename(path); - nameLen = (int32_t)uprv_strlen(baseName); - newElement->name = (char *)uprv_malloc(nameLen+1); - if (newElement->name == NULL) { - *pErr = U_MEMORY_ALLOCATION_ERROR; - uprv_free(newElement->item); - uprv_free(newElement); - return NULL; - } - uprv_strcpy(newElement->name, baseName); - - /* Stick the new DataCacheElement into the hash table. - */ - umtx_lock(NULL); - oldValue = (DataCacheElement *)uhash_get(htable, path); - if (oldValue != NULL) { - subErr = U_USING_DEFAULT_WARNING; - } - else { - uhash_put( - htable, - newElement->name, /* Key */ - newElement, /* Value */ - &subErr); - } - umtx_unlock(NULL); - -#ifdef UDATA_DEBUG - fprintf(stderr, "Cache: [%s] <<< %p : %s. 
vFunc=%p\n", newElement->name, - newElement->item, u_errorName(subErr), newElement->item->vFuncs); -#endif - - if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) { - *pErr = subErr; /* copy sub err unto fillin ONLY if something happens. */ - uprv_free(newElement->name); - uprv_free(newElement->item); - uprv_free(newElement); - return oldValue ? oldValue->item : NULL; - } - - return newElement->item; -} - -/*----------------------------------------------------------------------*============== - * * - * Path management. Could be shared with other tools/etc if need be * - * later on. * - * * - *----------------------------------------------------------------------*/ - -U_NAMESPACE_BEGIN - -class UDataPathIterator -{ -public: - UDataPathIterator(const char *path, const char *pkg, - const char *item, const char *suffix, UBool doCheckLastFour, - UErrorCode *pErrorCode); - const char *next(UErrorCode *pErrorCode); - -private: - const char *path; /* working path (u_icudata_Dir) */ - const char *nextPath; /* path following this one */ - const char *basename; /* item's basename (icudt22e_mt.res)*/ - - StringPiece suffix; /* item suffix (can be null) */ - - uint32_t basenameLen; /* length of basename */ - - CharString itemPath; /* path passed in with item name */ - CharString pathBuffer; /* output path for this it'ion */ - CharString packageStub; /* example: "/icudt28b". Will ignore that leaf in set paths. */ - - UBool checkLastFour; /* if TRUE then allow paths such as '/foo/myapp.dat' - * to match, checks last 4 chars of suffix with - * last 4 of path, then previous chars. */ -}; - -/** - * @param iter The iterator to be initialized. Its current state does not matter. - * @param inPath The full pathname to be iterated over. If NULL, defaults to U_ICUDATA_NAME - * @param pkg Package which is being searched for, ex "icudt28l". Will ignore leaf directories such as /icudt28l - * @param item Item to be searched for. 
Can include full path, such as /a/b/foo.dat - * @param inSuffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly. - * Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2. - * '/blarg/stuff.dat' would also be found. - * Note: inSuffix may also be the 'item' being searched for as well, (ex: "ibm-5348_P100-1997.cnv"), in which case - * the 'item' parameter is often the same as pkg. (Though sometimes might have a tree part as well, ex: "icudt62l-curr"). - */ -UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg, - const char *item, const char *inSuffix, UBool doCheckLastFour, - UErrorCode *pErrorCode) -{ -#ifdef UDATA_DEBUG - fprintf(stderr, "SUFFIX1=%s PATH=%s\n", inSuffix, inPath); -#endif - /** Path **/ - if(inPath == NULL) { - path = u_getDataDirectory(); - } else { - path = inPath; - } - - /** Package **/ - if(pkg != NULL) { - packageStub.append(U_FILE_SEP_CHAR, *pErrorCode).append(pkg, *pErrorCode); -#ifdef UDATA_DEBUG - fprintf(stderr, "STUB=%s [%d]\n", packageStub.data(), packageStub.length()); -#endif - } - - /** Item **/ - basename = findBasename(item); - basenameLen = (int32_t)uprv_strlen(basename); - - /** Item path **/ - if(basename == item) { - nextPath = path; - } else { - itemPath.append(item, (int32_t)(basename-item), *pErrorCode); - nextPath = itemPath.data(); - } -#ifdef UDATA_DEBUG - fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, inSuffix); -#endif - - /** Suffix **/ - if(inSuffix != NULL) { - suffix = inSuffix; - } else { - suffix = ""; - } - - checkLastFour = doCheckLastFour; - - /* pathBuffer will hold the output path strings returned by this iterator */ - -#ifdef UDATA_DEBUG - fprintf(stderr, "%p: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n", - iter, - item, - path, - basename, - suffix, - itemPath.data(), - nextPath, - checkLastFour?"TRUE":"false"); -#endif -} - -/** - * Get the next path on the list. 
- * - * @param iter The Iter to be used - * @param len If set, pointer to the length of the returned path, for convenience. - * @return Pointer to the next path segment, or NULL if there are no more. - */ -const char *UDataPathIterator::next(UErrorCode *pErrorCode) -{ - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - - const char *currentPath = NULL; - int32_t pathLen = 0; - const char *pathBasename; - - do - { - if( nextPath == NULL ) { - break; - } - currentPath = nextPath; - - if(nextPath == itemPath.data()) { /* we were processing item's path. */ - nextPath = path; /* start with regular path next tm. */ - pathLen = (int32_t)uprv_strlen(currentPath); - } else { - /* fix up next for next time */ - nextPath = uprv_strchr(currentPath, U_PATH_SEP_CHAR); - if(nextPath == NULL) { - /* segment: entire path */ - pathLen = (int32_t)uprv_strlen(currentPath); - } else { - /* segment: until next segment */ - pathLen = (int32_t)(nextPath - currentPath); - /* skip divider */ - nextPath ++; - } - } - - if(pathLen == 0) { - continue; - } - -#ifdef UDATA_DEBUG - fprintf(stderr, "rest of path (IDD) = %s\n", currentPath); - fprintf(stderr, " "); - { - uint32_t qqq; - for(qqq=0;qqq=4) && - uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix.data(), 4)==0 && /* suffix matches */ - uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0 && /* base matches */ - uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */ - -#ifdef UDATA_DEBUG - fprintf(stderr, "Have %s file on the path: %s\n", suffix, pathBuffer.data()); -#endif - /* do nothing */ - } - else - { /* regular dir path */ - if(pathBuffer[pathLen-1] != U_FILE_SEP_CHAR) { - if((pathLen>=4) && - uprv_strncmp(pathBuffer.data()+(pathLen-4), ".dat", 4) == 0) - { -#ifdef UDATA_DEBUG - fprintf(stderr, "skipping non-directory .dat file %s\n", pathBuffer.data()); -#endif - continue; - } - - /* Check if it is a directory with the same name as our package */ - if(!packageStub.isEmpty() && - (pathLen > 
packageStub.length()) && - !uprv_strcmp(pathBuffer.data() + pathLen - packageStub.length(), packageStub.data())) { -#ifdef UDATA_DEBUG - fprintf(stderr, "Found stub %s (will add package %s of len %d)\n", packageStub.data(), basename, basenameLen); -#endif - pathBuffer.truncate(pathLen - packageStub.length()); - } - pathBuffer.append(U_FILE_SEP_CHAR, *pErrorCode); - } - - /* + basename */ - pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode); - - if (!suffix.empty()) /* tack on suffix */ - { - if (suffix.length() > 4) { - // If the suffix is actually an item ("ibm-5348_P100-1997.cnv") and not an extension (".res") - // then we need to ensure that the path ends with a separator. - pathBuffer.ensureEndsWithFileSeparator(*pErrorCode); - } - pathBuffer.append(suffix, *pErrorCode); - } - } - -#ifdef UDATA_DEBUG - fprintf(stderr, " --> %s\n", pathBuffer.data()); -#endif - - return pathBuffer.data(); - - } while(path); - - /* fell way off the end */ - return NULL; -} - -U_NAMESPACE_END - -/* ==================================================================================*/ - - -/*----------------------------------------------------------------------* - * * - * Add a static reference to the common data library * - * Unless overridden by an explicit udata_setCommonData, this will be * - * our common data. * - * * - *----------------------------------------------------------------------*/ -#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time -extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT; -#endif - -/* - * This would be a good place for weak-linkage declarations of - * partial-data-library access functions where each returns a pointer - * to its data package, if it is linked in. 
- */ -/* -extern const void *uprv_getICUData_collation(void) ATTRIBUTE_WEAK; -extern const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK; -*/ - -/*----------------------------------------------------------------------* - * * - * openCommonData Attempt to open a common format (.dat) file * - * Map it into memory (if it's not there already) * - * and return a UDataMemory object for it. * - * * - * If the requested data is already open and cached * - * just return the cached UDataMem object. * - * * - *----------------------------------------------------------------------*/ -static UDataMemory * -openCommonData(const char *path, /* Path from OpenChoice? */ - int32_t commonDataIndex, /* ICU Data (index >= 0) if path == NULL */ - UErrorCode *pErrorCode) -{ - UDataMemory tData; - const char *pathBuffer; - const char *inBasename; - - if (U_FAILURE(*pErrorCode)) { - return NULL; - } - - UDataMemory_init(&tData); - - /* ??????? TODO revisit this */ - if (commonDataIndex >= 0) { - /* "mini-cache" for common ICU data */ - if(commonDataIndex >= UPRV_LENGTHOF(gCommonICUDataArray)) { - return NULL; - } - { - Mutex lock; - if(gCommonICUDataArray[commonDataIndex] != NULL) { - return gCommonICUDataArray[commonDataIndex]; - } -#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time - int32_t i; - for(i = 0; i < commonDataIndex; ++i) { - if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT) { - /* The linked-in data is already in the list. */ - return NULL; - } - } -#endif - } - - /* Add the linked-in data to the list. */ - /* - * This is where we would check and call weakly linked partial-data-library - * access functions. 
- */ - /* - if (uprv_getICUData_collation) { - setCommonICUDataPointer(uprv_getICUData_collation(), FALSE, pErrorCode); - } - if (uprv_getICUData_conversion) { - setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode); - } - */ -#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time - setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode); - { - Mutex lock; - return gCommonICUDataArray[commonDataIndex]; - } -#endif - } - - - /* request is NOT for ICU Data. */ - - /* Find the base name portion of the supplied path. */ - /* inBasename will be left pointing somewhere within the original path string. */ - inBasename = findBasename(path); -#ifdef UDATA_DEBUG - fprintf(stderr, "inBasename = %s\n", inBasename); -#endif - - if(*inBasename==0) { - /* no basename. This will happen if the original path was a directory name, */ - /* like "a/b/c/". (Fallback to separate files will still work.) */ -#ifdef UDATA_DEBUG - fprintf(stderr, "ocd: no basename in %s, bailing.\n", path); -#endif - if (U_SUCCESS(*pErrorCode)) { - *pErrorCode=U_FILE_ACCESS_ERROR; - } - return NULL; - } - - /* Is the requested common data file already open and cached? */ - /* Note that the cache is keyed by the base name only. The rest of the path, */ - /* if any, is not considered. */ - UDataMemory *dataToReturn = udata_findCachedData(inBasename, *pErrorCode); - if (dataToReturn != NULL || U_FAILURE(*pErrorCode)) { - return dataToReturn; - } - - /* Requested item is not in the cache. 
- * Hunt it down, trying all the path locations - */ - - UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", TRUE, pErrorCode); - - while ((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL) - { -#ifdef UDATA_DEBUG - fprintf(stderr, "ocd: trying path %s - ", pathBuffer); -#endif - uprv_mapFile(&tData, pathBuffer, pErrorCode); -#ifdef UDATA_DEBUG - fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded"); -#endif - } - if (U_FAILURE(*pErrorCode)) { - return NULL; - } - -#if defined(OS390_STUBDATA) && defined(OS390BATCH) - if (!UDataMemory_isLoaded(&tData)) { - char ourPathBuffer[1024]; - /* One more chance, for extendCommonData() */ - uprv_strncpy(ourPathBuffer, path, 1019); - ourPathBuffer[1019]=0; - uprv_strcat(ourPathBuffer, ".dat"); - uprv_mapFile(&tData, ourPathBuffer, pErrorCode); - } -#endif - - if (U_FAILURE(*pErrorCode)) { - return NULL; - } - if (!UDataMemory_isLoaded(&tData)) { - /* no common data */ - *pErrorCode=U_FILE_ACCESS_ERROR; - return NULL; - } - - /* we have mapped a file, check its header */ - udata_checkCommonData(&tData, pErrorCode); - - - /* Cache the UDataMemory struct for this .dat file, - * so we won't need to hunt it down and map it again next time - * something is needed from it. */ - return udata_cacheDataItem(inBasename, &tData, pErrorCode); -} - - -/*----------------------------------------------------------------------* - * * - * extendICUData If the full set of ICU data was not loaded at * - * program startup, load it now. This function will * - * be called when the lookup of an ICU data item in * - * the common ICU data fails. * - * * - * return true if new data is loaded, false otherwise.* - * * - *----------------------------------------------------------------------*/ -static UBool extendICUData(UErrorCode *pErr) -{ - UDataMemory *pData; - UDataMemory copyPData; - UBool didUpdate = FALSE; - - /* - * There is a chance for a race condition here. 
- * Normally, ICU data is loaded from a DLL or via mmap() and - * setCommonICUData() will detect if the same address is set twice. - * If ICU is built with data loading via fread() then the address will - * be different each time the common data is loaded and we may add - * multiple copies of the data. - * In this case, use a mutex to prevent the race. - * Use a specific mutex to avoid nested locks of the global mutex. - */ -#if MAP_IMPLEMENTATION==MAP_STDIO - static UMutex extendICUDataMutex = U_MUTEX_INITIALIZER; - umtx_lock(&extendICUDataMutex); -#endif - if(!umtx_loadAcquire(gHaveTriedToLoadCommonData)) { - /* See if we can explicitly open a .dat file for the ICUData. */ - pData = openCommonData( - U_ICUDATA_NAME, /* "icudt20l" , for example. */ - -1, /* Pretend we're not opening ICUData */ - pErr); - - /* How about if there is no pData, eh... */ - - UDataMemory_init(©PData); - if(pData != NULL) { - UDatamemory_assign(©PData, pData); - copyPData.map = 0; /* The mapping for this data is owned by the hash table */ - copyPData.mapAddr = 0; /* which will unmap it when ICU is shut down. */ - /* CommonICUData is also unmapped when ICU is shut down.*/ - /* To avoid unmapping the data twice, zero out the map */ - /* fields in the UDataMemory that we're assigning */ - /* to CommonICUData. */ - - didUpdate = /* no longer using this result */ - setCommonICUData(©PData,/* The new common data. */ - FALSE, /* No warnings if write didn't happen */ - pErr); /* setCommonICUData honors errors; NOP if error set */ - } - - umtx_storeRelease(gHaveTriedToLoadCommonData, 1); - } - - didUpdate = findCommonICUDataByName(U_ICUDATA_NAME, *pErr); /* Return 'true' when a racing writes out the extended */ - /* data after another thread has failed to see it (in openCommonData), so */ - /* extended data can be examined. */ - /* Also handles a race through here before gHaveTriedToLoadCommonData is set. 
*/ - -#if MAP_IMPLEMENTATION==MAP_STDIO - umtx_unlock(&extendICUDataMutex); -#endif - return didUpdate; /* Return true if ICUData pointer was updated. */ - /* (Could potentially have been done by another thread racing */ - /* us through here, but that's fine, we still return true */ - /* so that current thread will also examine extended data. */ -} - -/*----------------------------------------------------------------------* - * * - * udata_setCommonData * - * * - *----------------------------------------------------------------------*/ -U_CAPI void U_EXPORT2 -udata_setCommonData(const void *data, UErrorCode *pErrorCode) { - UDataMemory dataMemory; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } - - if(data==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* set the data pointer and test for validity */ - UDataMemory_init(&dataMemory); - UDataMemory_setData(&dataMemory, data); - udata_checkCommonData(&dataMemory, pErrorCode); - if (U_FAILURE(*pErrorCode)) {return;} - - /* we have good data */ - /* Set it up as the ICU Common Data. 
*/ - setCommonICUData(&dataMemory, TRUE, pErrorCode); -} - -/*--------------------------------------------------------------------------- - * - * udata_setAppData - * - *---------------------------------------------------------------------------- */ -U_CAPI void U_EXPORT2 -udata_setAppData(const char *path, const void *data, UErrorCode *err) -{ - UDataMemory udm; - - if(err==NULL || U_FAILURE(*err)) { - return; - } - if(data==NULL) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - UDataMemory_init(&udm); - UDataMemory_setData(&udm, data); - udata_checkCommonData(&udm, err); - udata_cacheDataItem(path, &udm, err); -} - -/*----------------------------------------------------------------------------* - * * - * checkDataItem Given a freshly located/loaded data item, either * - * an entry in a common file or a separately loaded file, * - * sanity check its header, and see if the data is * - * acceptable to the app. * - * If the data is good, create and return a UDataMemory * - * object that can be returned to the application. * - * Return NULL on any sort of failure. * - * * - *----------------------------------------------------------------------------*/ -static UDataMemory * -checkDataItem -( - const DataHeader *pHeader, /* The data item to be checked. */ - UDataMemoryIsAcceptable *isAcceptable, /* App's call-back function */ - void *context, /* pass-thru param for above. */ - const char *type, /* pass-thru param for above. */ - const char *name, /* pass-thru param for above. */ - UErrorCode *nonFatalErr, /* Error code if this data was not acceptable */ - /* but openChoice should continue with */ - /* trying to get data from fallback path. */ - UErrorCode *fatalErr /* Bad error, caller should return immediately */ - ) -{ - UDataMemory *rDataMem = NULL; /* the new UDataMemory, to be returned. 
*/ - - if (U_FAILURE(*fatalErr)) { - return NULL; - } - - if(pHeader->dataHeader.magic1==0xda && - pHeader->dataHeader.magic2==0x27 && - (isAcceptable==NULL || isAcceptable(context, type, name, &pHeader->info)) - ) { - rDataMem=UDataMemory_createNewInstance(fatalErr); - if (U_FAILURE(*fatalErr)) { - return NULL; - } - rDataMem->pHeader = pHeader; - } else { - /* the data is not acceptable, look further */ - /* If we eventually find something good, this errorcode will be */ - /* cleared out. */ - *nonFatalErr=U_INVALID_FORMAT_ERROR; - } - return rDataMem; -} - -/** - * @return 0 if not loaded, 1 if loaded or err - */ -static UDataMemory *doLoadFromIndividualFiles(const char *pkgName, - const char *dataPath, const char *tocEntryPathSuffix, - /* following arguments are the same as doOpenChoice itself */ - const char *path, const char *type, const char *name, - UDataMemoryIsAcceptable *isAcceptable, void *context, - UErrorCode *subErrorCode, - UErrorCode *pErrorCode) -{ - const char *pathBuffer; - UDataMemory dataMemory; - UDataMemory *pEntryData; - - /* look in ind. files: package\nam.typ ========================= */ - /* init path iterator for individual files */ - UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode); - - while ((pathBuffer = iter.next(pErrorCode)) != NULL) - { -#ifdef UDATA_DEBUG - fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer); -#endif - if (uprv_mapFile(&dataMemory, pathBuffer, pErrorCode)) - { - pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode); - if (pEntryData != NULL) { - /* Data is good. - * Hand off ownership of the backing memory to the user's UDataMemory. - * and return it. */ - pEntryData->mapAddr = dataMemory.mapAddr; - pEntryData->map = dataMemory.map; - -#ifdef UDATA_DEBUG - fprintf(stderr, "** Mapped file: %s\n", pathBuffer); -#endif - return pEntryData; - } - - /* the data is not acceptable, or some error occurred. 
Either way, unmap the memory */ - udata_close(&dataMemory); - - /* If we had a nasty error, bail out completely. */ - if (U_FAILURE(*pErrorCode)) { - return NULL; - } - - /* Otherwise remember that we found data but didn't like it for some reason */ - *subErrorCode=U_INVALID_FORMAT_ERROR; - } -#ifdef UDATA_DEBUG - fprintf(stderr, "%s\n", UDataMemory_isLoaded(&dataMemory)?"LOADED":"not loaded"); -#endif - } - return NULL; -} - -/** - * @return 0 if not loaded, 1 if loaded or err - */ -static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName*/, - const char * /*dataPath*/, const char * /*tocEntryPathSuffix*/, const char *tocEntryName, - /* following arguments are the same as doOpenChoice itself */ - const char *path, const char *type, const char *name, - UDataMemoryIsAcceptable *isAcceptable, void *context, - UErrorCode *subErrorCode, - UErrorCode *pErrorCode) -{ - UDataMemory *pEntryData; - const DataHeader *pHeader; - UDataMemory *pCommonData; - int32_t commonDataIndex; - UBool checkedExtendedICUData = FALSE; - /* try to get common data. The loop is for platforms such as the 390 that do - * not initially load the full set of ICU data. If the lookup of an ICU data item - * fails, the full (but slower to load) set is loaded, the and the loop repeats, - * trying the lookup again. Once the full set of ICU data is loaded, the loop wont - * repeat because the full set will be checked the first time through. - * - * The loop also handles the fallback to a .dat file if the application linked - * to the stub data library rather than a real library. - */ - for (commonDataIndex = isICUData ? 
0 : -1;;) { - pCommonData=openCommonData(path, commonDataIndex, subErrorCode); /** search for pkg **/ - - if(U_SUCCESS(*subErrorCode) && pCommonData!=NULL) { - int32_t length; - - /* look up the data piece in the common data */ - pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &length, subErrorCode); -#ifdef UDATA_DEBUG - fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, pHeader, u_errorName(*subErrorCode)); -#endif - - if(pHeader!=NULL) { - pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode); -#ifdef UDATA_DEBUG - fprintf(stderr, "pEntryData=%p\n", pEntryData); -#endif - if (U_FAILURE(*pErrorCode)) { - return NULL; - } - if (pEntryData != NULL) { - pEntryData->length = length; - return pEntryData; - } - } - } - // If we failed due to being out-of-memory, then stop early and report the error. - if (*subErrorCode == U_MEMORY_ALLOCATION_ERROR) { - *pErrorCode = *subErrorCode; - return NULL; - } - /* Data wasn't found. If we were looking for an ICUData item and there is - * more data available, load it and try again, - * otherwise break out of this loop. */ - if (!isICUData) { - return NULL; - } else if (pCommonData != NULL) { - ++commonDataIndex; /* try the next data package */ - } else if ((!checkedExtendedICUData) && extendICUData(subErrorCode)) { - checkedExtendedICUData = TRUE; - /* try this data package slot again: it changed from NULL to non-NULL */ - } else { - return NULL; - } - } -} - -/* - * Identify the Time Zone resources that are subject to special override data loading. 
- */ -static UBool isTimeZoneFile(const char *name, const char *type) { - return ((uprv_strcmp(type, "res") == 0) && - (uprv_strcmp(name, "zoneinfo64") == 0 || - uprv_strcmp(name, "timezoneTypes") == 0 || - uprv_strcmp(name, "windowsZones") == 0 || - uprv_strcmp(name, "metaZones") == 0)); -} - -/* - * A note on the ownership of Mapped Memory - * - * For common format files, ownership resides with the UDataMemory object - * that lives in the cache of opened common data. These UDataMemorys are private - * to the udata implementation, and are never seen directly by users. - * - * The UDataMemory objects returned to users will have the address of some desired - * data within the mapped region, but they wont have the mapping info itself, and thus - * won't cause anything to be removed from memory when they are closed. - * - * For individual data files, the UDataMemory returned to the user holds the - * information necessary to unmap the data on close. If the user independently - * opens the same data file twice, two completely independent mappings will be made. - * (There is no cache of opened data items from individual files, only a cache of - * opened Common Data files, that is, files containing a collection of data items.) - * - * For common data passed in from the user via udata_setAppData() or - * udata_setCommonData(), ownership remains with the user. - * - * UDataMemory objects themselves, as opposed to the memory they describe, - * can be anywhere - heap, stack/local or global. - * They have a flag to indicate when they're heap allocated and thus - * must be deleted when closed. 
- */ - - -/*----------------------------------------------------------------------------* - * * - * main data loading functions * - * * - *----------------------------------------------------------------------------*/ -static UDataMemory * -doOpenChoice(const char *path, const char *type, const char *name, - UDataMemoryIsAcceptable *isAcceptable, void *context, - UErrorCode *pErrorCode) -{ - UDataMemory *retVal = NULL; - - const char *dataPath; - - int32_t tocEntrySuffixIndex; - const char *tocEntryPathSuffix; - UErrorCode subErrorCode=U_ZERO_ERROR; - const char *treeChar; - - UBool isICUData = FALSE; - - - /* Is this path ICU data? */ - if(path == NULL || - !strcmp(path, U_ICUDATA_ALIAS) || /* "ICUDATA" */ - !uprv_strncmp(path, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING, /* "icudt26e-" */ - uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING)) || - !uprv_strncmp(path, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING, /* "ICUDATA-" */ - uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING))) { - isICUData = TRUE; - } - -#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) /* Windows: try "foo\bar" and "foo/bar" */ - /* remap from alternate path char to the main one */ - CharString altSepPath; - if(path) { - if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) { - altSepPath.append(path, *pErrorCode); - char *p; - while ((p = uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR)) != NULL) { - *p = U_FILE_SEP_CHAR; - } -#if defined (UDATA_DEBUG) - fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.s); -#endif - path = altSepPath.data(); - } - } -#endif - - CharString tocEntryName; /* entry name in tree format. ex: 'icudt28b/coll/ar.res' */ - CharString tocEntryPath; /* entry name in path format. 
ex: 'icudt28b\\coll\\ar.res' */ - - CharString pkgName; - CharString treeName; - - /* ======= Set up strings */ - if(path==NULL) { - pkgName.append(U_ICUDATA_NAME, *pErrorCode); - } else { - const char *pkg; - const char *first; - pkg = uprv_strrchr(path, U_FILE_SEP_CHAR); - first = uprv_strchr(path, U_FILE_SEP_CHAR); - if(uprv_pathIsAbsolute(path) || (pkg != first)) { /* more than one slash in the path- not a tree name */ - /* see if this is an /absolute/path/to/package path */ - if(pkg) { - pkgName.append(pkg+1, *pErrorCode); - } else { - pkgName.append(path, *pErrorCode); - } - } else { - treeChar = uprv_strchr(path, U_TREE_SEPARATOR); - if(treeChar) { - treeName.append(treeChar+1, *pErrorCode); /* following '-' */ - if(isICUData) { - pkgName.append(U_ICUDATA_NAME, *pErrorCode); - } else { - pkgName.append(path, (int32_t)(treeChar-path), *pErrorCode); - if (first == NULL) { - /* - This user data has no path, but there is a tree name. - Look up the correct path from the data cache later. - */ - path = pkgName.data(); - } - } - } else { - if(isICUData) { - pkgName.append(U_ICUDATA_NAME, *pErrorCode); - } else { - pkgName.append(path, *pErrorCode); - } - } - } - } - -#ifdef UDATA_DEBUG - fprintf(stderr, " P=%s T=%s\n", pkgName.data(), treeName.data()); -#endif - - /* setting up the entry name and file name - * Make up a full name by appending the type to the supplied - * name, assuming that a type was supplied. 
- */ - - /* prepend the package */ - tocEntryName.append(pkgName, *pErrorCode); - tocEntryPath.append(pkgName, *pErrorCode); - tocEntrySuffixIndex = tocEntryName.length(); - - if(!treeName.isEmpty()) { - tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode); - tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode); - } - - tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(name, *pErrorCode); - tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(name, *pErrorCode); - if(type!=NULL && *type!=0) { - tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode); - tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode); - } - // The +1 is for the U_FILE_SEP_CHAR that is always appended above. - tocEntryPathSuffix = tocEntryPath.data() + tocEntrySuffixIndex + 1; /* suffix starts here */ - -#ifdef UDATA_DEBUG - fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data()); - fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.data()); -#endif - -#if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time - if(path == NULL) { - path = COMMON_DATA_NAME; /* "icudt26e" */ - } -#else - // Windows UWP expects only a single data file. - path = COMMON_DATA_NAME; /* "icudt26e" */ -#endif - - /************************ Begin loop looking for ind. 
files ***************/ -#ifdef UDATA_DEBUG - fprintf(stderr, "IND: inBasename = %s, pkg=%s\n", "(n/a)", packageNameFromPath(path)); -#endif - - /* End of dealing with a null basename */ - dataPath = u_getDataDirectory(); - - /**** Time zone individual files override */ - if (isICUData && isTimeZoneFile(name, type)) { - const char *tzFilesDir = u_getTimeZoneFilesDirectory(pErrorCode); - if (tzFilesDir[0] != 0) { -#ifdef UDATA_DEBUG - fprintf(stderr, "Trying Time Zone Files directory = %s\n", tzFilesDir); -#endif - retVal = doLoadFromIndividualFiles(/* pkgName.data() */ "", tzFilesDir, tocEntryPathSuffix, - /* path */ "", type, name, isAcceptable, context, &subErrorCode, pErrorCode); - if((retVal != NULL) || U_FAILURE(*pErrorCode)) { - return retVal; - } - } - } - - /**** COMMON PACKAGE - only if packages are first. */ - if(gDataFileAccess == UDATA_PACKAGES_FIRST) { -#ifdef UDATA_DEBUG - fprintf(stderr, "Trying packages (UDATA_PACKAGES_FIRST)\n"); -#endif - /* #2 */ - retVal = doLoadFromCommonData(isICUData, - pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(), - path, type, name, isAcceptable, context, &subErrorCode, pErrorCode); - if((retVal != NULL) || U_FAILURE(*pErrorCode)) { - return retVal; - } - } - - /**** INDIVIDUAL FILES */ - if((gDataFileAccess==UDATA_PACKAGES_FIRST) || - (gDataFileAccess==UDATA_FILES_FIRST)) { -#ifdef UDATA_DEBUG - fprintf(stderr, "Trying individual files\n"); -#endif - /* Check to make sure that there is a dataPath to iterate over */ - if ((dataPath && *dataPath) || !isICUData) { - retVal = doLoadFromIndividualFiles(pkgName.data(), dataPath, tocEntryPathSuffix, - path, type, name, isAcceptable, context, &subErrorCode, pErrorCode); - if((retVal != NULL) || U_FAILURE(*pErrorCode)) { - return retVal; - } - } - } - - /**** COMMON PACKAGE */ - if((gDataFileAccess==UDATA_ONLY_PACKAGES) || - (gDataFileAccess==UDATA_FILES_FIRST)) { -#ifdef UDATA_DEBUG - fprintf(stderr, "Trying packages (UDATA_ONLY_PACKAGES || 
UDATA_FILES_FIRST)\n"); -#endif - retVal = doLoadFromCommonData(isICUData, - pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(), - path, type, name, isAcceptable, context, &subErrorCode, pErrorCode); - if((retVal != NULL) || U_FAILURE(*pErrorCode)) { - return retVal; - } - } - - /* Load from DLL. If we haven't attempted package load, we also haven't had any chance to - try a DLL (static or setCommonData/etc) load. - If we ever have a "UDATA_ONLY_FILES", add it to the or list here. */ - if(gDataFileAccess==UDATA_NO_FILES) { -#ifdef UDATA_DEBUG - fprintf(stderr, "Trying common data (UDATA_NO_FILES)\n"); -#endif - retVal = doLoadFromCommonData(isICUData, - pkgName.data(), "", tocEntryPathSuffix, tocEntryName.data(), - path, type, name, isAcceptable, context, &subErrorCode, pErrorCode); - if((retVal != NULL) || U_FAILURE(*pErrorCode)) { - return retVal; - } - } - - /* data not found */ - if(U_SUCCESS(*pErrorCode)) { - if(U_SUCCESS(subErrorCode)) { - /* file not found */ - *pErrorCode=U_FILE_ACCESS_ERROR; - } else { - /* entry point not found or rejected */ - *pErrorCode=subErrorCode; - } - } - return retVal; -} - - - -/* API ---------------------------------------------------------------------- */ - -U_CAPI UDataMemory * U_EXPORT2 -udata_open(const char *path, const char *type, const char *name, - UErrorCode *pErrorCode) { -#ifdef UDATA_DEBUG - fprintf(stderr, "udata_open(): Opening: %s : %s . 
%s\n", (path?path:"NULL"), name, type); - fflush(stderr); -#endif - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } else if(name==NULL || *name==0) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } else { - return doOpenChoice(path, type, name, NULL, NULL, pErrorCode); - } -} - - - -U_CAPI UDataMemory * U_EXPORT2 -udata_openChoice(const char *path, const char *type, const char *name, - UDataMemoryIsAcceptable *isAcceptable, void *context, - UErrorCode *pErrorCode) { -#ifdef UDATA_DEBUG - fprintf(stderr, "udata_openChoice(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type); -#endif - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } else if(name==NULL || *name==0 || isAcceptable==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } else { - return doOpenChoice(path, type, name, isAcceptable, context, pErrorCode); - } -} - - - -U_CAPI void U_EXPORT2 -udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) { - if(pInfo!=NULL) { - if(pData!=NULL && pData->pHeader!=NULL) { - const UDataInfo *info=&pData->pHeader->info; - uint16_t dataInfoSize=udata_getInfoSize(info); - if(pInfo->size>dataInfoSize) { - pInfo->size=dataInfoSize; - } - uprv_memcpy((uint16_t *)pInfo+1, (const uint16_t *)info+1, pInfo->size-2); - if(info->isBigEndian!=U_IS_BIG_ENDIAN) { - /* opposite endianness */ - uint16_t x=info->reservedWord; - pInfo->reservedWord=(uint16_t)((x<<8)|(x>>8)); - } - } else { - pInfo->size=0; - } - } -} - - -U_CAPI void U_EXPORT2 udata_setFileAccess(UDataFileAccess access, UErrorCode * /*status*/) -{ - // Note: this function is documented as not thread safe. - gDataFileAccess = access; -} diff --git a/deps/node/deps/icu-small/source/common/udatamem.cpp b/deps/node/deps/icu-small/source/common/udatamem.cpp deleted file mode 100644 index 6bf7c012..00000000 --- a/deps/node/deps/icu-small/source/common/udatamem.cpp +++ /dev/null @@ -1,161 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************/ - - -/*---------------------------------------------------------------------------------- - * - * UDataMemory A class-like struct that serves as a handle to a piece of memory - * that contains some ICU data (resource, converters, whatever.) - * - * When an application opens ICU data (with udata_open, for example, - * a UDataMemory * is returned. - * - *----------------------------------------------------------------------------------*/ - -#include "unicode/utypes.h" -#include "cmemory.h" -#include "unicode/udata.h" - -#include "udatamem.h" - -U_CFUNC void UDataMemory_init(UDataMemory *This) { - uprv_memset(This, 0, sizeof(UDataMemory)); - This->length=-1; -} - - -U_CFUNC void UDatamemory_assign(UDataMemory *dest, UDataMemory *source) { - /* UDataMemory Assignment. Destination UDataMemory must be initialized first. 
*/ - UBool mallocedFlag = dest->heapAllocated; - uprv_memcpy(dest, source, sizeof(UDataMemory)); - dest->heapAllocated = mallocedFlag; -} - -U_CFUNC UDataMemory *UDataMemory_createNewInstance(UErrorCode *pErr) { - UDataMemory *This; - - if (U_FAILURE(*pErr)) { - return NULL; - } - This = (UDataMemory *)uprv_malloc(sizeof(UDataMemory)); - if (This == NULL) { - *pErr = U_MEMORY_ALLOCATION_ERROR; } - else { - UDataMemory_init(This); - This->heapAllocated = TRUE; - } - return This; -} - - -U_CFUNC const DataHeader * -UDataMemory_normalizeDataPointer(const void *p) { - /* allow the data to be optionally prepended with an alignment-forcing double value */ - const DataHeader *pdh = (const DataHeader *)p; - if(pdh==NULL || (pdh->dataHeader.magic1==0xda && pdh->dataHeader.magic2==0x27)) { - return pdh; - } else { -#if U_PLATFORM == U_PF_OS400 - /* - TODO: Fix this once the compiler implements this feature. Keep in sync with genccode.c - - This is here because this platform can't currently put - const data into the read-only pages of an object or - shared library (service program). Only strings are allowed in read-only - pages, so we use char * strings to store the data. - - In order to prevent the beginning of the data from ever matching the - magic numbers we must skip the initial double. 
- [grhoten 4/24/2003] - */ - return (const DataHeader *)*((const void **)p+1); -#else - return (const DataHeader *)((const double *)p+1); -#endif - } -} - - -U_CFUNC void UDataMemory_setData (UDataMemory *This, const void *dataAddr) { - This->pHeader = UDataMemory_normalizeDataPointer(dataAddr); -} - - -U_CAPI void U_EXPORT2 -udata_close(UDataMemory *pData) { - if(pData!=NULL) { - uprv_unmapFile(pData); - if(pData->heapAllocated ) { - uprv_free(pData); - } else { - UDataMemory_init(pData); - } - } -} - -U_CAPI const void * U_EXPORT2 -udata_getMemory(UDataMemory *pData) { - if(pData!=NULL && pData->pHeader!=NULL) { - return (char *)(pData->pHeader)+udata_getHeaderSize(pData->pHeader); - } else { - return NULL; - } -} - -/** - * Get the length of the data item if possible. - * The length may be up to 15 bytes larger than the actual data. - * - * TODO Consider making this function public. - * It would have to return the actual length in more cases. - * For example, the length of the last item in a .dat package could be - * computed from the size of the whole .dat package minus the offset of the - * last item. - * The size of a file that was directly memory-mapped could be determined - * using some system API. - * - * In order to get perfect values for all data items, we may have to add a - * length field to UDataInfo, but that complicates data generation - * and may be overkill. - * - * @param pData The data item. - * @return the length of the data item, or -1 if not known - * @internal Currently used only in cintltst/udatatst.c - */ -U_CAPI int32_t U_EXPORT2 -udata_getLength(const UDataMemory *pData) { - if(pData!=NULL && pData->pHeader!=NULL && pData->length>=0) { - /* - * subtract the header size, - * return only the size of the actual data starting at udata_getMemory() - */ - return pData->length-udata_getHeaderSize(pData->pHeader); - } else { - return -1; - } -} - -/** - * Get the memory including the data header. 
- * Used in cintltst/udatatst.c - * @internal - */ -U_CAPI const void * U_EXPORT2 -udata_getRawMemory(const UDataMemory *pData) { - if(pData!=NULL && pData->pHeader!=NULL) { - return pData->pHeader; - } else { - return NULL; - } -} - -U_CFUNC UBool UDataMemory_isLoaded(const UDataMemory *This) { - return This->pHeader != NULL; -} diff --git a/deps/node/deps/icu-small/source/common/udatamem.h b/deps/node/deps/icu-small/source/common/udatamem.h deleted file mode 100644 index a05dd697..00000000 --- a/deps/node/deps/icu-small/source/common/udatamem.h +++ /dev/null @@ -1,61 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2010, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************/ - - -/*---------------------------------------------------------------------------------- - * - * UDataMemory A class-like struct that serves as a handle to a piece of memory - * that contains some ICU data (resource, converters, whatever.) - * - * When an application opens ICU data (with udata_open, for example, - * a UDataMemory * is returned. - * - *----------------------------------------------------------------------------------*/ -#ifndef __UDATAMEM_H__ -#define __UDATAMEM_H__ - -#include "unicode/udata.h" -#include "ucmndata.h" - -struct UDataMemory { - const commonDataFuncs *vFuncs; /* Function Pointers for accessing TOC */ - - const DataHeader *pHeader; /* Header of the memory being described by this */ - /* UDataMemory object. */ - const void *toc; /* For common memory, table of contents for */ - /* the pieces within. */ - UBool heapAllocated; /* True if this UDataMemory Object is on the */ - /* heap and thus needs to be deleted when closed. 
*/ - - void *mapAddr; /* For mapped or allocated memory, the start addr. */ - /* Only non-null if a close operation should unmap */ - /* the associated data. */ - void *map; /* Handle, or other data, OS dependent. */ - /* Only non-null if a close operation should unmap */ - /* the associated data, and additional info */ - /* beyond the mapAddr is needed to do that. */ - int32_t length; /* Length of the data in bytes; -1 if unknown. */ -}; - -U_CFUNC UDataMemory *UDataMemory_createNewInstance(UErrorCode *pErr); -U_CFUNC void UDatamemory_assign (UDataMemory *dest, UDataMemory *source); -U_CFUNC void UDataMemory_init (UDataMemory *This); -U_CFUNC UBool UDataMemory_isLoaded(const UDataMemory *This); -U_CFUNC void UDataMemory_setData (UDataMemory *This, const void *dataAddr); - -U_CFUNC const DataHeader *UDataMemory_normalizeDataPointer(const void *p); - -U_CAPI int32_t U_EXPORT2 -udata_getLength(const UDataMemory *pData); - -U_CAPI const void * U_EXPORT2 -udata_getRawMemory(const UDataMemory *pData); - -#endif diff --git a/deps/node/deps/icu-small/source/common/udataswp.cpp b/deps/node/deps/icu-small/source/common/udataswp.cpp deleted file mode 100644 index 86f302bd..00000000 --- a/deps/node/deps/icu-small/source/common/udataswp.cpp +++ /dev/null @@ -1,473 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: udataswp.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003jun05 -* created by: Markus W. Scherer -* -* Definitions for ICU data transformations for different platforms, -* changing between big- and little-endian data and/or between -* charset families (ASCII<->EBCDIC). 
-*/ - -#include -#include "unicode/utypes.h" -#include "unicode/udata.h" /* UDataInfo */ -#include "ucmndata.h" /* DataHeader */ -#include "cmemory.h" -#include "udataswp.h" - -/* swapping primitives ------------------------------------------------------ */ - -static int32_t U_CALLCONV -uprv_swapArray16(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint16_t *p; - uint16_t *q; - int32_t count; - uint16_t x; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - p=(const uint16_t *)inData; - q=(uint16_t *)outData; - count=length/2; - while(count>0) { - x=*p++; - *q++=(uint16_t)((x<<8)|(x>>8)); - --count; - } - - return length; -} - -static int32_t U_CALLCONV -uprv_copyArray16(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(length>0 && inData!=outData) { - uprv_memcpy(outData, inData, length); - } - return length; -} - -static int32_t U_CALLCONV -uprv_swapArray32(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint32_t *p; - uint32_t *q; - int32_t count; - uint32_t x; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - p=(const uint32_t *)inData; - q=(uint32_t *)outData; - count=length/4; - while(count>0) { - x=*p++; - *q++=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); - --count; - } - - return length; -} 
- -static int32_t U_CALLCONV -uprv_copyArray32(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(length>0 && inData!=outData) { - uprv_memcpy(outData, inData, length); - } - return length; -} - -static int32_t U_CALLCONV -uprv_swapArray64(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint64_t *p; - uint64_t *q; - int32_t count; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - p=(const uint64_t *)inData; - q=(uint64_t *)outData; - count=length/8; - while(count>0) { - uint64_t x=*p++; - x=(x<<56)|((x&0xff00)<<40)|((x&0xff0000)<<24)|((x&0xff000000)<<8)| - ((x>>8)&0xff000000)|((x>>24)&0xff0000)|((x>>40)&0xff00)|(x>>56); - *q++=x; - --count; - } - - return length; -} - -static int32_t U_CALLCONV -uprv_copyArray64(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(length>0 && inData!=outData) { - uprv_memcpy(outData, inData, length); - } - return length; -} - -static uint16_t U_CALLCONV -uprv_readSwapUInt16(uint16_t x) { - return (uint16_t)((x<<8)|(x>>8)); -} - -static uint16_t U_CALLCONV -uprv_readDirectUInt16(uint16_t x) { - return x; -} - -static uint32_t U_CALLCONV -uprv_readSwapUInt32(uint32_t x) { - return (uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); -} - -static uint32_t U_CALLCONV 
-uprv_readDirectUInt32(uint32_t x) { - return x; -} - -static void U_CALLCONV -uprv_writeSwapUInt16(uint16_t *p, uint16_t x) { - *p=(uint16_t)((x<<8)|(x>>8)); -} - -static void U_CALLCONV -uprv_writeDirectUInt16(uint16_t *p, uint16_t x) { - *p=x; -} - -static void U_CALLCONV -uprv_writeSwapUInt32(uint32_t *p, uint32_t x) { - *p=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); -} - -static void U_CALLCONV -uprv_writeDirectUInt32(uint32_t *p, uint32_t x) { - *p=x; -} - -U_CAPI int16_t U_EXPORT2 -udata_readInt16(const UDataSwapper *ds, int16_t x) { - return (int16_t)ds->readUInt16((uint16_t)x); -} - -U_CAPI int32_t U_EXPORT2 -udata_readInt32(const UDataSwapper *ds, int32_t x) { - return (int32_t)ds->readUInt32((uint32_t)x); -} - -/** - * Swap a block of invariant, NUL-terminated strings, but not padding - * bytes after the last string. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -udata_swapInvStringBlock(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const char *inChars; - int32_t stringsLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* reduce the strings length to not include bytes after the last NUL */ - inChars=(const char *)inData; - stringsLength=length; - while(stringsLength>0 && inChars[stringsLength-1]!=0) { - --stringsLength; - } - - /* swap up to the last NUL */ - ds->swapInvChars(ds, inData, stringsLength, outData, pErrorCode); - - /* copy the bytes after the last NUL */ - if(inData!=outData && length>stringsLength) { - uprv_memcpy((char *)outData+stringsLength, inChars+stringsLength, length-stringsLength); - } - - /* return the length including padding bytes */ - if(U_SUCCESS(*pErrorCode)) { - return length; - } else { - return 0; - } -} - -U_CAPI void U_EXPORT2 -udata_printError(const UDataSwapper *ds, - const char 
*fmt, - ...) { - va_list args; - - if(ds->printError!=NULL) { - va_start(args, fmt); - ds->printError(ds->printErrorContext, fmt, args); - va_end(args); - } -} - -/* swap a data header ------------------------------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -udata_swapDataHeader(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const DataHeader *pHeader; - uint16_t headerSize, infoSize; - - /* argument checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* check minimum length and magic bytes */ - pHeader=(const DataHeader *)inData; - if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || - pHeader->dataHeader.magic1!=0xda || - pHeader->dataHeader.magic2!=0x27 || - pHeader->info.sizeofUChar!=2 - ) { - udata_printError(ds, "udata_swapDataHeader(): initial bytes do not look like ICU data\n"); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - headerSize=ds->readUInt16(pHeader->dataHeader.headerSize); - infoSize=ds->readUInt16(pHeader->info.size); - - if( headerSizedataHeader)+infoSize) || - (length>=0 && length0) { - DataHeader *outHeader; - const char *s; - int32_t maxLength; - - /* Most of the fields are just bytes and need no swapping. 
*/ - if(inData!=outData) { - uprv_memcpy(outData, inData, headerSize); - } - outHeader=(DataHeader *)outData; - - outHeader->info.isBigEndian = ds->outIsBigEndian; - outHeader->info.charsetFamily = ds->outCharset; - - /* swap headerSize */ - ds->swapArray16(ds, &pHeader->dataHeader.headerSize, 2, &outHeader->dataHeader.headerSize, pErrorCode); - - /* swap UDataInfo size and reservedWord */ - ds->swapArray16(ds, &pHeader->info.size, 4, &outHeader->info.size, pErrorCode); - - /* swap copyright statement after the UDataInfo */ - infoSize+=sizeof(pHeader->dataHeader); - s=(const char *)inData+infoSize; - maxLength=headerSize-infoSize; - /* get the length of the string */ - for(length=0; lengthswapInvChars(ds, s, length, (char *)outData+infoSize, pErrorCode); - } - - return headerSize; -} - -/* API functions ------------------------------------------------------------ */ - -U_CAPI UDataSwapper * U_EXPORT2 -udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset, - UBool outIsBigEndian, uint8_t outCharset, - UErrorCode *pErrorCode) { - UDataSwapper *swapper; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } - if(inCharset>U_EBCDIC_FAMILY || outCharset>U_EBCDIC_FAMILY) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - /* allocate the swapper */ - swapper=(UDataSwapper *)uprv_malloc(sizeof(UDataSwapper)); - if(swapper==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memset(swapper, 0, sizeof(UDataSwapper)); - - /* set values and functions pointers according to in/out parameters */ - swapper->inIsBigEndian=inIsBigEndian; - swapper->inCharset=inCharset; - swapper->outIsBigEndian=outIsBigEndian; - swapper->outCharset=outCharset; - - swapper->readUInt16= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt16 : uprv_readSwapUInt16; - swapper->readUInt32= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt32 : uprv_readSwapUInt32; - - swapper->writeUInt16= outIsBigEndian==U_IS_BIG_ENDIAN ? 
uprv_writeDirectUInt16 : uprv_writeSwapUInt16; - swapper->writeUInt32= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt32 : uprv_writeSwapUInt32; - - swapper->compareInvChars= outCharset==U_ASCII_FAMILY ? uprv_compareInvAscii : uprv_compareInvEbcdic; - - if(inIsBigEndian==outIsBigEndian) { - swapper->swapArray16=uprv_copyArray16; - swapper->swapArray32=uprv_copyArray32; - swapper->swapArray64=uprv_copyArray64; - } else { - swapper->swapArray16=uprv_swapArray16; - swapper->swapArray32=uprv_swapArray32; - swapper->swapArray64=uprv_swapArray64; - } - - if(inCharset==U_ASCII_FAMILY) { - swapper->swapInvChars= outCharset==U_ASCII_FAMILY ? uprv_copyAscii : uprv_ebcdicFromAscii; - } else /* U_EBCDIC_FAMILY */ { - swapper->swapInvChars= outCharset==U_EBCDIC_FAMILY ? uprv_copyEbcdic : uprv_asciiFromEbcdic; - } - - return swapper; -} - -U_CAPI UDataSwapper * U_EXPORT2 -udata_openSwapperForInputData(const void *data, int32_t length, - UBool outIsBigEndian, uint8_t outCharset, - UErrorCode *pErrorCode) { - const DataHeader *pHeader; - uint16_t headerSize, infoSize; - UBool inIsBigEndian; - int8_t inCharset; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } - if( data==NULL || - (length>=0 && length<(int32_t)sizeof(DataHeader)) || - outCharset>U_EBCDIC_FAMILY - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - pHeader=(const DataHeader *)data; - if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || - pHeader->dataHeader.magic1!=0xda || - pHeader->dataHeader.magic2!=0x27 || - pHeader->info.sizeofUChar!=2 - ) { - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - inIsBigEndian=(UBool)pHeader->info.isBigEndian; - inCharset=pHeader->info.charsetFamily; - - if(inIsBigEndian==U_IS_BIG_ENDIAN) { - headerSize=pHeader->dataHeader.headerSize; - infoSize=pHeader->info.size; - } else { - headerSize=uprv_readSwapUInt16(pHeader->dataHeader.headerSize); - infoSize=uprv_readSwapUInt16(pHeader->info.size); - } - - if( 
headerSizedataHeader)+infoSize) || - (length>=0 && lengthEBCDIC). -*/ - -#ifndef __UDATASWP_H__ -#define __UDATASWP_H__ - -#include -#include "unicode/utypes.h" - -/* forward declaration */ - -U_CDECL_BEGIN - -struct UDataSwapper; -typedef struct UDataSwapper UDataSwapper; - -/** - * Function type for data transformation. - * Transforms data, or just returns the length of the data if - * the input length is -1. - * Swap functions assume that their data pointers are aligned properly. - * - * Quick implementation outline: - * (best to copy and adapt and existing swapper implementation) - * check that the data looks like the expected format - * if(length<0) { - * preflight: - * never dereference outData - * read inData and determine the data size - * assume that inData is long enough for this - * } else { - * outData can be NULL if length==0 - * inData==outData (in-place swapping) possible but not required! - * verify that length>=(actual size) - * if there is a chance that not every byte up to size is reached - * due to padding etc.: - * if(inData!=outData) { - * memcpy(outData, inData, actual size); - * } - * swap contents - * } - * return actual size - * - * Further implementation notes: - * - read integers from inData before swapping them - * because in-place swapping can make them unreadable - * - compareInvChars compares a local Unicode string with already-swapped - * output charset strings - * - * @param ds Pointer to UDataSwapper containing global data about the - * transformation and function pointers for handling primitive - * types. - * @param inData Pointer to the input data to be transformed or examined. - * @param length Length of the data, counting bytes. May be -1 for preflighting. - * If length>=0, then transform the data. - * If length==-1, then only determine the length of the data. - * The length cannot be determined from the data itself for all - * types of data (e.g., not for simple arrays of integers). 
- * @param outData Pointer to the output data buffer. - * If length>=0 (transformation), then the output buffer must - * have a capacity of at least length. - * If length==-1, then outData will not be used and can be NULL. - * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must - * fulfill U_SUCCESS on input. - * @return The actual length of the data. - * - * @see UDataSwapper - * @internal ICU 2.8 - */ -typedef int32_t U_CALLCONV -UDataSwapFn(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Convert one uint16_t from input to platform endianness. - * @internal ICU 2.8 - */ -typedef uint16_t U_CALLCONV -UDataReadUInt16(uint16_t x); - -/** - * Convert one uint32_t from input to platform endianness. - * @internal ICU 2.8 - */ -typedef uint32_t U_CALLCONV -UDataReadUInt32(uint32_t x); - -/** - * Convert one uint16_t from platform to input endianness. - * @internal ICU 2.8 - */ -typedef void U_CALLCONV -UDataWriteUInt16(uint16_t *p, uint16_t x); - -/** - * Convert one uint32_t from platform to input endianness. - * @internal ICU 2.8 - */ -typedef void U_CALLCONV -UDataWriteUInt32(uint32_t *p, uint32_t x); - -/** - * Compare invariant-character strings, one in the output data and the - * other one caller-provided in Unicode. - * An output data string is compared because strings are usually swapped - * before the rest of the data, to allow for sorting of string tables - * according to the output charset. - * You can use -1 for the length parameters of NUL-terminated strings as usual. - * Returns Unicode code point order for invariant characters. - * @internal ICU 2.8 - */ -typedef int32_t U_CALLCONV -UDataCompareInvChars(const UDataSwapper *ds, - const char *outString, int32_t outLength, - const UChar *localString, int32_t localLength); - -/** - * Function for message output when an error occurs during data swapping. 
- * A format string and variable number of arguments are passed - * like for vprintf(). - * - * @param context A function-specific context pointer. - * @param fmt The format string. - * @param args The arguments for format string inserts. - * - * @internal ICU 2.8 - */ -typedef void U_CALLCONV -UDataPrintError(void *context, const char *fmt, va_list args); - -struct UDataSwapper { - /** Input endianness. @internal ICU 2.8 */ - UBool inIsBigEndian; - /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */ - uint8_t inCharset; - /** Output endianness. @internal ICU 2.8 */ - UBool outIsBigEndian; - /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */ - uint8_t outCharset; - - /* basic functions for reading data values */ - - /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */ - UDataReadUInt16 *readUInt16; - /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */ - UDataReadUInt32 *readUInt32; - /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */ - UDataCompareInvChars *compareInvChars; - - /* basic functions for writing data values */ - - /** Convert one uint16_t from platform to input endianness. @internal ICU 2.8 */ - UDataWriteUInt16 *writeUInt16; - /** Convert one uint32_t from platform to input endianness. @internal ICU 2.8 */ - UDataWriteUInt32 *writeUInt32; - - /* basic functions for data transformations */ - - /** Transform an array of 16-bit integers. @internal ICU 2.8 */ - UDataSwapFn *swapArray16; - /** Transform an array of 32-bit integers. @internal ICU 2.8 */ - UDataSwapFn *swapArray32; - /** Transform an array of 64-bit integers. @internal ICU 53 */ - UDataSwapFn *swapArray64; - /** Transform an invariant-character string. @internal ICU 2.8 */ - UDataSwapFn *swapInvChars; - - /** - * Function for message output when an error occurs during data swapping. - * Can be NULL. 
- * @internal ICU 2.8 - */ - UDataPrintError *printError; - /** Context pointer for printError. @internal ICU 2.8 */ - void *printErrorContext; -}; - -U_CDECL_END - -U_CAPI UDataSwapper * U_EXPORT2 -udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset, - UBool outIsBigEndian, uint8_t outCharset, - UErrorCode *pErrorCode); - -/** - * Open a UDataSwapper for the given input data and the specified output - * characteristics. - * Values of -1 for any of the characteristics mean the local platform's - * characteristics. - * - * @see udata_swap - * @internal ICU 2.8 - */ -U_CAPI UDataSwapper * U_EXPORT2 -udata_openSwapperForInputData(const void *data, int32_t length, - UBool outIsBigEndian, uint8_t outCharset, - UErrorCode *pErrorCode); - -U_CAPI void U_EXPORT2 -udata_closeSwapper(UDataSwapper *ds); - -/** - * Read the beginning of an ICU data piece, recognize magic bytes, - * swap the structure. - * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece. - * - * @return The size of the data header, in bytes. - * - * @internal ICU 2.8 - */ -U_CAPI int32_t U_EXPORT2 -udata_swapDataHeader(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Convert one int16_t from input to platform endianness. - * @internal ICU 2.8 - */ -U_CAPI int16_t U_EXPORT2 -udata_readInt16(const UDataSwapper *ds, int16_t x); - -/** - * Convert one int32_t from input to platform endianness. - * @internal ICU 2.8 - */ -U_CAPI int32_t U_EXPORT2 -udata_readInt32(const UDataSwapper *ds, int32_t x); - -/** - * Swap a block of invariant, NUL-terminated strings, but not padding - * bytes after the last string. 
- * @internal - */ -U_CAPI int32_t U_EXPORT2 -udata_swapInvStringBlock(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -U_CAPI void U_EXPORT2 -udata_printError(const UDataSwapper *ds, - const char *fmt, - ...); - -/* internal exports from putil.c -------------------------------------------- */ - -/* declared here to keep them out of the public putil.h */ - -/** - * Swap invariant char * strings ASCII->EBCDIC. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -uprv_ebcdicFromAscii(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Copy invariant ASCII char * strings and verify they are invariant. - * @internal - */ -U_CFUNC int32_t -uprv_copyAscii(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Swap invariant char * strings EBCDIC->ASCII. - * @internal - */ -U_CFUNC int32_t -uprv_asciiFromEbcdic(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Copy invariant EBCDIC char * strings and verify they are invariant. 
- * @internal - */ -U_CFUNC int32_t -uprv_copyEbcdic(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Compare ASCII invariant char * with Unicode invariant UChar * - * @internal - */ -U_CFUNC int32_t -uprv_compareInvAscii(const UDataSwapper *ds, - const char *outString, int32_t outLength, - const UChar *localString, int32_t localLength); - -/** - * Compare EBCDIC invariant char * with Unicode invariant UChar * - * @internal - */ -U_CFUNC int32_t -uprv_compareInvEbcdic(const UDataSwapper *ds, - const char *outString, int32_t outLength, - const UChar *localString, int32_t localLength); - -/** - * \def uprv_compareInvWithUChar - * Compare an invariant-character strings with a UChar string - * @internal - */ -#if U_CHARSET_FAMILY==U_ASCII_FAMILY -# define uprv_compareInvWithUChar uprv_compareInvAscii -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY -# define uprv_compareInvWithUChar uprv_compareInvEbcdic -#else -# error Unknown charset family! -#endif - -// utrie_swap.cpp -----------------------------------------------------------*** - -/** - * Swaps a serialized UTrie. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -utrie_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Swaps a serialized UTrie2. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -utrie2_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Swaps a serialized UCPTrie. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -ucptrie_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Swaps a serialized UTrie, UTrie2, or UCPTrie. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -utrie_swapAnyVersion(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/* material... 
-------------------------------------------------------------- */ - -#if 0 - -/* udata.h */ - -/** - * Public API function in udata.c - * - * Same as udata_openChoice() but automatically swaps the data. - * isAcceptable, if not NULL, may accept data with endianness and charset family - * different from the current platform's properties. - * If the data is acceptable and the platform properties do not match, then - * the swap function is called to swap an allocated version of the data. - * Preflighting may or may not be performed depending on whether the size of - * the loaded data item is known. - * - * @param isAcceptable Same as for udata_openChoice(). May be NULL. - * - * @internal ICU 2.8 - */ -U_CAPI UDataMemory * U_EXPORT2 -udata_openSwap(const char *path, const char *type, const char *name, - UDataMemoryIsAcceptable *isAcceptable, void *isAcceptableContext, - UDataSwapFn *swap, - UDataPrintError *printError, void *printErrorContext, - UErrorCode *pErrorCode); - -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/uelement.h b/deps/node/deps/icu-small/source/common/uelement.h deleted file mode 100644 index 9d45f09f..00000000 --- a/deps/node/deps/icu-small/source/common/uelement.h +++ /dev/null @@ -1,91 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 1997-2011, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: uelement.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011jul04 -* created by: Markus W. Scherer -* -* Common definitions for UHashTable and UVector. -* UHashTok moved here from uhash.h and renamed UElement. -* This allows users of UVector to avoid the confusing #include of uhash.h. 
-* uhash.h aliases UElement to UHashTok, -* so that we need not change all of its code and its users. -*/ - -#ifndef __UELEMENT_H__ -#define __UELEMENT_H__ - -#include "unicode/utypes.h" - -U_CDECL_BEGIN - -/** - * A UVector element, or a key or value within a UHashtable. - * It may be either a 32-bit integral value or an opaque void* pointer. - * The void* pointer may be smaller than 32 bits (e.g. 24 bits) - * or may be larger (e.g. 64 bits). - * - * Because a UElement is the size of a native pointer or a 32-bit - * integer, we pass it around by value. - */ -union UElement { - void* pointer; - int32_t integer; -}; -typedef union UElement UElement; - -/** - * An element-equality (boolean) comparison function. - * @param e1 An element (object or integer) - * @param e2 An element (object or integer) - * @return TRUE if the two elements are equal. - */ -typedef UBool U_CALLCONV UElementsAreEqual(const UElement e1, const UElement e2); - -/** - * An element sorting (three-way) comparison function. - * @param e1 An element (object or integer) - * @param e2 An element (object or integer) - * @return 0 if the two elements are equal, -1 if e1 is < e2, or +1 if e1 is > e2. - */ -typedef int8_t U_CALLCONV UElementComparator(UElement e1, UElement e2); - -/** - * An element assignment function. It may copy an integer, copy - * a pointer, or clone a pointer, as appropriate. - * @param dst The element to be assigned to - * @param src The element to assign from - */ -typedef void U_CALLCONV UElementAssigner(UElement *dst, UElement *src); - -U_CDECL_END - -/** - * Comparator function for UnicodeString* keys. Implements UElementsAreEqual. - * @param key1 The string for comparison - * @param key2 The string for comparison - * @return true if key1 and key2 are equal, return false otherwise. - */ -U_CAPI UBool U_EXPORT2 -uhash_compareUnicodeString(const UElement key1, const UElement key2); - -/** - * Comparator function for UnicodeString* keys (case insensitive). 
- * Make sure to use together with uhash_hashCaselessUnicodeString. - * Implements UElementsAreEqual. - * @param key1 The string for comparison - * @param key2 The string for comparison - * @return true if key1 and key2 are equal, return false otherwise. - */ -U_CAPI UBool U_EXPORT2 -uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2); - -#endif /* __UELEMENT_H__ */ diff --git a/deps/node/deps/icu-small/source/common/uenum.cpp b/deps/node/deps/icu-small/source/common/uenum.cpp deleted file mode 100644 index f75cfb7a..00000000 --- a/deps/node/deps/icu-small/source/common/uenum.cpp +++ /dev/null @@ -1,189 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uenum.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:2 -* -* created on: 2002jul08 -* created by: Vladimir Weinstein -*/ - -#include "unicode/putil.h" -#include "uenumimp.h" -#include "cmemory.h" - -/* Layout of the baseContext buffer. */ -typedef struct { - int32_t len; /* number of bytes available starting at 'data' */ - char data; /* actual data starts here */ -} _UEnumBuffer; - -/* Extra bytes to allocate in the baseContext buffer. */ -static const int32_t PAD = 8; - -/* Return a pointer to the baseContext buffer, possibly allocating - or reallocating it if at least 'capacity' bytes are not available. 
*/ -static void* _getBuffer(UEnumeration* en, int32_t capacity) { - - if (en->baseContext != NULL) { - if (((_UEnumBuffer*) en->baseContext)->len < capacity) { - capacity += PAD; - en->baseContext = uprv_realloc(en->baseContext, - sizeof(int32_t) + capacity); - if (en->baseContext == NULL) { - return NULL; - } - ((_UEnumBuffer*) en->baseContext)->len = capacity; - } - } else { - capacity += PAD; - en->baseContext = uprv_malloc(sizeof(int32_t) + capacity); - if (en->baseContext == NULL) { - return NULL; - } - ((_UEnumBuffer*) en->baseContext)->len = capacity; - } - - return (void*) & ((_UEnumBuffer*) en->baseContext)->data; -} - -U_CAPI void U_EXPORT2 -uenum_close(UEnumeration* en) -{ - if (en) { - if (en->close != NULL) { - if (en->baseContext) { - uprv_free(en->baseContext); - } - en->close(en); - } else { /* this seems dangerous, but we better kill the object */ - uprv_free(en); - } - } -} - -U_CAPI int32_t U_EXPORT2 -uenum_count(UEnumeration* en, UErrorCode* status) -{ - if (!en || U_FAILURE(*status)) { - return -1; - } - if (en->count != NULL) { - return en->count(en, status); - } else { - *status = U_UNSUPPORTED_ERROR; - return -1; - } -} - -/* Don't call this directly. Only uenum_unext should be calling this. */ -U_CAPI const UChar* U_EXPORT2 -uenum_unextDefault(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status) -{ - UChar *ustr = NULL; - int32_t len = 0; - if (en->next != NULL) { - const char *cstr = en->next(en, &len, status); - if (cstr != NULL) { - ustr = (UChar*) _getBuffer(en, (len+1) * sizeof(UChar)); - if (ustr == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - } else { - u_charsToUChars(cstr, ustr, len+1); - } - } - } else { - *status = U_UNSUPPORTED_ERROR; - } - if (resultLength) { - *resultLength = len; - } - return ustr; -} - -/* Don't call this directly. Only uenum_next should be calling this. 
*/ -U_CAPI const char* U_EXPORT2 -uenum_nextDefault(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status) -{ - if (en->uNext != NULL) { - char *tempCharVal; - const UChar *tempUCharVal = en->uNext(en, resultLength, status); - if (tempUCharVal == NULL) { - return NULL; - } - tempCharVal = (char*) - _getBuffer(en, (*resultLength+1) * sizeof(char)); - if (!tempCharVal) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - u_UCharsToChars(tempUCharVal, tempCharVal, *resultLength + 1); - return tempCharVal; - } else { - *status = U_UNSUPPORTED_ERROR; - return NULL; - } -} - -U_CAPI const UChar* U_EXPORT2 -uenum_unext(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status) -{ - if (!en || U_FAILURE(*status)) { - return NULL; - } - if (en->uNext != NULL) { - return en->uNext(en, resultLength, status); - } else { - *status = U_UNSUPPORTED_ERROR; - return NULL; - } -} - -U_CAPI const char* U_EXPORT2 -uenum_next(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status) -{ - if (!en || U_FAILURE(*status)) { - return NULL; - } - if (en->next != NULL) { - if (resultLength != NULL) { - return en->next(en, resultLength, status); - } - else { - int32_t dummyLength=0; - return en->next(en, &dummyLength, status); - } - } else { - *status = U_UNSUPPORTED_ERROR; - return NULL; - } -} - -U_CAPI void U_EXPORT2 -uenum_reset(UEnumeration* en, UErrorCode* status) -{ - if (!en || U_FAILURE(*status)) { - return; - } - if (en->reset != NULL) { - en->reset(en, status); - } else { - *status = U_UNSUPPORTED_ERROR; - } -} diff --git a/deps/node/deps/icu-small/source/common/uenumimp.h b/deps/node/deps/icu-small/source/common/uenumimp.h deleted file mode 100644 index 93854406..00000000 --- a/deps/node/deps/icu-small/source/common/uenumimp.h +++ /dev/null @@ -1,155 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2006, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uenumimp.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:2 -* -* created on: 2002jul08 -* created by: Vladimir Weinstein -*/ - -#ifndef __UENUMIMP_H -#define __UENUMIMP_H - -#include "unicode/uenum.h" - -U_CDECL_BEGIN - -/** - * following are the type declarations for - * implementations of APIs. If any of these - * functions are NULL, U_UNSUPPORTED_ERROR - * is returned. If close is NULL, the enumeration - * object is going to be released. - * Initial error checking is done in the body - * of API function, so the implementations - * need not to check the initial error condition. - */ - -/** - * Function type declaration for uenum_close(). - * - * This function should cleanup the enumerator object - * - * @param en enumeration to be closed - */ -typedef void U_CALLCONV -UEnumClose(UEnumeration *en); - -/** - * Function type declaration for uenum_count(). - * - * This function should count the number of elements - * in this enumeration - * - * @param en enumeration to be counted - * @param status pointer to UErrorCode variable - * @return number of elements in enumeration - */ -typedef int32_t U_CALLCONV -UEnumCount(UEnumeration *en, UErrorCode *status); - -/** - * Function type declaration for uenum_unext(). - * - * This function returns the next element as a UChar *, - * or NULL after all elements haven been enumerated. 
- * - * @param en enumeration - * @param resultLength pointer to result length - * @param status pointer to UErrorCode variable - * @return next element as UChar *, - * or NULL after all elements haven been enumerated - */ -typedef const UChar* U_CALLCONV -UEnumUNext(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status); - -/** - * Function type declaration for uenum_next(). - * - * This function returns the next element as a char *, - * or NULL after all elements haven been enumerated. - * - * @param en enumeration - * @param resultLength pointer to result length - * @param status pointer to UErrorCode variable - * @return next element as char *, - * or NULL after all elements haven been enumerated - */ -typedef const char* U_CALLCONV -UEnumNext(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status); - -/** - * Function type declaration for uenum_reset(). - * - * This function should reset the enumeration - * object - * - * @param en enumeration - * @param status pointer to UErrorCode variable - */ -typedef void U_CALLCONV -UEnumReset(UEnumeration* en, - UErrorCode* status); - - -struct UEnumeration { - /* baseContext. For the base class only. Don't touch! */ - void *baseContext; - - /* context. Use it for what you need */ - void *context; - - /** - * these are functions that will - * be used for APIs - */ - /* called from uenum_close */ - UEnumClose *close; - /* called from uenum_count */ - UEnumCount *count; - /* called from uenum_unext */ - UEnumUNext *uNext; - /* called from uenum_next */ - UEnumNext *next; - /* called from uenum_reset */ - UEnumReset *reset; -}; - -U_CDECL_END - -/* This is the default implementation for uenum_unext(). - * It automatically converts the char * string to UChar *. - * Don't call this directly. This is called internally by uenum_unext - * when a UEnumeration is defined with 'uNext' pointing to this - * function. 
- */ -U_CAPI const UChar* U_EXPORT2 -uenum_unextDefault(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status); - -/* This is the default implementation for uenum_next(). - * It automatically converts the UChar * string to char *. - * Don't call this directly. This is called internally by uenum_next - * when a UEnumeration is defined with 'next' pointing to this - * function. - */ -U_CAPI const char* U_EXPORT2 -uenum_nextDefault(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status); - -#endif diff --git a/deps/node/deps/icu-small/source/common/uhash.cpp b/deps/node/deps/icu-small/source/common/uhash.cpp deleted file mode 100644 index 239997d0..00000000 --- a/deps/node/deps/icu-small/source/common/uhash.cpp +++ /dev/null @@ -1,991 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* Date Name Description -* 03/22/00 aliu Adapted from original C++ ICU Hashtable. -* 07/06/01 aliu Modified to support int32_t keys on -* platforms with sizeof(void*) < 32. -****************************************************************************** -*/ - -#include "uhash.h" -#include "unicode/ustring.h" -#include "cstring.h" -#include "cmemory.h" -#include "uassert.h" -#include "ustr_imp.h" - -/* This hashtable is implemented as a double hash. All elements are - * stored in a single array with no secondary storage for collision - * resolution (no linked list, etc.). When there is a hash collision - * (when two unequal keys have the same hashcode) we resolve this by - * using a secondary hash. The secondary hash is an increment - * computed as a hash function (a different one) of the primary - * hashcode. 
This increment is added to the initial hash value to - * obtain further slots assigned to the same hash code. For this to - * work, the length of the array and the increment must be relatively - * prime. The easiest way to achieve this is to have the length of - * the array be prime, and the increment be any value from - * 1..length-1. - * - * Hashcodes are 32-bit integers. We make sure all hashcodes are - * non-negative by masking off the top bit. This has two effects: (1) - * modulo arithmetic is simplified. If we allowed negative hashcodes, - * then when we computed hashcode % length, we could get a negative - * result, which we would then have to adjust back into range. It's - * simpler to just make hashcodes non-negative. (2) It makes it easy - * to check for empty vs. occupied slots in the table. We just mark - * empty or deleted slots with a negative hashcode. - * - * The central function is _uhash_find(). This function looks for a - * slot matching the given key and hashcode. If one is found, it - * returns a pointer to that slot. If the table is full, and no match - * is found, it returns NULL -- in theory. This would make the code - * more complicated, since all callers of _uhash_find() would then - * have to check for a NULL result. To keep this from happening, we - * don't allow the table to fill. When there is only one - * empty/deleted slot left, uhash_put() will refuse to increase the - * count, and fail. This simplifies the code. In practice, one will - * seldom encounter this using default UHashtables. However, if a - * hashtable is set to a U_FIXED resize policy, or if memory is - * exhausted, then the table may fill. - * - * High and low water ratios control rehashing. They establish levels - * of fullness (from 0 to 1) outside of which the data array is - * reallocated and repopulated. Setting the low water ratio to zero - * means the table will never shrink. Setting the high water ratio to - * one means the table will never grow. 
The ratios should be - * coordinated with the ratio between successive elements of the - * PRIMES table, so that when the primeIndex is incremented or - * decremented during rehashing, it brings the ratio of count / length - * back into the desired range (between low and high water ratios). - */ - -/******************************************************************** - * PRIVATE Constants, Macros - ********************************************************************/ - -/* This is a list of non-consecutive primes chosen such that - * PRIMES[i+1] ~ 2*PRIMES[i]. (Currently, the ratio ranges from 1.81 - * to 2.18; the inverse ratio ranges from 0.459 to 0.552.) If this - * ratio is changed, the low and high water ratios should also be - * adjusted to suit. - * - * These prime numbers were also chosen so that they are the largest - * prime number while being less than a power of two. - */ -static const int32_t PRIMES[] = { - 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749, - 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593, - 16777213, 33554393, 67108859, 134217689, 268435399, 536870909, - 1073741789, 2147483647 /*, 4294967291 */ -}; - -#define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES) -#define DEFAULT_PRIME_INDEX 4 - -/* These ratios are tuned to the PRIMES array such that a resize - * places the table back into the zone of non-resizing. That is, - * after a call to _uhash_rehash(), a subsequent call to - * _uhash_rehash() should do nothing (should not churn). This is only - * a potential problem with U_GROW_AND_SHRINK. 
- */ -static const float RESIZE_POLICY_RATIO_TABLE[6] = { - /* low, high water ratio */ - 0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */ - 0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */ - 0.0F, 1.0F /* U_FIXED: Never change size */ -}; - -/* - Invariants for hashcode values: - - * DELETED < 0 - * EMPTY < 0 - * Real hashes >= 0 - - Hashcodes may not start out this way, but internally they are - adjusted so that they are always positive. We assume 32-bit - hashcodes; adjust these constants for other hashcode sizes. -*/ -#define HASH_DELETED ((int32_t) 0x80000000) -#define HASH_EMPTY ((int32_t) HASH_DELETED + 1) - -#define IS_EMPTY_OR_DELETED(x) ((x) < 0) - -/* This macro expects a UHashTok.pointer as its keypointer and - valuepointer parameters */ -#define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) \ - if (hash->keyDeleter != NULL && keypointer != NULL) { \ - (*hash->keyDeleter)(keypointer); \ - } \ - if (hash->valueDeleter != NULL && valuepointer != NULL) { \ - (*hash->valueDeleter)(valuepointer); \ - } - -/* - * Constants for hinting whether a key or value is an integer - * or a pointer. If a hint bit is zero, then the associated - * token is assumed to be an integer. 
- */ -#define HINT_KEY_POINTER (1) -#define HINT_VALUE_POINTER (2) - -/******************************************************************** - * PRIVATE Implementation - ********************************************************************/ - -static UHashTok -_uhash_setElement(UHashtable *hash, UHashElement* e, - int32_t hashcode, - UHashTok key, UHashTok value, int8_t hint) { - - UHashTok oldValue = e->value; - if (hash->keyDeleter != NULL && e->key.pointer != NULL && - e->key.pointer != key.pointer) { /* Avoid double deletion */ - (*hash->keyDeleter)(e->key.pointer); - } - if (hash->valueDeleter != NULL) { - if (oldValue.pointer != NULL && - oldValue.pointer != value.pointer) { /* Avoid double deletion */ - (*hash->valueDeleter)(oldValue.pointer); - } - oldValue.pointer = NULL; - } - /* Compilers should copy the UHashTok union correctly, but even if - * they do, memory heap tools (e.g. BoundsChecker) can get - * confused when a pointer is cloaked in a union and then copied. - * TO ALLEVIATE THIS, we use hints (based on what API the user is - * calling) to copy pointers when we know the user thinks - * something is a pointer. */ - if (hint & HINT_KEY_POINTER) { - e->key.pointer = key.pointer; - } else { - e->key = key; - } - if (hint & HINT_VALUE_POINTER) { - e->value.pointer = value.pointer; - } else { - e->value = value; - } - e->hashcode = hashcode; - return oldValue; -} - -/** - * Assumes that the given element is not empty or deleted. 
- */ -static UHashTok -_uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) { - UHashTok empty; - U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode)); - --hash->count; - empty.pointer = NULL; empty.integer = 0; - return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0); -} - -static void -_uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { - U_ASSERT(hash != NULL); - U_ASSERT(((int32_t)policy) >= 0); - U_ASSERT(((int32_t)policy) < 3); - hash->lowWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2]; - hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1]; -} - -/** - * Allocate internal data array of a size determined by the given - * prime index. If the index is out of range it is pinned into range. - * If the allocation fails the status is set to - * U_MEMORY_ALLOCATION_ERROR and all array storage is freed. In - * either case the previous array pointer is overwritten. - * - * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1. - */ -static void -_uhash_allocate(UHashtable *hash, - int32_t primeIndex, - UErrorCode *status) { - - UHashElement *p, *limit; - UHashTok emptytok; - - if (U_FAILURE(*status)) return; - - U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH); - - hash->primeIndex = static_cast(primeIndex); - hash->length = PRIMES[primeIndex]; - - p = hash->elements = (UHashElement*) - uprv_malloc(sizeof(UHashElement) * hash->length); - - if (hash->elements == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return; - } - - emptytok.pointer = NULL; /* Only one of these two is needed */ - emptytok.integer = 0; /* but we don't know which one. 
*/ - - limit = p + hash->length; - while (p < limit) { - p->key = emptytok; - p->value = emptytok; - p->hashcode = HASH_EMPTY; - ++p; - } - - hash->count = 0; - hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio); - hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio); -} - -static UHashtable* -_uhash_init(UHashtable *result, - UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - int32_t primeIndex, - UErrorCode *status) -{ - if (U_FAILURE(*status)) return NULL; - U_ASSERT(keyHash != NULL); - U_ASSERT(keyComp != NULL); - - result->keyHasher = keyHash; - result->keyComparator = keyComp; - result->valueComparator = valueComp; - result->keyDeleter = NULL; - result->valueDeleter = NULL; - result->allocated = FALSE; - _uhash_internalSetResizePolicy(result, U_GROW); - - _uhash_allocate(result, primeIndex, status); - - if (U_FAILURE(*status)) { - return NULL; - } - - return result; -} - -static UHashtable* -_uhash_create(UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - int32_t primeIndex, - UErrorCode *status) { - UHashtable *result; - - if (U_FAILURE(*status)) return NULL; - - result = (UHashtable*) uprv_malloc(sizeof(UHashtable)); - if (result == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status); - result->allocated = TRUE; - - if (U_FAILURE(*status)) { - uprv_free(result); - return NULL; - } - - return result; -} - -/** - * Look for a key in the table, or if no such key exists, the first - * empty slot matching the given hashcode. Keys are compared using - * the keyComparator function. - * - * First find the start position, which is the hashcode modulo - * the length. Test it to see if it is: - * - * a. identical: First check the hash values for a quick check, - * then compare keys for equality using keyComparator. - * b. deleted - * c. 
empty - * - * Stop if it is identical or empty, otherwise continue by adding a - * "jump" value (moduloing by the length again to keep it within - * range) and retesting. For efficiency, there need enough empty - * values so that the searchs stop within a reasonable amount of time. - * This can be changed by changing the high/low water marks. - * - * In theory, this function can return NULL, if it is full (no empty - * or deleted slots) and if no matching key is found. In practice, we - * prevent this elsewhere (in uhash_put) by making sure the last slot - * in the table is never filled. - * - * The size of the table should be prime for this algorithm to work; - * otherwise we are not guaranteed that the jump value (the secondary - * hash) is relatively prime to the table length. - */ -static UHashElement* -_uhash_find(const UHashtable *hash, UHashTok key, - int32_t hashcode) { - - int32_t firstDeleted = -1; /* assume invalid index */ - int32_t theIndex, startIndex; - int32_t jump = 0; /* lazy evaluate */ - int32_t tableHash; - UHashElement *elements = hash->elements; - - hashcode &= 0x7FFFFFFF; /* must be positive */ - startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length; - - do { - tableHash = elements[theIndex].hashcode; - if (tableHash == hashcode) { /* quick check */ - if ((*hash->keyComparator)(key, elements[theIndex].key)) { - return &(elements[theIndex]); - } - } else if (!IS_EMPTY_OR_DELETED(tableHash)) { - /* We have hit a slot which contains a key-value pair, - * but for which the hash code does not match. Keep - * looking. - */ - } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */ - break; - } else if (firstDeleted < 0) { /* remember first deleted */ - firstDeleted = theIndex; - } - if (jump == 0) { /* lazy compute jump */ - /* The jump value must be relatively prime to the table - * length. As long as the length is prime, then any value - * 1..length-1 will be relatively prime to it. 
- */ - jump = (hashcode % (hash->length - 1)) + 1; - } - theIndex = (theIndex + jump) % hash->length; - } while (theIndex != startIndex); - - if (firstDeleted >= 0) { - theIndex = firstDeleted; /* reset if had deleted slot */ - } else if (tableHash != HASH_EMPTY) { - /* We get to this point if the hashtable is full (no empty or - * deleted slots), and we've failed to find a match. THIS - * WILL NEVER HAPPEN as long as uhash_put() makes sure that - * count is always < length. - */ - U_ASSERT(FALSE); - return NULL; /* Never happens if uhash_put() behaves */ - } - return &(elements[theIndex]); -} - -/** - * Attempt to grow or shrink the data arrays in order to make the - * count fit between the high and low water marks. hash_put() and - * hash_remove() call this method when the count exceeds the high or - * low water marks. This method may do nothing, if memory allocation - * fails, or if the count is already in range, or if the length is - * already at the low or high limit. In any case, upon return the - * arrays will be valid. 
- */ -static void -_uhash_rehash(UHashtable *hash, UErrorCode *status) { - - UHashElement *old = hash->elements; - int32_t oldLength = hash->length; - int32_t newPrimeIndex = hash->primeIndex; - int32_t i; - - if (hash->count > hash->highWaterMark) { - if (++newPrimeIndex >= PRIMES_LENGTH) { - return; - } - } else if (hash->count < hash->lowWaterMark) { - if (--newPrimeIndex < 0) { - return; - } - } else { - return; - } - - _uhash_allocate(hash, newPrimeIndex, status); - - if (U_FAILURE(*status)) { - hash->elements = old; - hash->length = oldLength; - return; - } - - for (i = oldLength - 1; i >= 0; --i) { - if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) { - UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode); - U_ASSERT(e != NULL); - U_ASSERT(e->hashcode == HASH_EMPTY); - e->key = old[i].key; - e->value = old[i].value; - e->hashcode = old[i].hashcode; - ++hash->count; - } - } - - uprv_free(old); -} - -static UHashTok -_uhash_remove(UHashtable *hash, - UHashTok key) { - /* First find the position of the key in the table. If the object - * has not been removed already, remove it. If the user wanted - * keys deleted, then delete it also. We have to put a special - * hashcode in that position that means that something has been - * deleted, since when we do a find, we have to continue PAST any - * deleted values. - */ - UHashTok result; - UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key)); - U_ASSERT(e != NULL); - result.pointer = NULL; - result.integer = 0; - if (!IS_EMPTY_OR_DELETED(e->hashcode)) { - result = _uhash_internalRemoveElement(hash, e); - if (hash->count < hash->lowWaterMark) { - UErrorCode status = U_ZERO_ERROR; - _uhash_rehash(hash, &status); - } - } - return result; -} - -static UHashTok -_uhash_put(UHashtable *hash, - UHashTok key, - UHashTok value, - int8_t hint, - UErrorCode *status) { - - /* Put finds the position in the table for the new value. 
If the - * key is already in the table, it is deleted, if there is a - * non-NULL keyDeleter. Then the key, the hash and the value are - * all put at the position in their respective arrays. - */ - int32_t hashcode; - UHashElement* e; - UHashTok emptytok; - - if (U_FAILURE(*status)) { - goto err; - } - U_ASSERT(hash != NULL); - /* Cannot always check pointer here or iSeries sees NULL every time. */ - if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) { - /* Disallow storage of NULL values, since NULL is returned by - * get() to indicate an absent key. Storing NULL == removing. - */ - return _uhash_remove(hash, key); - } - if (hash->count > hash->highWaterMark) { - _uhash_rehash(hash, status); - if (U_FAILURE(*status)) { - goto err; - } - } - - hashcode = (*hash->keyHasher)(key); - e = _uhash_find(hash, key, hashcode); - U_ASSERT(e != NULL); - - if (IS_EMPTY_OR_DELETED(e->hashcode)) { - /* Important: We must never actually fill the table up. If we - * do so, then _uhash_find() will return NULL, and we'll have - * to check for NULL after every call to _uhash_find(). To - * avoid this we make sure there is always at least one empty - * or deleted slot in the table. This only is a problem if we - * are out of memory and rehash isn't working. - */ - ++hash->count; - if (hash->count == hash->length) { - /* Don't allow count to reach length */ - --hash->count; - *status = U_MEMORY_ALLOCATION_ERROR; - goto err; - } - } - - /* We must in all cases handle storage properly. If there was an - * old key, then it must be deleted (if the deleter != NULL). - * Make hashcodes stored in table positive. - */ - return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint); - - err: - /* If the deleters are non-NULL, this method adopts its key and/or - * value arguments, and we must be sure to delete the key and/or - * value in all cases, even upon failure. 
- */ - HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer); - emptytok.pointer = NULL; emptytok.integer = 0; - return emptytok; -} - - -/******************************************************************** - * PUBLIC API - ********************************************************************/ - -U_CAPI UHashtable* U_EXPORT2 -uhash_open(UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - UErrorCode *status) { - - return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status); -} - -U_CAPI UHashtable* U_EXPORT2 -uhash_openSize(UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - int32_t size, - UErrorCode *status) { - - /* Find the smallest index i for which PRIMES[i] >= size. */ - int32_t i = 0; - while (i<(PRIMES_LENGTH-1) && PRIMES[i]= size. - int32_t i = 0; - while (i<(PRIMES_LENGTH-1) && PRIMES[i]elements != NULL) { - if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) { - int32_t pos=UHASH_FIRST; - UHashElement *e; - while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != NULL) { - HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer); - } - } - uprv_free(hash->elements); - hash->elements = NULL; - } - if (hash->allocated) { - uprv_free(hash); - } -} - -U_CAPI UHashFunction *U_EXPORT2 -uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) { - UHashFunction *result = hash->keyHasher; - hash->keyHasher = fn; - return result; -} - -U_CAPI UKeyComparator *U_EXPORT2 -uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) { - UKeyComparator *result = hash->keyComparator; - hash->keyComparator = fn; - return result; -} -U_CAPI UValueComparator *U_EXPORT2 -uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){ - UValueComparator *result = hash->valueComparator; - hash->valueComparator = fn; - return result; -} - -U_CAPI UObjectDeleter *U_EXPORT2 -uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) { - UObjectDeleter *result = 
hash->keyDeleter; - hash->keyDeleter = fn; - return result; -} - -U_CAPI UObjectDeleter *U_EXPORT2 -uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) { - UObjectDeleter *result = hash->valueDeleter; - hash->valueDeleter = fn; - return result; -} - -U_CAPI void U_EXPORT2 -uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { - UErrorCode status = U_ZERO_ERROR; - _uhash_internalSetResizePolicy(hash, policy); - hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio); - hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio); - _uhash_rehash(hash, &status); -} - -U_CAPI int32_t U_EXPORT2 -uhash_count(const UHashtable *hash) { - return hash->count; -} - -U_CAPI void* U_EXPORT2 -uhash_get(const UHashtable *hash, - const void* key) { - UHashTok keyholder; - keyholder.pointer = (void*) key; - return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer; -} - -U_CAPI void* U_EXPORT2 -uhash_iget(const UHashtable *hash, - int32_t key) { - UHashTok keyholder; - keyholder.integer = key; - return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer; -} - -U_CAPI int32_t U_EXPORT2 -uhash_geti(const UHashtable *hash, - const void* key) { - UHashTok keyholder; - keyholder.pointer = (void*) key; - return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; -} - -U_CAPI int32_t U_EXPORT2 -uhash_igeti(const UHashtable *hash, - int32_t key) { - UHashTok keyholder; - keyholder.integer = key; - return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; -} - -U_CAPI void* U_EXPORT2 -uhash_put(UHashtable *hash, - void* key, - void* value, - UErrorCode *status) { - UHashTok keyholder, valueholder; - keyholder.pointer = key; - valueholder.pointer = value; - return _uhash_put(hash, keyholder, valueholder, - HINT_KEY_POINTER | HINT_VALUE_POINTER, - status).pointer; -} - -U_CAPI void* U_EXPORT2 -uhash_iput(UHashtable *hash, - int32_t key, - void* value, - UErrorCode 
*status) { - UHashTok keyholder, valueholder; - keyholder.integer = key; - valueholder.pointer = value; - return _uhash_put(hash, keyholder, valueholder, - HINT_VALUE_POINTER, - status).pointer; -} - -U_CAPI int32_t U_EXPORT2 -uhash_puti(UHashtable *hash, - void* key, - int32_t value, - UErrorCode *status) { - UHashTok keyholder, valueholder; - keyholder.pointer = key; - valueholder.integer = value; - return _uhash_put(hash, keyholder, valueholder, - HINT_KEY_POINTER, - status).integer; -} - - -U_CAPI int32_t U_EXPORT2 -uhash_iputi(UHashtable *hash, - int32_t key, - int32_t value, - UErrorCode *status) { - UHashTok keyholder, valueholder; - keyholder.integer = key; - valueholder.integer = value; - return _uhash_put(hash, keyholder, valueholder, - 0, /* neither is a ptr */ - status).integer; -} - -U_CAPI void* U_EXPORT2 -uhash_remove(UHashtable *hash, - const void* key) { - UHashTok keyholder; - keyholder.pointer = (void*) key; - return _uhash_remove(hash, keyholder).pointer; -} - -U_CAPI void* U_EXPORT2 -uhash_iremove(UHashtable *hash, - int32_t key) { - UHashTok keyholder; - keyholder.integer = key; - return _uhash_remove(hash, keyholder).pointer; -} - -U_CAPI int32_t U_EXPORT2 -uhash_removei(UHashtable *hash, - const void* key) { - UHashTok keyholder; - keyholder.pointer = (void*) key; - return _uhash_remove(hash, keyholder).integer; -} - -U_CAPI int32_t U_EXPORT2 -uhash_iremovei(UHashtable *hash, - int32_t key) { - UHashTok keyholder; - keyholder.integer = key; - return _uhash_remove(hash, keyholder).integer; -} - -U_CAPI void U_EXPORT2 -uhash_removeAll(UHashtable *hash) { - int32_t pos = UHASH_FIRST; - const UHashElement *e; - U_ASSERT(hash != NULL); - if (hash->count != 0) { - while ((e = uhash_nextElement(hash, &pos)) != NULL) { - uhash_removeElement(hash, e); - } - } - U_ASSERT(hash->count == 0); -} - -U_CAPI const UHashElement* U_EXPORT2 -uhash_find(const UHashtable *hash, const void* key) { - UHashTok keyholder; - const UHashElement *e; - keyholder.pointer 
= (void*) key; - e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); - return IS_EMPTY_OR_DELETED(e->hashcode) ? NULL : e; -} - -U_CAPI const UHashElement* U_EXPORT2 -uhash_nextElement(const UHashtable *hash, int32_t *pos) { - /* Walk through the array until we find an element that is not - * EMPTY and not DELETED. - */ - int32_t i; - U_ASSERT(hash != NULL); - for (i = *pos + 1; i < hash->length; ++i) { - if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) { - *pos = i; - return &(hash->elements[i]); - } - } - - /* No more elements */ - return NULL; -} - -U_CAPI void* U_EXPORT2 -uhash_removeElement(UHashtable *hash, const UHashElement* e) { - U_ASSERT(hash != NULL); - U_ASSERT(e != NULL); - if (!IS_EMPTY_OR_DELETED(e->hashcode)) { - UHashElement *nce = (UHashElement *)e; - return _uhash_internalRemoveElement(hash, nce).pointer; - } - return NULL; -} - -/******************************************************************** - * UHashTok convenience - ********************************************************************/ - -/** - * Return a UHashTok for an integer. - */ -/*U_CAPI UHashTok U_EXPORT2 -uhash_toki(int32_t i) { - UHashTok tok; - tok.integer = i; - return tok; -}*/ - -/** - * Return a UHashTok for a pointer. - */ -/*U_CAPI UHashTok U_EXPORT2 -uhash_tokp(void* p) { - UHashTok tok; - tok.pointer = p; - return tok; -}*/ - -/******************************************************************** - * PUBLIC Key Hash Functions - ********************************************************************/ - -U_CAPI int32_t U_EXPORT2 -uhash_hashUChars(const UHashTok key) { - const UChar *s = (const UChar *)key.pointer; - return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s)); -} - -U_CAPI int32_t U_EXPORT2 -uhash_hashChars(const UHashTok key) { - const char *s = (const char *)key.pointer; - return s == NULL ? 
0 : static_cast(ustr_hashCharsN(s, static_cast(uprv_strlen(s)))); -} - -U_CAPI int32_t U_EXPORT2 -uhash_hashIChars(const UHashTok key) { - const char *s = (const char *)key.pointer; - return s == NULL ? 0 : ustr_hashICharsN(s, static_cast(uprv_strlen(s))); -} - -U_CAPI UBool U_EXPORT2 -uhash_equals(const UHashtable* hash1, const UHashtable* hash2){ - int32_t count1, count2, pos, i; - - if(hash1==hash2){ - return TRUE; - } - - /* - * Make sure that we are comparing 2 valid hashes of the same type - * with valid comparison functions. - * Without valid comparison functions, a binary comparison - * of the hash values will yield random results on machines - * with 64-bit pointers and 32-bit integer hashes. - * A valueComparator is normally optional. - */ - if (hash1==NULL || hash2==NULL || - hash1->keyComparator != hash2->keyComparator || - hash1->valueComparator != hash2->valueComparator || - hash1->valueComparator == NULL) - { - /* - Normally we would return an error here about incompatible hash tables, - but we return FALSE instead. - */ - return FALSE; - } - - count1 = uhash_count(hash1); - count2 = uhash_count(hash2); - if(count1!=count2){ - return FALSE; - } - - pos=UHASH_FIRST; - for(i=0; ikey; - const UHashTok val1 = elem1->value; - /* here the keys are not compared, instead the key form hash1 is used to fetch - * value from hash2. If the hashes are equal then then both hashes should - * contain equal values for the same key! 
- */ - const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1)); - const UHashTok val2 = elem2->value; - if(hash1->valueComparator(val1, val2)==FALSE){ - return FALSE; - } - } - return TRUE; -} - -/******************************************************************** - * PUBLIC Comparator Functions - ********************************************************************/ - -U_CAPI UBool U_EXPORT2 -uhash_compareUChars(const UHashTok key1, const UHashTok key2) { - const UChar *p1 = (const UChar*) key1.pointer; - const UChar *p2 = (const UChar*) key2.pointer; - if (p1 == p2) { - return TRUE; - } - if (p1 == NULL || p2 == NULL) { - return FALSE; - } - while (*p1 != 0 && *p1 == *p2) { - ++p1; - ++p2; - } - return (UBool)(*p1 == *p2); -} - -U_CAPI UBool U_EXPORT2 -uhash_compareChars(const UHashTok key1, const UHashTok key2) { - const char *p1 = (const char*) key1.pointer; - const char *p2 = (const char*) key2.pointer; - if (p1 == p2) { - return TRUE; - } - if (p1 == NULL || p2 == NULL) { - return FALSE; - } - while (*p1 != 0 && *p1 == *p2) { - ++p1; - ++p2; - } - return (UBool)(*p1 == *p2); -} - -U_CAPI UBool U_EXPORT2 -uhash_compareIChars(const UHashTok key1, const UHashTok key2) { - const char *p1 = (const char*) key1.pointer; - const char *p2 = (const char*) key2.pointer; - if (p1 == p2) { - return TRUE; - } - if (p1 == NULL || p2 == NULL) { - return FALSE; - } - while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) { - ++p1; - ++p2; - } - return (UBool)(*p1 == *p2); -} - -/******************************************************************** - * PUBLIC int32_t Support Functions - ********************************************************************/ - -U_CAPI int32_t U_EXPORT2 -uhash_hashLong(const UHashTok key) { - return key.integer; -} - -U_CAPI UBool U_EXPORT2 -uhash_compareLong(const UHashTok key1, const UHashTok key2) { - return (UBool)(key1.integer == key2.integer); -} diff --git a/deps/node/deps/icu-small/source/common/uhash.h 
b/deps/node/deps/icu-small/source/common/uhash.h deleted file mode 100644 index b59d2711..00000000 --- a/deps/node/deps/icu-small/source/common/uhash.h +++ /dev/null @@ -1,718 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1997-2015, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* Date Name Description -* 03/22/00 aliu Adapted from original C++ ICU Hashtable. -* 07/06/01 aliu Modified to support int32_t keys on -* platforms with sizeof(void*) < 32. -****************************************************************************** -*/ - -#ifndef UHASH_H -#define UHASH_H - -#include "unicode/utypes.h" -#include "cmemory.h" -#include "uelement.h" -#include "unicode/localpointer.h" - -/** - * UHashtable stores key-value pairs and does moderately fast lookup - * based on keys. It provides a good tradeoff between access time and - * storage space. As elements are added to it, it grows to accomodate - * them. By default, the table never shrinks, even if all elements - * are removed from it. - * - * Keys and values are stored as void* pointers. These void* pointers - * may be actual pointers to strings, objects, or any other structure - * in memory, or they may simply be integral values cast to void*. - * UHashtable doesn't care and manipulates them via user-supplied - * functions. These functions hash keys, compare keys, delete keys, - * and delete values. Some function pointers are optional (may be - * NULL); others must be supplied. Several prebuilt functions exist - * to handle common key types. - * - * UHashtable ownership of keys and values is flexible, and controlled - * by whether or not the key deleter and value deleter functions are - * set. 
If a void* key is actually a pointer to a deletable object, - * then UHashtable can be made to delete that object by setting the - * key deleter function pointer to a non-NULL value. If this is done, - * then keys passed to uhash_put() are owned by the hashtable and will - * be deleted by it at some point, either as keys are replaced, or - * when uhash_close() is finally called. The same is true of values - * and the value deleter function pointer. Keys passed to methods - * other than uhash_put() are never owned by the hashtable. - * - * NULL values are not allowed. uhash_get() returns NULL to indicate - * a key that is not in the table, and having a NULL value in the - * table would generate an ambiguous result. If a key and a NULL - * value is passed to uhash_put(), this has the effect of doing a - * uhash_remove() on that key. This keeps uhash_get(), uhash_count(), - * and uhash_nextElement() consistent with one another. - * - * To see everything in a hashtable, use uhash_nextElement() to - * iterate through its contents. Each call to this function returns a - * UHashElement pointer. A hash element contains a key, value, and - * hashcode. During iteration an element may be deleted by calling - * uhash_removeElement(); iteration may safely continue thereafter. - * The uhash_remove() function may also be safely called in - * mid-iteration. If uhash_put() is called during iteration, - * the iteration is still guaranteed to terminate reasonably, but - * there is no guarantee that every element will be returned or that - * some won't be returned more than once. - * - * Under no circumstances should the UHashElement returned by - * uhash_nextElement be modified directly. - * - * By default, the hashtable grows when necessary, but never shrinks, - * even if all items are removed. For most applications this is - * optimal. 
However, in a highly dynamic usage where memory is at a - * premium, the table can be set to both grow and shrink by calling - * uhash_setResizePolicy() with the policy U_GROW_AND_SHRINK. In a - * situation where memory is critical and the client wants a table - * that does not grow at all, the constant U_FIXED can be used. - */ - -/******************************************************************** - * Data Structures - ********************************************************************/ - -U_CDECL_BEGIN - -/** - * A key or value within a UHashtable. - * The hashing and comparison functions take a pointer to a - * UHashTok, but the deleter receives the void* pointer within it. - */ -typedef UElement UHashTok; - -/** - * This is a single hash element. - */ -struct UHashElement { - /* Reorder these elements to pack nicely if necessary */ - int32_t hashcode; - UHashTok value; - UHashTok key; -}; -typedef struct UHashElement UHashElement; - -/** - * A hashing function. - * @param key A key stored in a hashtable - * @return A NON-NEGATIVE hash code for parm. - */ -typedef int32_t U_CALLCONV UHashFunction(const UHashTok key); - -/** - * A key equality (boolean) comparison function. - */ -typedef UElementsAreEqual UKeyComparator; - -/** - * A value equality (boolean) comparison function. - */ -typedef UElementsAreEqual UValueComparator; - -/* see cmemory.h for UObjectDeleter and uprv_deleteUObject() */ - -/** - * This specifies whether or not, and how, the hastable resizes itself. - * See uhash_setResizePolicy(). - */ -enum UHashResizePolicy { - U_GROW, /* Grow on demand, do not shrink */ - U_GROW_AND_SHRINK, /* Grow and shrink on demand */ - U_FIXED /* Never change size */ -}; - -/** - * The UHashtable struct. Clients should treat this as an opaque data - * type and manipulate it only through the uhash_... API. 
- */ -struct UHashtable { - - /* Main key-value pair storage array */ - - UHashElement *elements; - - /* Function pointers */ - - UHashFunction *keyHasher; /* Computes hash from key. - * Never null. */ - UKeyComparator *keyComparator; /* Compares keys for equality. - * Never null. */ - UValueComparator *valueComparator; /* Compares the values for equality */ - - UObjectDeleter *keyDeleter; /* Deletes keys when required. - * If NULL won't do anything */ - UObjectDeleter *valueDeleter; /* Deletes values when required. - * If NULL won't do anything */ - - /* Size parameters */ - - int32_t count; /* The number of key-value pairs in this table. - * 0 <= count <= length. In practice we - * never let count == length (see code). */ - int32_t length; /* The physical size of the arrays hashes, keys - * and values. Must be prime. */ - - /* Rehashing thresholds */ - - int32_t highWaterMark; /* If count > highWaterMark, rehash */ - int32_t lowWaterMark; /* If count < lowWaterMark, rehash */ - float highWaterRatio; /* 0..1; high water as a fraction of length */ - float lowWaterRatio; /* 0..1; low water as a fraction of length */ - - int8_t primeIndex; /* Index into our prime table for length. - * length == PRIMES[primeIndex] */ - UBool allocated; /* Was this UHashtable allocated? */ -}; -typedef struct UHashtable UHashtable; - -U_CDECL_END - -/******************************************************************** - * API - ********************************************************************/ - -/** - * Initialize a new UHashtable. - * @param keyHash A pointer to the key hashing function. Must not be - * NULL. - * @param keyComp A pointer to the function that compares keys. Must - * not be NULL. - * @param status A pointer to an UErrorCode to receive any errors. - * @return A pointer to a UHashtable, or 0 if an error occurred. 
- * @see uhash_openSize - */ -U_CAPI UHashtable* U_EXPORT2 -uhash_open(UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - UErrorCode *status); - -/** - * Initialize a new UHashtable with a given initial size. - * @param keyHash A pointer to the key hashing function. Must not be - * NULL. - * @param keyComp A pointer to the function that compares keys. Must - * not be NULL. - * @param size The initial capacity of this hash table. - * @param status A pointer to an UErrorCode to receive any errors. - * @return A pointer to a UHashtable, or 0 if an error occurred. - * @see uhash_open - */ -U_CAPI UHashtable* U_EXPORT2 -uhash_openSize(UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - int32_t size, - UErrorCode *status); - -/** - * Initialize an existing UHashtable. - * @param keyHash A pointer to the key hashing function. Must not be - * NULL. - * @param keyComp A pointer to the function that compares keys. Must - * not be NULL. - * @param status A pointer to an UErrorCode to receive any errors. - * @return A pointer to a UHashtable, or 0 if an error occurred. - * @see uhash_openSize - */ -U_CAPI UHashtable* U_EXPORT2 -uhash_init(UHashtable *hash, - UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - UErrorCode *status); - -/** - * Initialize an existing UHashtable. - * @param keyHash A pointer to the key hashing function. Must not be - * NULL. - * @param keyComp A pointer to the function that compares keys. Must - * not be NULL. - * @param size The initial capacity of this hash table. - * @param status A pointer to an UErrorCode to receive any errors. - * @return A pointer to a UHashtable, or 0 if an error occurred. 
- * @see uhash_openSize - */ -U_CAPI UHashtable* U_EXPORT2 -uhash_initSize(UHashtable *hash, - UHashFunction *keyHash, - UKeyComparator *keyComp, - UValueComparator *valueComp, - int32_t size, - UErrorCode *status); - -/** - * Close a UHashtable, releasing the memory used. - * @param hash The UHashtable to close. If hash is NULL no operation is performed. - */ -U_CAPI void U_EXPORT2 -uhash_close(UHashtable *hash); - - - -/** - * Set the function used to hash keys. - * @param hash The UHashtable to set - * @param fn the function to be used hash keys; must not be NULL - * @return the previous key hasher; non-NULL - */ -U_CAPI UHashFunction *U_EXPORT2 -uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn); - -/** - * Set the function used to compare keys. The default comparison is a - * void* pointer comparison. - * @param hash The UHashtable to set - * @param fn the function to be used compare keys; must not be NULL - * @return the previous key comparator; non-NULL - */ -U_CAPI UKeyComparator *U_EXPORT2 -uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn); - -/** - * Set the function used to compare values. The default comparison is a - * void* pointer comparison. - * @param hash The UHashtable to set - * @param fn the function to be used compare keys; must not be NULL - * @return the previous key comparator; non-NULL - */ -U_CAPI UValueComparator *U_EXPORT2 -uhash_setValueComparator(UHashtable *hash, UValueComparator *fn); - -/** - * Set the function used to delete keys. If this function pointer is - * NULL, this hashtable does not delete keys. If it is non-NULL, this - * hashtable does delete keys. This function should be set once - * before any elements are added to the hashtable and should not be - * changed thereafter. 
- * @param hash The UHashtable to set - * @param fn the function to be used delete keys, or NULL - * @return the previous key deleter; may be NULL - */ -U_CAPI UObjectDeleter *U_EXPORT2 -uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn); - -/** - * Set the function used to delete values. If this function pointer - * is NULL, this hashtable does not delete values. If it is non-NULL, - * this hashtable does delete values. This function should be set - * once before any elements are added to the hashtable and should not - * be changed thereafter. - * @param hash The UHashtable to set - * @param fn the function to be used delete values, or NULL - * @return the previous value deleter; may be NULL - */ -U_CAPI UObjectDeleter *U_EXPORT2 -uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn); - -/** - * Specify whether or not, and how, the hastable resizes itself. - * By default, tables grow but do not shrink (policy U_GROW). - * See enum UHashResizePolicy. - * @param hash The UHashtable to set - * @param policy The way the hashtable resizes itself, {U_GROW, U_GROW_AND_SHRINK, U_FIXED} - */ -U_CAPI void U_EXPORT2 -uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy); - -/** - * Get the number of key-value pairs stored in a UHashtable. - * @param hash The UHashtable to query. - * @return The number of key-value pairs stored in hash. - */ -U_CAPI int32_t U_EXPORT2 -uhash_count(const UHashtable *hash); - -/** - * Put a (key=pointer, value=pointer) item in a UHashtable. If the - * keyDeleter is non-NULL, then the hashtable owns 'key' after this - * call. If the valueDeleter is non-NULL, then the hashtable owns - * 'value' after this call. Storing a NULL value is the same as - * calling uhash_remove(). - * @param hash The target UHashtable. - * @param key The key to store. - * @param value The value to store, may be NULL (see above). - * @param status A pointer to an UErrorCode to receive any errors. 
- * @return The previous value, or NULL if none. - * @see uhash_get - */ -U_CAPI void* U_EXPORT2 -uhash_put(UHashtable *hash, - void *key, - void *value, - UErrorCode *status); - -/** - * Put a (key=integer, value=pointer) item in a UHashtable. - * keyDeleter must be NULL. If the valueDeleter is non-NULL, then the - * hashtable owns 'value' after this call. Storing a NULL value is - * the same as calling uhash_remove(). - * @param hash The target UHashtable. - * @param key The integer key to store. - * @param value The value to store, may be NULL (see above). - * @param status A pointer to an UErrorCode to receive any errors. - * @return The previous value, or NULL if none. - * @see uhash_get - */ -U_CAPI void* U_EXPORT2 -uhash_iput(UHashtable *hash, - int32_t key, - void* value, - UErrorCode *status); - -/** - * Put a (key=pointer, value=integer) item in a UHashtable. If the - * keyDeleter is non-NULL, then the hashtable owns 'key' after this - * call. valueDeleter must be NULL. Storing a 0 value is the same as - * calling uhash_remove(). - * @param hash The target UHashtable. - * @param key The key to store. - * @param value The integer value to store. - * @param status A pointer to an UErrorCode to receive any errors. - * @return The previous value, or 0 if none. - * @see uhash_get - */ -U_CAPI int32_t U_EXPORT2 -uhash_puti(UHashtable *hash, - void* key, - int32_t value, - UErrorCode *status); - -/** - * Put a (key=integer, value=integer) item in a UHashtable. If the - * keyDeleter is non-NULL, then the hashtable owns 'key' after this - * call. valueDeleter must be NULL. Storing a 0 value is the same as - * calling uhash_remove(). - * @param hash The target UHashtable. - * @param key The key to store. - * @param value The integer value to store. - * @param status A pointer to an UErrorCode to receive any errors. - * @return The previous value, or 0 if none. 
- * @see uhash_get - */ -U_CAPI int32_t U_EXPORT2 -uhash_iputi(UHashtable *hash, - int32_t key, - int32_t value, - UErrorCode *status); - -/** - * Retrieve a pointer value from a UHashtable using a pointer key, - * as previously stored by uhash_put(). - * @param hash The target UHashtable. - * @param key A pointer key stored in a hashtable - * @return The requested item, or NULL if not found. - */ -U_CAPI void* U_EXPORT2 -uhash_get(const UHashtable *hash, - const void *key); - -/** - * Retrieve a pointer value from a UHashtable using a integer key, - * as previously stored by uhash_iput(). - * @param hash The target UHashtable. - * @param key An integer key stored in a hashtable - * @return The requested item, or NULL if not found. - */ -U_CAPI void* U_EXPORT2 -uhash_iget(const UHashtable *hash, - int32_t key); - -/** - * Retrieve an integer value from a UHashtable using a pointer key, - * as previously stored by uhash_puti(). - * @param hash The target UHashtable. - * @param key A pointer key stored in a hashtable - * @return The requested item, or 0 if not found. - */ -U_CAPI int32_t U_EXPORT2 -uhash_geti(const UHashtable *hash, - const void* key); -/** - * Retrieve an integer value from a UHashtable using an integer key, - * as previously stored by uhash_iputi(). - * @param hash The target UHashtable. - * @param key An integer key stored in a hashtable - * @return The requested item, or 0 if not found. - */ -U_CAPI int32_t U_EXPORT2 -uhash_igeti(const UHashtable *hash, - int32_t key); - -/** - * Remove an item from a UHashtable stored by uhash_put(). - * @param hash The target UHashtable. - * @param key A key stored in a hashtable - * @return The item removed, or NULL if not found. - */ -U_CAPI void* U_EXPORT2 -uhash_remove(UHashtable *hash, - const void *key); - -/** - * Remove an item from a UHashtable stored by uhash_iput(). - * @param hash The target UHashtable. 
- * @param key An integer key stored in a hashtable - * @return The item removed, or NULL if not found. - */ -U_CAPI void* U_EXPORT2 -uhash_iremove(UHashtable *hash, - int32_t key); - -/** - * Remove an item from a UHashtable stored by uhash_puti(). - * @param hash The target UHashtable. - * @param key An key stored in a hashtable - * @return The item removed, or 0 if not found. - */ -U_CAPI int32_t U_EXPORT2 -uhash_removei(UHashtable *hash, - const void* key); - -/** - * Remove an item from a UHashtable stored by uhash_iputi(). - * @param hash The target UHashtable. - * @param key An integer key stored in a hashtable - * @return The item removed, or 0 if not found. - */ -U_CAPI int32_t U_EXPORT2 -uhash_iremovei(UHashtable *hash, - int32_t key); - -/** - * Remove all items from a UHashtable. - * @param hash The target UHashtable. - */ -U_CAPI void U_EXPORT2 -uhash_removeAll(UHashtable *hash); - -/** - * Locate an element of a UHashtable. The caller must not modify the - * returned object. The primary use of this function is to obtain the - * stored key when it may not be identical to the search key. For - * example, if the compare function is a case-insensitive string - * compare, then the hash key may be desired in order to obtain the - * canonical case corresponding to a search key. - * @param hash The target UHashtable. - * @param key A key stored in a hashtable - * @return a hash element, or NULL if the key is not found. - */ -U_CAPI const UHashElement* U_EXPORT2 -uhash_find(const UHashtable *hash, const void* key); - -/** - * \def UHASH_FIRST - * Constant for use with uhash_nextElement - * @see uhash_nextElement - */ -#define UHASH_FIRST (-1) - -/** - * Iterate through the elements of a UHashtable. The caller must not - * modify the returned object. However, uhash_removeElement() may be - * called during iteration to remove an element from the table. - * Iteration may safely be resumed afterwards. 
If uhash_put() is - * called during iteration the iteration will then be out of sync and - * should be restarted. - * @param hash The target UHashtable. - * @param pos This should be set to UHASH_FIRST initially, and left untouched - * thereafter. - * @return a hash element, or NULL if no further key-value pairs - * exist in the table. - */ -U_CAPI const UHashElement* U_EXPORT2 -uhash_nextElement(const UHashtable *hash, - int32_t *pos); - -/** - * Remove an element, returned by uhash_nextElement(), from the table. - * Iteration may be safely continued afterwards. - * @param hash The hashtable - * @param e The element, returned by uhash_nextElement(), to remove. - * Must not be NULL. Must not be an empty or deleted element (as long - * as this was returned by uhash_nextElement() it will not be empty or - * deleted). Note: Although this parameter is const, it will be - * modified. - * @return the value that was removed. - */ -U_CAPI void* U_EXPORT2 -uhash_removeElement(UHashtable *hash, const UHashElement* e); - -/******************************************************************** - * UHashTok convenience - ********************************************************************/ - -/** - * Return a UHashTok for an integer. - * @param i The given integer - * @return a UHashTok for an integer. - */ -/*U_CAPI UHashTok U_EXPORT2 -uhash_toki(int32_t i);*/ - -/** - * Return a UHashTok for a pointer. - * @param p The given pointer - * @return a UHashTok for a pointer. - */ -/*U_CAPI UHashTok U_EXPORT2 -uhash_tokp(void* p);*/ - -/******************************************************************** - * UChar* and char* Support Functions - ********************************************************************/ - -/** - * Generate a hash code for a null-terminated UChar* string. If the - * string is not null-terminated do not use this function. Use - * together with uhash_compareUChars. - * @param key The string (const UChar*) to hash. - * @return A hash code for the key. 
- */ -U_CAPI int32_t U_EXPORT2 -uhash_hashUChars(const UHashTok key); - -/** - * Generate a hash code for a null-terminated char* string. If the - * string is not null-terminated do not use this function. Use - * together with uhash_compareChars. - * @param key The string (const char*) to hash. - * @return A hash code for the key. - */ -U_CAPI int32_t U_EXPORT2 -uhash_hashChars(const UHashTok key); - -/** - * Generate a case-insensitive hash code for a null-terminated char* - * string. If the string is not null-terminated do not use this - * function. Use together with uhash_compareIChars. - * @param key The string (const char*) to hash. - * @return A hash code for the key. - */ -U_CAPI int32_t U_EXPORT2 -uhash_hashIChars(const UHashTok key); - -/** - * Comparator for null-terminated UChar* strings. Use together with - * uhash_hashUChars. - * @param key1 The string for comparison - * @param key2 The string for comparison - * @return true if key1 and key2 are equal, return false otherwise. - */ -U_CAPI UBool U_EXPORT2 -uhash_compareUChars(const UHashTok key1, const UHashTok key2); - -/** - * Comparator for null-terminated char* strings. Use together with - * uhash_hashChars. - * @param key1 The string for comparison - * @param key2 The string for comparison - * @return true if key1 and key2 are equal, return false otherwise. - */ -U_CAPI UBool U_EXPORT2 -uhash_compareChars(const UHashTok key1, const UHashTok key2); - -/** - * Case-insensitive comparator for null-terminated char* strings. Use - * together with uhash_hashIChars. - * @param key1 The string for comparison - * @param key2 The string for comparison - * @return true if key1 and key2 are equal, return false otherwise. 
- */ -U_CAPI UBool U_EXPORT2 -uhash_compareIChars(const UHashTok key1, const UHashTok key2); - -/******************************************************************** - * UnicodeString Support Functions - ********************************************************************/ - -/** - * Hash function for UnicodeString* keys. - * @param key The string (const char*) to hash. - * @return A hash code for the key. - */ -U_CAPI int32_t U_EXPORT2 -uhash_hashUnicodeString(const UElement key); - -/** - * Hash function for UnicodeString* keys (case insensitive). - * Make sure to use together with uhash_compareCaselessUnicodeString. - * @param key The string (const char*) to hash. - * @return A hash code for the key. - */ -U_CAPI int32_t U_EXPORT2 -uhash_hashCaselessUnicodeString(const UElement key); - -/******************************************************************** - * int32_t Support Functions - ********************************************************************/ - -/** - * Hash function for 32-bit integer keys. - * @param key The string (const char*) to hash. - * @return A hash code for the key. - */ -U_CAPI int32_t U_EXPORT2 -uhash_hashLong(const UHashTok key); - -/** - * Comparator function for 32-bit integer keys. - * @param key1 The integer for comparison - * @param Key2 The integer for comparison - * @return true if key1 and key2 are equal, return false otherwise - */ -U_CAPI UBool U_EXPORT2 -uhash_compareLong(const UHashTok key1, const UHashTok key2); - -/******************************************************************** - * Other Support Functions - ********************************************************************/ - -/** - * Deleter for Hashtable objects. - * @param obj The object to be deleted - */ -U_CAPI void U_EXPORT2 -uhash_deleteHashtable(void *obj); - -/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */ - -/** - * Checks if the given hash tables are equal or not. 
- * @param hash1 - * @param hash2 - * @return true if the hashtables are equal and false if not. - */ -U_CAPI UBool U_EXPORT2 -uhash_equals(const UHashtable* hash1, const UHashtable* hash2); - - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUHashtablePointer - * "Smart pointer" class, closes a UHashtable via uhash_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUHashtablePointer, UHashtable, uhash_close); - -U_NAMESPACE_END - -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/uhash_us.cpp b/deps/node/deps/icu-small/source/common/uhash_us.cpp deleted file mode 100644 index ef482c27..00000000 --- a/deps/node/deps/icu-small/source/common/uhash_us.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1997-2011, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* Date Name Description -* 03/22/00 aliu Creation. -* 07/06/01 aliu Modified to support int32_t keys on -* platforms with sizeof(void*) < 32. -****************************************************************************** -*/ - -#include "hash.h" - -/** - * Deleter for Hashtable objects. - */ -U_CAPI void U_EXPORT2 -uhash_deleteHashtable(void *obj) { - U_NAMESPACE_USE - delete (Hashtable*) obj; -} - -//eof diff --git a/deps/node/deps/icu-small/source/common/uidna.cpp b/deps/node/deps/icu-small/source/common/uidna.cpp deleted file mode 100644 index 6d56fcb8..00000000 --- a/deps/node/deps/icu-small/source/common/uidna.cpp +++ /dev/null @@ -1,922 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* - ******************************************************************************* - * - * Copyright (C) 2003-2014, International Business Machines - * Corporation and others. All Rights Reserved. - * - ******************************************************************************* - * file name: uidna.cpp - * encoding: UTF-8 - * tab size: 8 (not used) - * indentation:4 - * - * created on: 2003feb1 - * created by: Ram Viswanadha - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_IDNA - -#include "unicode/uidna.h" -#include "unicode/ustring.h" -#include "unicode/usprep.h" -#include "punycode.h" -#include "ustr_imp.h" -#include "cmemory.h" -#include "uassert.h" -#include "sprpimpl.h" - -/* it is official IDNA ACE Prefix is "xn--" */ -static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; -#define ACE_PREFIX_LENGTH 4 - -#define MAX_LABEL_LENGTH 63 -/* The Max length of the labels should not be more than MAX_LABEL_LENGTH */ -#define MAX_LABEL_BUFFER_SIZE 100 - -#define MAX_DOMAIN_NAME_LENGTH 255 -/* The Max length of the domain names should not be more than MAX_DOMAIN_NAME_LENGTH */ -#define MAX_IDN_BUFFER_SIZE MAX_DOMAIN_NAME_LENGTH+1 - -#define LOWER_CASE_DELTA 0x0020 -#define HYPHEN 0x002D -#define FULL_STOP 0x002E -#define CAPITAL_A 0x0041 -#define CAPITAL_Z 0x005A - -inline static UChar -toASCIILower(UChar ch){ - if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ - return ch + LOWER_CASE_DELTA; - } - return ch; -} - -inline static UBool -startsWithPrefix(const UChar* src , int32_t srcLength){ - UBool startsWithPrefix = TRUE; - - if(srcLength < ACE_PREFIX_LENGTH){ - return FALSE; - } - - for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ - if(toASCIILower(src[i]) != ACE_PREFIX[i]){ - startsWithPrefix = FALSE; - } - } - return startsWithPrefix; -} - - -inline static int32_t -compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, - const UChar* s2, int32_t s2Len){ - - int32_t minLength; - int32_t 
lengthResult; - - // are we comparing different lengths? - if(s1Len != s2Len) { - if(s1Len < s2Len) { - minLength = s1Len; - lengthResult = -1; - } else { - minLength = s2Len; - lengthResult = 1; - } - } else { - // ok the lengths are equal - minLength = s1Len; - lengthResult = 0; - } - - UChar c1,c2; - int32_t rc; - - for(int32_t i =0;/* no condition */;i++) { - - /* If we reach the ends of both strings then they match */ - if(i == minLength) { - return lengthResult; - } - - c1 = s1[i]; - c2 = s2[i]; - - /* Case-insensitive comparison */ - if(c1!=c2) { - rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); - if(rc!=0) { - lengthResult=rc; - break; - } - } - } - return lengthResult; -} - - -/** - * Ascertain if the given code point is a label separator as - * defined by the IDNA RFC - * - * @param ch The code point to be ascertained - * @return true if the char is a label separator - * @stable ICU 2.8 - */ -static inline UBool isLabelSeparator(UChar ch){ - switch(ch){ - case 0x002e: - case 0x3002: - case 0xFF0E: - case 0xFF61: - return TRUE; - default: - return FALSE; - } -} - -// returns the length of the label excluding the separator -// if *limit == separator then the length returned does not include -// the separtor. 
-static inline int32_t -getNextSeparator(UChar *src, int32_t srcLength, - UChar **limit, UBool *done){ - if(srcLength == -1){ - int32_t i; - for(i=0 ; ;i++){ - if(src[i] == 0){ - *limit = src + i; // point to null - *done = TRUE; - return i; - } - if(isLabelSeparator(src[i])){ - *limit = src + (i+1); // go past the delimiter - return i; - - } - } - }else{ - int32_t i; - for(i=0;i0x007A){ - return FALSE; - } - //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] - if( (ch==0x002D) || - (0x0030 <= ch && ch <= 0x0039) || - (0x0041 <= ch && ch <= 0x005A) || - (0x0061 <= ch && ch <= 0x007A) - ){ - return TRUE; - } - return FALSE; -} - -static int32_t -_internal_toASCII(const UChar* src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UStringPrepProfile* nameprep, - UParseError* parseError, - UErrorCode* status) -{ - - // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. - UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; - //initialize pointers to stack buffers - UChar *b1 = b1Stack, *b2 = b2Stack; - int32_t b1Len=0, b2Len, - b1Capacity = MAX_LABEL_BUFFER_SIZE, - b2Capacity = MAX_LABEL_BUFFER_SIZE , - reqLength=0; - - int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? 
USPREP_ALLOW_UNASSIGNED: 0; - UBool* caseFlags = NULL; - - // the source contains all ascii codepoints - UBool srcIsASCII = TRUE; - // assume the source contains all LDH codepoints - UBool srcIsLDH = TRUE; - - int32_t j=0; - - //get the options - UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); - - int32_t failPos = -1; - - if(srcLength == -1){ - srcLength = u_strlen(src); - } - - if(srcLength > b1Capacity){ - b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); - if(b1==NULL){ - *status = U_MEMORY_ALLOCATION_ERROR; - goto CLEANUP; - } - b1Capacity = srcLength; - } - - // step 1 - for( j=0;j 0x7F){ - srcIsASCII = FALSE; - } - b1[b1Len++] = src[j]; - } - - // step 2 is performed only if the source contains non ASCII - if(srcIsASCII == FALSE){ - - // step 2 - b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); - - if(*status == U_BUFFER_OVERFLOW_ERROR){ - // redo processing of string - // we do not have enough room so grow the buffer - if(b1 != b1Stack){ - uprv_free(b1); - } - b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); - if(b1==NULL){ - *status = U_MEMORY_ALLOCATION_ERROR; - goto CLEANUP; - } - - *status = U_ZERO_ERROR; // reset error - - b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); - } - } - // error bail out - if(U_FAILURE(*status)){ - goto CLEANUP; - } - if(b1Len == 0){ - *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; - goto CLEANUP; - } - - // for step 3 & 4 - srcIsASCII = TRUE; - for( j=0;j 0x7F){ - srcIsASCII = FALSE; - }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character - srcIsLDH = FALSE; - failPos = j; - } - } - if(useSTD3ASCIIRules == TRUE){ - // verify 3a and 3b - // 3(a) Verify the absence of non-LDH ASCII code points; that is, the - // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. 
- // 3(b) Verify the absence of leading and trailing hyphen-minus; that - // is, the absence of U+002D at the beginning and end of the - // sequence. - if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */ - || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ - *status = U_IDNA_STD3_ASCII_RULES_ERROR; - - /* populate the parseError struct */ - if(srcIsLDH==FALSE){ - // failPos is always set the index of failure - uprv_syntaxError(b1,failPos, b1Len,parseError); - }else if(b1[0] == HYPHEN){ - // fail position is 0 - uprv_syntaxError(b1,0,b1Len,parseError); - }else{ - // the last index in the source is always length-1 - uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError); - } - - goto CLEANUP; - } - } - // Step 4: if the source is ASCII then proceed to step 8 - if(srcIsASCII){ - if(b1Len <= destCapacity){ - u_memmove(dest, b1, b1Len); - reqLength = b1Len; - }else{ - reqLength = b1Len; - goto CLEANUP; - } - }else{ - // step 5 : verify the sequence does not begin with ACE prefix - if(!startsWithPrefix(b1,b1Len)){ - - //step 6: encode the sequence with punycode - - // do not preserve the case flags for now! 
- // TODO: Preserve the case while implementing the RFE - // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); - // uprv_memset(caseFlags,TRUE,b1Len); - - b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status); - - if(*status == U_BUFFER_OVERFLOW_ERROR){ - // redo processing of string - /* we do not have enough room so grow the buffer*/ - b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); - if(b2 == NULL){ - *status = U_MEMORY_ALLOCATION_ERROR; - goto CLEANUP; - } - - *status = U_ZERO_ERROR; // reset error - - b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status); - } - //error bail out - if(U_FAILURE(*status)){ - goto CLEANUP; - } - // TODO : Reconsider while implementing the case preserve RFE - // convert all codepoints to lower case ASCII - // toASCIILower(b2,b2Len); - reqLength = b2Len+ACE_PREFIX_LENGTH; - - if(reqLength > destCapacity){ - *status = U_BUFFER_OVERFLOW_ERROR; - goto CLEANUP; - } - //Step 7: prepend the ACE prefix - u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH); - //Step 6: copy the contents in b2 into dest - u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len); - - }else{ - *status = U_IDNA_ACE_PREFIX_ERROR; - //position of failure is 0 - uprv_syntaxError(b1,0,b1Len,parseError); - goto CLEANUP; - } - } - // step 8: verify the length of label - if(reqLength > MAX_LABEL_LENGTH){ - *status = U_IDNA_LABEL_TOO_LONG_ERROR; - } - -CLEANUP: - if(b1 != b1Stack){ - uprv_free(b1); - } - if(b2 != b2Stack){ - uprv_free(b2); - } - uprv_free(caseFlags); - - return u_terminateUChars(dest, destCapacity, reqLength, status); -} - -static int32_t -_internal_toUnicode(const UChar* src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UStringPrepProfile* nameprep, - UParseError* parseError, - UErrorCode* status) -{ - - //get the options - //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); - int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? 
USPREP_ALLOW_UNASSIGNED: 0; - - // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. - UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; - - //initialize pointers to stack buffers - UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; - int32_t b1Len = 0, b2Len, b1PrimeLen, b3Len, - b1Capacity = MAX_LABEL_BUFFER_SIZE, - b2Capacity = MAX_LABEL_BUFFER_SIZE, - b3Capacity = MAX_LABEL_BUFFER_SIZE, - reqLength=0; - - UBool* caseFlags = NULL; - - UBool srcIsASCII = TRUE; - /*UBool srcIsLDH = TRUE; - int32_t failPos =0;*/ - - // step 1: find out if all the codepoints in src are ASCII - if(srcLength==-1){ - srcLength = 0; - for(;src[srcLength]!=0;){ - if(src[srcLength]> 0x7f){ - srcIsASCII = FALSE; - }/*else if(isLDHChar(src[srcLength])==FALSE){ - // here we do not assemble surrogates - // since we know that LDH code points - // are in the ASCII range only - srcIsLDH = FALSE; - failPos = srcLength; - }*/ - srcLength++; - } - }else if(srcLength > 0){ - for(int32_t j=0; j 0x7f){ - srcIsASCII = FALSE; - }/*else if(isLDHChar(src[j])==FALSE){ - // here we do not assemble surrogates - // since we know that LDH code points - // are in the ASCII range only - srcIsLDH = FALSE; - failPos = j; - }*/ - } - }else{ - return 0; - } - - if(srcIsASCII == FALSE){ - // step 2: process the string - b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); - if(*status == U_BUFFER_OVERFLOW_ERROR){ - // redo processing of string - /* we do not have enough room so grow the buffer*/ - b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); - if(b1==NULL){ - *status = U_MEMORY_ALLOCATION_ERROR; - goto CLEANUP; - } - - *status = U_ZERO_ERROR; // reset error - - b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); - } - //bail out on error - if(U_FAILURE(*status)){ - goto CLEANUP; - } - 
}else{ - - //just point src to b1 - b1 = (UChar*) src; - b1Len = srcLength; - } - - // The RFC states that - // - // ToUnicode never fails. If any step fails, then the original input - // is returned immediately in that step. - // - - //step 3: verify ACE Prefix - if(startsWithPrefix(b1,b1Len)){ - - //step 4: Remove the ACE Prefix - b1Prime = b1 + ACE_PREFIX_LENGTH; - b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; - - //step 5: Decode using punycode - b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status); - - if(*status == U_BUFFER_OVERFLOW_ERROR){ - // redo processing of string - /* we do not have enough room so grow the buffer*/ - b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); - if(b2==NULL){ - *status = U_MEMORY_ALLOCATION_ERROR; - goto CLEANUP; - } - - *status = U_ZERO_ERROR; // reset error - - b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status); - } - - - //step 6:Apply toASCII - b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status); - - if(*status == U_BUFFER_OVERFLOW_ERROR){ - // redo processing of string - /* we do not have enough room so grow the buffer*/ - b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); - if(b3==NULL){ - *status = U_MEMORY_ALLOCATION_ERROR; - goto CLEANUP; - } - - *status = U_ZERO_ERROR; // reset error - - b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status); - - } - //bail out on error - if(U_FAILURE(*status)){ - goto CLEANUP; - } - - //step 7: verify - if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ - // Cause the original to be returned. - *status = U_IDNA_VERIFICATION_ERROR; - goto CLEANUP; - } - - //step 8: return output of step 5 - reqLength = b2Len; - if(b2Len <= destCapacity) { - u_memmove(dest, b2, b2Len); - } - } - else{ - // See the start of this if statement for why this is commented out. 
- // verify that STD3 ASCII rules are satisfied - /*if(useSTD3ASCIIRules == TRUE){ - if( srcIsLDH == FALSE // source contains some non-LDH characters - || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ - *status = U_IDNA_STD3_ASCII_RULES_ERROR; - - // populate the parseError struct - if(srcIsLDH==FALSE){ - // failPos is always set the index of failure - uprv_syntaxError(src,failPos, srcLength,parseError); - }else if(src[0] == HYPHEN){ - // fail position is 0 - uprv_syntaxError(src,0,srcLength,parseError); - }else{ - // the last index in the source is always length-1 - uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); - } - - goto CLEANUP; - } - }*/ - // just return the source - //copy the source to destination - if(srcLength <= destCapacity){ - u_memmove(dest, src, srcLength); - } - reqLength = srcLength; - } - - -CLEANUP: - - if(b1 != b1Stack && b1!=src){ - uprv_free(b1); - } - if(b2 != b2Stack){ - uprv_free(b2); - } - uprv_free(caseFlags); - - // The RFC states that - // - // ToUnicode never fails. If any step fails, then the original input - // is returned immediately in that step. - // - // So if any step fails lets copy source to destination - if(U_FAILURE(*status)){ - //copy the source to destination - if(dest && srcLength <= destCapacity){ - // srcLength should have already been set earlier. 
- U_ASSERT(srcLength >= 0); - u_memmove(dest, src, srcLength); - } - reqLength = srcLength; - *status = U_ZERO_ERROR; - } - - return u_terminateUChars(dest, destCapacity, reqLength, status); -} - -U_CAPI int32_t U_EXPORT2 -uidna_toASCII(const UChar* src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UParseError* parseError, - UErrorCode* status){ - - if(status == NULL || U_FAILURE(*status)){ - return 0; - } - if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); - - if(U_FAILURE(*status)){ - return -1; - } - - int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); - - /* close the profile*/ - usprep_close(nameprep); - - return retLen; -} - -U_CAPI int32_t U_EXPORT2 -uidna_toUnicode(const UChar* src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UParseError* parseError, - UErrorCode* status){ - - if(status == NULL || U_FAILURE(*status)){ - return 0; - } - if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); - - if(U_FAILURE(*status)){ - return -1; - } - - int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); - - usprep_close(nameprep); - - return retLen; -} - - -U_CAPI int32_t U_EXPORT2 -uidna_IDNToASCII( const UChar *src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UParseError *parseError, - UErrorCode *status){ - - if(status == NULL || U_FAILURE(*status)){ - return 0; - } - if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } 
- - int32_t reqLength = 0; - - UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); - - if(U_FAILURE(*status)){ - return 0; - } - - //initialize pointers - UChar *delimiter = (UChar*)src; - UChar *labelStart = (UChar*)src; - UChar *currentDest = (UChar*) dest; - int32_t remainingLen = srcLength; - int32_t remainingDestCapacity = destCapacity; - int32_t labelLen = 0, labelReqLength = 0; - UBool done = FALSE; - - - for(;;){ - - labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); - labelReqLength = 0; - if(!(labelLen==0 && done)){// make sure this is not a root label separator. - - labelReqLength = _internal_toASCII( labelStart, labelLen, - currentDest, remainingDestCapacity, - options, nameprep, - parseError, status); - - if(*status == U_BUFFER_OVERFLOW_ERROR){ - - *status = U_ZERO_ERROR; // reset error - remainingDestCapacity = 0; - } - } - - - if(U_FAILURE(*status)){ - break; - } - - reqLength +=labelReqLength; - // adjust the destination pointer - if(labelReqLength < remainingDestCapacity){ - currentDest = currentDest + labelReqLength; - remainingDestCapacity -= labelReqLength; - }else{ - // should never occur - remainingDestCapacity = 0; - } - - if(done == TRUE){ - break; - } - - // add the label separator - if(remainingDestCapacity > 0){ - *currentDest++ = FULL_STOP; - remainingDestCapacity--; - } - reqLength++; - - labelStart = delimiter; - if(remainingLen >0 ){ - remainingLen = (int32_t)(srcLength - (delimiter - src)); - } - - } - - if(reqLength > MAX_DOMAIN_NAME_LENGTH){ - *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; - } - - usprep_close(nameprep); - - return u_terminateUChars(dest, destCapacity, reqLength, status); -} - -U_CAPI int32_t U_EXPORT2 -uidna_IDNToUnicode( const UChar* src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UParseError* parseError, - UErrorCode* status){ - - if(status == NULL || U_FAILURE(*status)){ - return 0; - } - if((src==NULL) || (srcLength < -1) || 
(destCapacity<0) || (!dest && destCapacity > 0)){ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - int32_t reqLength = 0; - - UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); - - if(U_FAILURE(*status)){ - return 0; - } - - //initialize pointers - UChar *delimiter = (UChar*)src; - UChar *labelStart = (UChar*)src; - UChar *currentDest = (UChar*) dest; - int32_t remainingLen = srcLength; - int32_t remainingDestCapacity = destCapacity; - int32_t labelLen = 0, labelReqLength = 0; - UBool done = FALSE; - - for(;;){ - - labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); - - // The RFC states that - // - // ToUnicode never fails. If any step fails, then the original input - // is returned immediately in that step. - // - // _internal_toUnicode will copy the label. - /*if(labelLen==0 && done==FALSE){ - *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; - break; - }*/ - - labelReqLength = _internal_toUnicode(labelStart, labelLen, - currentDest, remainingDestCapacity, - options, nameprep, - parseError, status); - - if(*status == U_BUFFER_OVERFLOW_ERROR){ - *status = U_ZERO_ERROR; // reset error - remainingDestCapacity = 0; - } - - if(U_FAILURE(*status)){ - break; - } - - reqLength +=labelReqLength; - // adjust the destination pointer - if(labelReqLength < remainingDestCapacity){ - currentDest = currentDest + labelReqLength; - remainingDestCapacity -= labelReqLength; - }else{ - // should never occur - remainingDestCapacity = 0; - } - - if(done == TRUE){ - break; - } - - // add the label separator - // Unlike the ToASCII operation we don't normalize the label separators - if(remainingDestCapacity > 0){ - *currentDest++ = *(labelStart + labelLen); - remainingDestCapacity--; - } - reqLength++; - - labelStart = delimiter; - if(remainingLen >0 ){ - remainingLen = (int32_t)(srcLength - (delimiter - src)); - } - - } - - if(reqLength > MAX_DOMAIN_NAME_LENGTH){ - *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; - } - - 
usprep_close(nameprep); - - return u_terminateUChars(dest, destCapacity, reqLength, status); -} - -U_CAPI int32_t U_EXPORT2 -uidna_compare( const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - int32_t options, - UErrorCode* status){ - - if(status == NULL || U_FAILURE(*status)){ - return -1; - } - - UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; - UChar *b1 = b1Stack, *b2 = b2Stack; - int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; - int32_t result=-1; - - UParseError parseError; - - b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); - if(*status == U_BUFFER_OVERFLOW_ERROR){ - // redo processing of string - b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); - if(b1==NULL){ - *status = U_MEMORY_ALLOCATION_ERROR; - goto CLEANUP; - } - - *status = U_ZERO_ERROR; // reset error - - b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); - - } - - b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status); - if(*status == U_BUFFER_OVERFLOW_ERROR){ - // redo processing of string - b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); - if(b2==NULL){ - *status = U_MEMORY_ALLOCATION_ERROR; - goto CLEANUP; - } - - *status = U_ZERO_ERROR; // reset error - - b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status); - - } - // when toASCII is applied all label separators are replaced with FULL_STOP - result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); - -CLEANUP: - if(b1 != b1Stack){ - uprv_free(b1); - } - - if(b2 != b2Stack){ - uprv_free(b2); - } - - return result; -} - -#endif /* #if !UCONFIG_NO_IDNA */ diff --git a/deps/node/deps/icu-small/source/common/uinit.cpp b/deps/node/deps/icu-small/source/common/uinit.cpp deleted file mode 100644 index 624431be..00000000 --- a/deps/node/deps/icu-small/source/common/uinit.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2001-2015, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* file name: uinit.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001July05 -* created by: George Rhoten -*/ - -#include "unicode/utypes.h" -#include "unicode/icuplug.h" -#include "unicode/uclean.h" -#include "cmemory.h" -#include "icuplugimp.h" -#include "ucln_cmn.h" -#include "ucnv_io.h" -#include "umutex.h" -#include "utracimp.h" - -U_NAMESPACE_BEGIN - -static UInitOnce gICUInitOnce = U_INITONCE_INITIALIZER; - -static UBool U_CALLCONV uinit_cleanup() { - gICUInitOnce.reset(); - return TRUE; -} - -static void U_CALLCONV -initData(UErrorCode &status) -{ -#if UCONFIG_ENABLE_PLUGINS - /* initialize plugins */ - uplug_init(&status); -#endif - -#if !UCONFIG_NO_CONVERSION - /* - * 2005-may-02 - * - * ICU4C 3.4 (jitterbug 4497) hardcodes the data for Unicode character - * properties for APIs that want to be fast. - * Therefore, we need not load them here nor check for errors. - * Instead, we load the converter alias table to see if any ICU data - * is available. - * Users should really open the service objects they need and check - * for errors there, to make sure that the actual items they need are - * available. - */ - ucnv_io_countKnownConverters(&status); -#endif - ucln_common_registerCleanup(UCLN_COMMON_UINIT, uinit_cleanup); -} - -U_NAMESPACE_END - -U_NAMESPACE_USE - -/* - * ICU Initialization Function. Need not be called. 
- */ -U_CAPI void U_EXPORT2 -u_init(UErrorCode *status) { - UTRACE_ENTRY_OC(UTRACE_U_INIT); - umtx_initOnce(gICUInitOnce, &initData, *status); - UTRACE_EXIT_STATUS(*status); -} diff --git a/deps/node/deps/icu-small/source/common/uinvchar.cpp b/deps/node/deps/icu-small/source/common/uinvchar.cpp deleted file mode 100644 index eafb951e..00000000 --- a/deps/node/deps/icu-small/source/common/uinvchar.cpp +++ /dev/null @@ -1,614 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2010, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uinvchar.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:2 -* -* created on: 2004sep14 -* created by: Markus W. Scherer -* -* Functions for handling invariant characters, moved here from putil.c -* for better modularization. -*/ - -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "udataswp.h" -#include "cstring.h" -#include "cmemory.h" -#include "uassert.h" -#include "uinvchar.h" - -/* invariant-character handling --------------------------------------------- */ - -/* - * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h) - * appropriately for most EBCDIC codepages. - * - * They currently also map most other ASCII graphic characters, - * appropriately for codepages 37 and 1047. - * Exceptions: The characters for []^ have different codes in 37 & 1047. - * Both versions are mapped to ASCII. - * - * ASCII 37 1047 - * [ 5B BA AD - * ] 5D BB BD - * ^ 5E B0 5F - * - * There are no mappings for variant characters from Unicode to EBCDIC. - * - * Currently, C0 control codes are also included in these maps. 
- * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other - * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A), - * but there is no mapping for ASCII LF back to EBCDIC. - * - * ASCII EBCDIC S/390-OE - * LF 0A 25 15 - * NEL 85 15 25 - * - * The maps below explicitly exclude the variant - * control and graphical characters that are in ASCII-based - * codepages at 0x80 and above. - * "No mapping" is expressed by mapping to a 00 byte. - * - * These tables do not establish a converter or a codepage. - */ - -static const uint8_t asciiFromEbcdic[256]={ - 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, - 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, - - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, - 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, - 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, - - 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, - 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, - - 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x5c, 0x00, 0x53, 0x54, 
0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -}; - -static const uint8_t ebcdicFromAscii[256]={ - 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, - 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, - - 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, - 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d, - 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, - 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -/* Same as asciiFromEbcdic[] except maps all letters to lowercase. 
*/ -static const uint8_t lowercaseAsciiFromEbcdic[256]={ - 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, - 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, - - 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, - 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, - 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, - - 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, - 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, - - 0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 -}; - -/* - * Bit sets indicating which characters of the ASCII repertoire - * (by ASCII/Unicode code) are "invariant". - * See utypes.h for more details. - * - * As invariant are considered the characters of the ASCII repertoire except - * for the following: - * 21 '!' 
- * 23 '#' - * 24 '$' - * - * 40 '@' - * - * 5b '[' - * 5c '\' - * 5d ']' - * 5e '^' - * - * 60 '`' - * - * 7b '{' - * 7c '|' - * 7d '}' - * 7e '~' - */ -static const uint32_t invariantChars[4]={ - 0xfffffbff, /* 00..1f but not 0a */ - 0xffffffe5, /* 20..3f but not 21 23 24 */ - 0x87fffffe, /* 40..5f but not 40 5b..5e */ - 0x87fffffe /* 60..7f but not 60 7b..7e */ -}; - -/* - * test unsigned types (or values known to be non-negative) for invariant characters, - * tests ASCII-family character values - */ -#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0) - -/* test signed types for invariant characters, adds test for positive values */ -#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c)) - -#if U_CHARSET_FAMILY==U_ASCII_FAMILY -#define CHAR_TO_UCHAR(c) c -#define UCHAR_TO_CHAR(c) c -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY -#define CHAR_TO_UCHAR(u) asciiFromEbcdic[u] -#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u] -#else -# error U_CHARSET_FAMILY is not valid -#endif - - -U_CAPI void U_EXPORT2 -u_charsToUChars(const char *cs, UChar *us, int32_t length) { - UChar u; - uint8_t c; - - /* - * Allow the entire ASCII repertoire to be mapped _to_ Unicode. - * For EBCDIC systems, this works for characters with codes from - * codepages 37 and 1047 or compatible. - */ - while(length>0) { - c=(uint8_t)(*cs++); - u=(UChar)CHAR_TO_UCHAR(c); - U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */ - *us++=u; - --length; - } -} - -U_CAPI void U_EXPORT2 -u_UCharsToChars(const UChar *us, char *cs, int32_t length) { - UChar u; - - while(length>0) { - u=*us++; - if(!UCHAR_IS_INVARIANT(u)) { - U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. 
*/ - u=0; - } - *cs++=(char)UCHAR_TO_CHAR(u); - --length; - } -} - -U_CAPI UBool U_EXPORT2 -uprv_isInvariantString(const char *s, int32_t length) { - uint8_t c; - - for(;;) { - if(length<0) { - /* NUL-terminated */ - c=(uint8_t)*s++; - if(c==0) { - break; - } - } else { - /* count length */ - if(length==0) { - break; - } - --length; - c=(uint8_t)*s++; - if(c==0) { - continue; /* NUL is invariant */ - } - } - /* c!=0 now, one branch below checks c==0 for variant characters */ - - /* - * no assertions here because these functions are legitimately called - * for strings with variant characters - */ -#if U_CHARSET_FAMILY==U_ASCII_FAMILY - if(!UCHAR_IS_INVARIANT(c)) { - return FALSE; /* found a variant char */ - } -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY - c=CHAR_TO_UCHAR(c); - if(c==0 || !UCHAR_IS_INVARIANT(c)) { - return FALSE; /* found a variant char */ - } -#else -# error U_CHARSET_FAMILY is not valid -#endif - } - return TRUE; -} - -U_CAPI UBool U_EXPORT2 -uprv_isInvariantUString(const UChar *s, int32_t length) { - UChar c; - - for(;;) { - if(length<0) { - /* NUL-terminated */ - c=*s++; - if(c==0) { - break; - } - } else { - /* count length */ - if(length==0) { - break; - } - --length; - c=*s++; - } - - /* - * no assertions here because these functions are legitimately called - * for strings with variant characters - */ - if(!UCHAR_IS_INVARIANT(c)) { - return FALSE; /* found a variant char */ - } - } - return TRUE; -} - -/* UDataSwapFn implementations used in udataswp.c ------- */ - -/* convert ASCII to EBCDIC and verify that all characters are invariant */ -U_CAPI int32_t U_EXPORT2 -uprv_ebcdicFromAscii(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint8_t *s; - uint8_t *t; - uint8_t c; - - int32_t count; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; 
- } - - /* setup and swapping */ - s=(const uint8_t *)inData; - t=(uint8_t *)outData; - count=length; - while(count>0) { - c=*s++; - if(!UCHAR_IS_INVARIANT(c)) { - udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n", - length, length-count); - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - *t++=ebcdicFromAscii[c]; - --count; - } - - return length; -} - -/* this function only checks and copies ASCII strings without conversion */ -U_CFUNC int32_t -uprv_copyAscii(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint8_t *s; - uint8_t c; - - int32_t count; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and checking */ - s=(const uint8_t *)inData; - count=length; - while(count>0) { - c=*s++; - if(!UCHAR_IS_INVARIANT(c)) { - udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n", - length, length-count); - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - --count; - } - - if(length>0 && inData!=outData) { - uprv_memcpy(outData, inData, length); - } - - return length; -} - -/* convert EBCDIC to ASCII and verify that all characters are invariant */ -U_CFUNC int32_t -uprv_asciiFromEbcdic(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint8_t *s; - uint8_t *t; - uint8_t c; - - int32_t count; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - s=(const uint8_t *)inData; - t=(uint8_t *)outData; - count=length; - while(count>0) { - c=*s++; - if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { 
- udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n", - length, length-count); - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - *t++=c; - --count; - } - - return length; -} - -/* this function only checks and copies EBCDIC strings without conversion */ -U_CFUNC int32_t -uprv_copyEbcdic(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const uint8_t *s; - uint8_t c; - - int32_t count; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and checking */ - s=(const uint8_t *)inData; - count=length; - while(count>0) { - c=*s++; - if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { - udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n", - length, length-count); - *pErrorCode=U_INVALID_CHAR_FOUND; - return 0; - } - --count; - } - - if(length>0 && inData!=outData) { - uprv_memcpy(outData, inData, length); - } - - return length; -} - -/* compare invariant strings; variant characters compare less than others and unlike each other */ -U_CFUNC int32_t -uprv_compareInvAscii(const UDataSwapper *ds, - const char *outString, int32_t outLength, - const UChar *localString, int32_t localLength) { - (void)ds; - int32_t minLength; - UChar32 c1, c2; - uint8_t c; - - if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { - return 0; - } - - if(outLength<0) { - outLength=(int32_t)uprv_strlen(outString); - } - if(localLength<0) { - localLength=u_strlen(localString); - } - - minLength= outLength0) { - c=(uint8_t)*outString++; - if(UCHAR_IS_INVARIANT(c)) { - c1=c; - } else { - c1=-1; - } - - c2=*localString++; - if(!UCHAR_IS_INVARIANT(c2)) { - c2=-2; - } - - if((c1-=c2)!=0) { - return c1; - } - - --minLength; - } - - /* strings start with 
same prefix, compare lengths */ - return outLength-localLength; -} - -U_CFUNC int32_t -uprv_compareInvEbcdic(const UDataSwapper *ds, - const char *outString, int32_t outLength, - const UChar *localString, int32_t localLength) { - (void)ds; - int32_t minLength; - UChar32 c1, c2; - uint8_t c; - - if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { - return 0; - } - - if(outLength<0) { - outLength=(int32_t)uprv_strlen(outString); - } - if(localLength<0) { - localLength=u_strlen(localString); - } - - minLength= outLength0) { - c=(uint8_t)*outString++; - if(c==0) { - c1=0; - } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) { - /* c1 is set */ - } else { - c1=-1; - } - - c2=*localString++; - if(!UCHAR_IS_INVARIANT(c2)) { - c2=-2; - } - - if((c1-=c2)!=0) { - return c1; - } - - --minLength; - } - - /* strings start with same prefix, compare lengths */ - return outLength-localLength; -} - -U_CAPI int32_t U_EXPORT2 -uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) { - int32_t c1, c2; - - for(;; ++s1, ++s2) { - c1=(uint8_t)*s1; - c2=(uint8_t)*s2; - if(c1!=c2) { - if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) { - c1=-(int32_t)(uint8_t)*s1; - } - if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) { - c2=-(int32_t)(uint8_t)*s2; - } - return c1-c2; - } else if(c1==0) { - return 0; - } - } -} - -U_CAPI char U_EXPORT2 -uprv_ebcdicToLowercaseAscii(char c) { - return (char)lowercaseAsciiFromEbcdic[(uint8_t)c]; -} - -U_INTERNAL uint8_t* U_EXPORT2 -uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n) -{ - uint8_t *orig_dst = dst; - - if(n==-1) { - n = static_cast(uprv_strlen((const char*)src)+1); /* copy NUL */ - } - /* copy non-null */ - while(*src && n>0) { - *(dst++) = asciiFromEbcdic[*(src++)]; - n--; - } - /* pad */ - while(n>0) { - *(dst++) = 0; - n--; - } - return orig_dst; -} - -U_INTERNAL uint8_t* U_EXPORT2 -uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n) -{ - 
uint8_t *orig_dst = dst; - - if(n==-1) { - n = static_cast(uprv_strlen((const char*)src)+1); /* copy NUL */ - } - /* copy non-null */ - while(*src && n>0) { - char ch = ebcdicFromAscii[*(src++)]; - if(ch == 0) { - ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */ - } - *(dst++) = ch; - n--; - } - /* pad */ - while(n>0) { - *(dst++) = 0; - n--; - } - return orig_dst; -} diff --git a/deps/node/deps/icu-small/source/common/uinvchar.h b/deps/node/deps/icu-small/source/common/uinvchar.h deleted file mode 100644 index 56dddfa8..00000000 --- a/deps/node/deps/icu-small/source/common/uinvchar.h +++ /dev/null @@ -1,130 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uinvchar.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:2 -* -* created on: 2004sep14 -* created by: Markus W. Scherer -* -* Definitions for handling invariant characters, moved here from putil.c -* for better modularization. -*/ - -#ifndef __UINVCHAR_H__ -#define __UINVCHAR_H__ - -#include "unicode/utypes.h" -#ifdef __cplusplus -#include "unicode/unistr.h" -#endif - -/** - * Check if a char string only contains invariant characters. - * See utypes.h for details. - * - * @param s Input string pointer. - * @param length Length of the string, can be -1 if NUL-terminated. - * @return TRUE if s contains only invariant characters. - * - * @internal (ICU 2.8) - */ -U_INTERNAL UBool U_EXPORT2 -uprv_isInvariantString(const char *s, int32_t length); - -/** - * Check if a Unicode string only contains invariant characters. - * See utypes.h for details. - * - * @param s Input string pointer. 
- * @param length Length of the string, can be -1 if NUL-terminated. - * @return TRUE if s contains only invariant characters. - * - * @internal (ICU 2.8) - */ -U_INTERNAL UBool U_EXPORT2 -uprv_isInvariantUString(const UChar *s, int32_t length); - -/** - * \def U_UPPER_ORDINAL - * Get the ordinal number of an uppercase invariant character - * @internal - */ -#if U_CHARSET_FAMILY==U_ASCII_FAMILY -# define U_UPPER_ORDINAL(x) ((x)-'A') -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY -# define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \ - (((x) < 'S') ? ((x)-'J'+9) : \ - ((x)-'S'+18))) -#else -# error Unknown charset family! -#endif - -/** - * Compare two EBCDIC invariant-character strings in ASCII order. - * @internal - */ -U_INTERNAL int32_t U_EXPORT2 -uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2); - -/** - * \def uprv_compareInvCharsAsAscii - * Compare two invariant-character strings in ASCII order. - * @internal - */ -#if U_CHARSET_FAMILY==U_ASCII_FAMILY -# define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2) -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY -# define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2) -#else -# error Unknown charset family! -#endif - -/** - * Converts an EBCDIC invariant character to lowercase ASCII. - * @internal - */ -U_INTERNAL char U_EXPORT2 -uprv_ebcdicToLowercaseAscii(char c); - -/** - * \def uprv_invCharToLowercaseAscii - * Converts an invariant character to lowercase ASCII. - * @internal - */ -#if U_CHARSET_FAMILY==U_ASCII_FAMILY -# define uprv_invCharToLowercaseAscii uprv_asciitolower -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY -# define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii -#else -# error Unknown charset family! 
-#endif - -/** - * Copy EBCDIC to ASCII - * @internal - * @see uprv_strncpy - */ -U_INTERNAL uint8_t* U_EXPORT2 -uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n); - - -/** - * Copy ASCII to EBCDIC - * @internal - * @see uprv_strncpy - */ -U_INTERNAL uint8_t* U_EXPORT2 -uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n); - - - -#endif diff --git a/deps/node/deps/icu-small/source/common/uiter.cpp b/deps/node/deps/icu-small/source/common/uiter.cpp deleted file mode 100644 index b9252d81..00000000 --- a/deps/node/deps/icu-small/source/common/uiter.cpp +++ /dev/null @@ -1,1108 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uiter.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jan18 -* created by: Markus W. 
Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "unicode/chariter.h" -#include "unicode/rep.h" -#include "unicode/uiter.h" -#include "unicode/utf.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" -#include "cstring.h" - -U_NAMESPACE_USE - -#define IS_EVEN(n) (((n)&1)==0) -#define IS_POINTER_EVEN(p) IS_EVEN((size_t)p) - -U_CDECL_BEGIN - -/* No-Op UCharIterator implementation for illegal input --------------------- */ - -static int32_t U_CALLCONV -noopGetIndex(UCharIterator * /*iter*/, UCharIteratorOrigin /*origin*/) { - return 0; -} - -static int32_t U_CALLCONV -noopMove(UCharIterator * /*iter*/, int32_t /*delta*/, UCharIteratorOrigin /*origin*/) { - return 0; -} - -static UBool U_CALLCONV -noopHasNext(UCharIterator * /*iter*/) { - return FALSE; -} - -static UChar32 U_CALLCONV -noopCurrent(UCharIterator * /*iter*/) { - return U_SENTINEL; -} - -static uint32_t U_CALLCONV -noopGetState(const UCharIterator * /*iter*/) { - return UITER_NO_STATE; -} - -static void U_CALLCONV -noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCode) { - *pErrorCode=U_UNSUPPORTED_ERROR; -} - -static const UCharIterator noopIterator={ - 0, 0, 0, 0, 0, 0, - noopGetIndex, - noopMove, - noopHasNext, - noopHasNext, - noopCurrent, - noopCurrent, - noopCurrent, - NULL, - noopGetState, - noopSetState -}; - -/* UCharIterator implementation for simple strings -------------------------- */ - -/* - * This is an implementation of a code unit (UChar) iterator - * for UChar * strings. - * - * The UCharIterator.context field holds a pointer to the string. 
- */ - -static int32_t U_CALLCONV -stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { - switch(origin) { - case UITER_ZERO: - return 0; - case UITER_START: - return iter->start; - case UITER_CURRENT: - return iter->index; - case UITER_LIMIT: - return iter->limit; - case UITER_LENGTH: - return iter->length; - default: - /* not a valid origin */ - /* Should never get here! */ - return -1; - } -} - -static int32_t U_CALLCONV -stringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { - int32_t pos; - - switch(origin) { - case UITER_ZERO: - pos=delta; - break; - case UITER_START: - pos=iter->start+delta; - break; - case UITER_CURRENT: - pos=iter->index+delta; - break; - case UITER_LIMIT: - pos=iter->limit+delta; - break; - case UITER_LENGTH: - pos=iter->length+delta; - break; - default: - return -1; /* Error */ - } - - if(posstart) { - pos=iter->start; - } else if(pos>iter->limit) { - pos=iter->limit; - } - - return iter->index=pos; -} - -static UBool U_CALLCONV -stringIteratorHasNext(UCharIterator *iter) { - return iter->indexlimit; -} - -static UBool U_CALLCONV -stringIteratorHasPrevious(UCharIterator *iter) { - return iter->index>iter->start; -} - -static UChar32 U_CALLCONV -stringIteratorCurrent(UCharIterator *iter) { - if(iter->indexlimit) { - return ((const UChar *)(iter->context))[iter->index]; - } else { - return U_SENTINEL; - } -} - -static UChar32 U_CALLCONV -stringIteratorNext(UCharIterator *iter) { - if(iter->indexlimit) { - return ((const UChar *)(iter->context))[iter->index++]; - } else { - return U_SENTINEL; - } -} - -static UChar32 U_CALLCONV -stringIteratorPrevious(UCharIterator *iter) { - if(iter->index>iter->start) { - return ((const UChar *)(iter->context))[--iter->index]; - } else { - return U_SENTINEL; - } -} - -static uint32_t U_CALLCONV -stringIteratorGetState(const UCharIterator *iter) { - return (uint32_t)iter->index; -} - -static void U_CALLCONV -stringIteratorSetState(UCharIterator *iter, 
uint32_t state, UErrorCode *pErrorCode) { - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - /* do nothing */ - } else if(iter==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } else if((int32_t)statestart || iter->limit<(int32_t)state) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - } else { - iter->index=(int32_t)state; - } -} - -static const UCharIterator stringIterator={ - 0, 0, 0, 0, 0, 0, - stringIteratorGetIndex, - stringIteratorMove, - stringIteratorHasNext, - stringIteratorHasPrevious, - stringIteratorCurrent, - stringIteratorNext, - stringIteratorPrevious, - NULL, - stringIteratorGetState, - stringIteratorSetState -}; - -U_CAPI void U_EXPORT2 -uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) { - if(iter!=0) { - if(s!=0 && length>=-1) { - *iter=stringIterator; - iter->context=s; - if(length>=0) { - iter->length=length; - } else { - iter->length=u_strlen(s); - } - iter->limit=iter->length; - } else { - *iter=noopIterator; - } - } -} - -/* UCharIterator implementation for UTF-16BE strings ------------------------ */ - -/* - * This is an implementation of a code unit (UChar) iterator - * for UTF-16BE strings, i.e., strings in byte-vectors where - * each UChar is stored as a big-endian pair of bytes. - * - * The UCharIterator.context field holds a pointer to the string. - * Everything works just like with a normal UChar iterator (uiter_setString), - * except that UChars are assembled from byte pairs. 
- */ - -/* internal helper function */ -static inline UChar32 -utf16BEIteratorGet(UCharIterator *iter, int32_t index) { - const uint8_t *p=(const uint8_t *)iter->context; - return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1]; -} - -static UChar32 U_CALLCONV -utf16BEIteratorCurrent(UCharIterator *iter) { - int32_t index; - - if((index=iter->index)limit) { - return utf16BEIteratorGet(iter, index); - } else { - return U_SENTINEL; - } -} - -static UChar32 U_CALLCONV -utf16BEIteratorNext(UCharIterator *iter) { - int32_t index; - - if((index=iter->index)limit) { - iter->index=index+1; - return utf16BEIteratorGet(iter, index); - } else { - return U_SENTINEL; - } -} - -static UChar32 U_CALLCONV -utf16BEIteratorPrevious(UCharIterator *iter) { - int32_t index; - - if((index=iter->index)>iter->start) { - iter->index=--index; - return utf16BEIteratorGet(iter, index); - } else { - return U_SENTINEL; - } -} - -static const UCharIterator utf16BEIterator={ - 0, 0, 0, 0, 0, 0, - stringIteratorGetIndex, - stringIteratorMove, - stringIteratorHasNext, - stringIteratorHasPrevious, - utf16BEIteratorCurrent, - utf16BEIteratorNext, - utf16BEIteratorPrevious, - NULL, - stringIteratorGetState, - stringIteratorSetState -}; - -/* - * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL, - * i.e., before a pair of 0 bytes where the first 0 byte is at an even - * offset from s. 
- */ -static int32_t -utf16BE_strlen(const char *s) { - if(IS_POINTER_EVEN(s)) { - /* - * even-aligned, call u_strlen(s) - * we are probably on a little-endian machine, but searching for UChar NUL - * does not care about endianness - */ - return u_strlen((const UChar *)s); - } else { - /* odd-aligned, search for pair of 0 bytes */ - const char *p=s; - - while(!(*p==0 && p[1]==0)) { - p+=2; - } - return (int32_t)((p-s)/2); - } -} - -U_CAPI void U_EXPORT2 -uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length) { - if(iter!=NULL) { - /* allow only even-length strings (the input length counts bytes) */ - if(s!=NULL && (length==-1 || (length>=0 && IS_EVEN(length)))) { - /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */ - length>>=1; - - if(U_IS_BIG_ENDIAN && IS_POINTER_EVEN(s)) { - /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */ - uiter_setString(iter, (const UChar *)s, length); - return; - } - - *iter=utf16BEIterator; - iter->context=s; - if(length>=0) { - iter->length=length; - } else { - iter->length=utf16BE_strlen(s); - } - iter->limit=iter->length; - } else { - *iter=noopIterator; - } - } -} - -/* UCharIterator wrapper around CharacterIterator --------------------------- */ - -/* - * This is wrapper code around a C++ CharacterIterator to - * look like a C UCharIterator. - * - * The UCharIterator.context field holds a pointer to the CharacterIterator. 
- */ - -static int32_t U_CALLCONV -characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { - switch(origin) { - case UITER_ZERO: - return 0; - case UITER_START: - return ((CharacterIterator *)(iter->context))->startIndex(); - case UITER_CURRENT: - return ((CharacterIterator *)(iter->context))->getIndex(); - case UITER_LIMIT: - return ((CharacterIterator *)(iter->context))->endIndex(); - case UITER_LENGTH: - return ((CharacterIterator *)(iter->context))->getLength(); - default: - /* not a valid origin */ - /* Should never get here! */ - return -1; - } -} - -static int32_t U_CALLCONV -characterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { - switch(origin) { - case UITER_ZERO: - ((CharacterIterator *)(iter->context))->setIndex(delta); - return ((CharacterIterator *)(iter->context))->getIndex(); - case UITER_START: - case UITER_CURRENT: - case UITER_LIMIT: - return ((CharacterIterator *)(iter->context))->move(delta, (CharacterIterator::EOrigin)origin); - case UITER_LENGTH: - ((CharacterIterator *)(iter->context))->setIndex(((CharacterIterator *)(iter->context))->getLength()+delta); - return ((CharacterIterator *)(iter->context))->getIndex(); - default: - /* not a valid origin */ - /* Should never get here! 
*/ - return -1; - } -} - -static UBool U_CALLCONV -characterIteratorHasNext(UCharIterator *iter) { - return ((CharacterIterator *)(iter->context))->hasNext(); -} - -static UBool U_CALLCONV -characterIteratorHasPrevious(UCharIterator *iter) { - return ((CharacterIterator *)(iter->context))->hasPrevious(); -} - -static UChar32 U_CALLCONV -characterIteratorCurrent(UCharIterator *iter) { - UChar32 c; - - c=((CharacterIterator *)(iter->context))->current(); - if(c!=0xffff || ((CharacterIterator *)(iter->context))->hasNext()) { - return c; - } else { - return U_SENTINEL; - } -} - -static UChar32 U_CALLCONV -characterIteratorNext(UCharIterator *iter) { - if(((CharacterIterator *)(iter->context))->hasNext()) { - return ((CharacterIterator *)(iter->context))->nextPostInc(); - } else { - return U_SENTINEL; - } -} - -static UChar32 U_CALLCONV -characterIteratorPrevious(UCharIterator *iter) { - if(((CharacterIterator *)(iter->context))->hasPrevious()) { - return ((CharacterIterator *)(iter->context))->previous(); - } else { - return U_SENTINEL; - } -} - -static uint32_t U_CALLCONV -characterIteratorGetState(const UCharIterator *iter) { - return ((CharacterIterator *)(iter->context))->getIndex(); -} - -static void U_CALLCONV -characterIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) { - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - /* do nothing */ - } else if(iter==NULL || iter->context==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } else if((int32_t)state<((CharacterIterator *)(iter->context))->startIndex() || ((CharacterIterator *)(iter->context))->endIndex()<(int32_t)state) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - } else { - ((CharacterIterator *)(iter->context))->setIndex((int32_t)state); - } -} - -static const UCharIterator characterIteratorWrapper={ - 0, 0, 0, 0, 0, 0, - characterIteratorGetIndex, - characterIteratorMove, - characterIteratorHasNext, - characterIteratorHasPrevious, - characterIteratorCurrent, - 
characterIteratorNext, - characterIteratorPrevious, - NULL, - characterIteratorGetState, - characterIteratorSetState -}; - -U_CAPI void U_EXPORT2 -uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter) { - if(iter!=0) { - if(charIter!=0) { - *iter=characterIteratorWrapper; - iter->context=charIter; - } else { - *iter=noopIterator; - } - } -} - -/* UCharIterator wrapper around Replaceable --------------------------------- */ - -/* - * This is an implementation of a code unit (UChar) iterator - * based on a Replaceable object. - * - * The UCharIterator.context field holds a pointer to the Replaceable. - * UCharIterator.length and UCharIterator.index hold Replaceable.length() - * and the iteration index. - */ - -static UChar32 U_CALLCONV -replaceableIteratorCurrent(UCharIterator *iter) { - if(iter->indexlimit) { - return ((Replaceable *)(iter->context))->charAt(iter->index); - } else { - return U_SENTINEL; - } -} - -static UChar32 U_CALLCONV -replaceableIteratorNext(UCharIterator *iter) { - if(iter->indexlimit) { - return ((Replaceable *)(iter->context))->charAt(iter->index++); - } else { - return U_SENTINEL; - } -} - -static UChar32 U_CALLCONV -replaceableIteratorPrevious(UCharIterator *iter) { - if(iter->index>iter->start) { - return ((Replaceable *)(iter->context))->charAt(--iter->index); - } else { - return U_SENTINEL; - } -} - -static const UCharIterator replaceableIterator={ - 0, 0, 0, 0, 0, 0, - stringIteratorGetIndex, - stringIteratorMove, - stringIteratorHasNext, - stringIteratorHasPrevious, - replaceableIteratorCurrent, - replaceableIteratorNext, - replaceableIteratorPrevious, - NULL, - stringIteratorGetState, - stringIteratorSetState -}; - -U_CAPI void U_EXPORT2 -uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) { - if(iter!=0) { - if(rep!=0) { - *iter=replaceableIterator; - iter->context=rep; - iter->limit=iter->length=rep->length(); - } else { - *iter=noopIterator; - } - } -} - -/* UCharIterator implementation for 
UTF-8 strings --------------------------- */ - -/* - * Possible, probably necessary only for an implementation for arbitrary - * converters: - * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text. - * This would require to turn reservedFn into a close function and - * to introduce a uiter_close(iter). - */ - -#define UITER_CNV_CAPACITY 16 - -/* - * Minimal implementation: - * Maintain a single-UChar buffer for an additional surrogate. - * The caller must not modify start and limit because they are used internally. - * - * Use UCharIterator fields as follows: - * context pointer to UTF-8 string - * length UTF-16 length of the string; -1 until lazy evaluation - * start current UTF-8 index - * index current UTF-16 index; may be -1="unknown" after setState() - * limit UTF-8 length of the string - * reservedField supplementary code point - * - * Since UCharIterator delivers 16-bit code units, the iteration can be - * currently in the middle of the byte sequence for a supplementary code point. - * In this case, reservedField will contain that code point and start will - * point to after the corresponding byte sequence. The UTF-16 index will be - * one less than what it would otherwise be corresponding to the UTF-8 index. - * Otherwise, reservedField will be 0. - */ - -/* - * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings: - * Add implementations that do not call strlen() for iteration but check for NUL. 
- */ - -static int32_t U_CALLCONV -utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { - switch(origin) { - case UITER_ZERO: - case UITER_START: - return 0; - case UITER_CURRENT: - if(iter->index<0) { - /* the current UTF-16 index is unknown after setState(), count from the beginning */ - const uint8_t *s; - UChar32 c; - int32_t i, limit, index; - - s=(const uint8_t *)iter->context; - i=index=0; - limit=iter->start; /* count up to the UTF-8 index */ - while(istart=i; /* just in case setState() did not get us to a code point boundary */ - if(i==iter->limit) { - iter->length=index; /* in case it was <0 or wrong */ - } - if(iter->reservedField!=0) { - --index; /* we are in the middle of a supplementary code point */ - } - iter->index=index; - } - return iter->index; - case UITER_LIMIT: - case UITER_LENGTH: - if(iter->length<0) { - const uint8_t *s; - UChar32 c; - int32_t i, limit, length; - - s=(const uint8_t *)iter->context; - if(iter->index<0) { - /* - * the current UTF-16 index is unknown after setState(), - * we must first count from the beginning to here - */ - i=length=0; - limit=iter->start; - - /* count from the beginning to the current index */ - while(istart, set the UTF-16 index */ - iter->start=i; /* just in case setState() did not get us to a code point boundary */ - iter->index= iter->reservedField!=0 ? length-1 : length; - } else { - i=iter->start; - length=iter->index; - if(iter->reservedField!=0) { - ++length; - } - } - - /* count from the current index to the end */ - limit=iter->limit; - while(ilength=length; - } - return iter->length; - default: - /* not a valid origin */ - /* Should never get here! 
*/ - return -1; - } -} - -static int32_t U_CALLCONV -utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { - const uint8_t *s; - UChar32 c; - int32_t pos; /* requested UTF-16 index */ - int32_t i; /* UTF-8 index */ - UBool havePos; - - /* calculate the requested UTF-16 index */ - switch(origin) { - case UITER_ZERO: - case UITER_START: - pos=delta; - havePos=TRUE; - /* iter->index<0 (unknown) is possible */ - break; - case UITER_CURRENT: - if(iter->index>=0) { - pos=iter->index+delta; - havePos=TRUE; - } else { - /* the current UTF-16 index is unknown after setState(), use only delta */ - pos=0; - havePos=FALSE; - } - break; - case UITER_LIMIT: - case UITER_LENGTH: - if(iter->length>=0) { - pos=iter->length+delta; - havePos=TRUE; - } else { - /* pin to the end, avoid counting the length */ - iter->index=-1; - iter->start=iter->limit; - iter->reservedField=0; - if(delta>=0) { - return UITER_UNKNOWN_INDEX; - } else { - /* the current UTF-16 index is unknown, use only delta */ - pos=0; - havePos=FALSE; - } - } - break; - default: - return -1; /* Error */ - } - - if(havePos) { - /* shortcuts: pinning to the edges of the string */ - if(pos<=0) { - iter->index=iter->start=iter->reservedField=0; - return 0; - } else if(iter->length>=0 && pos>=iter->length) { - iter->index=iter->length; - iter->start=iter->limit; - iter->reservedField=0; - return iter->index; - } - - /* minimize the number of U8_NEXT/PREV operations */ - if(iter->index<0 || posindex/2) { - /* go forward from the start instead of backward from the current index */ - iter->index=iter->start=iter->reservedField=0; - } else if(iter->length>=0 && (iter->length-pos)<(pos-iter->index)) { - /* - * if we have the UTF-16 index and length and the new position is - * closer to the end than the current index, - * then go backward from the end instead of forward from the current index - */ - iter->index=iter->length; - iter->start=iter->limit; - iter->reservedField=0; - } - - 
delta=pos-iter->index; - if(delta==0) { - return iter->index; /* nothing to do */ - } - } else { - /* move relative to unknown UTF-16 index */ - if(delta==0) { - return UITER_UNKNOWN_INDEX; /* nothing to do */ - } else if(-delta>=iter->start) { - /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */ - iter->index=iter->start=iter->reservedField=0; - return 0; - } else if(delta>=(iter->limit-iter->start)) { - /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */ - iter->index=iter->length; /* may or may not be <0 (unknown) */ - iter->start=iter->limit; - iter->reservedField=0; - return iter->index>=0 ? iter->index : (int32_t)UITER_UNKNOWN_INDEX; - } - } - - /* delta!=0 */ - - /* move towards the requested position, pin to the edges of the string */ - s=(const uint8_t *)iter->context; - pos=iter->index; /* could be <0 (unknown) */ - i=iter->start; - if(delta>0) { - /* go forward */ - int32_t limit=iter->limit; - if(iter->reservedField!=0) { - iter->reservedField=0; - ++pos; - --delta; - } - while(delta>0 && i=2) { - pos+=2; - delta-=2; - } else /* delta==1 */ { - /* stop in the middle of a supplementary code point */ - iter->reservedField=c; - ++pos; - break; /* delta=0; */ - } - } - if(i==limit) { - if(iter->length<0 && iter->index>=0) { - iter->length= iter->reservedField==0 ? pos : pos+1; - } else if(iter->index<0 && iter->length>=0) { - iter->index= iter->reservedField==0 ? 
iter->length : iter->length-1; - } - } - } else /* delta<0 */ { - /* go backward */ - if(iter->reservedField!=0) { - iter->reservedField=0; - i-=4; /* we stayed behind the supplementary code point; go before it now */ - --pos; - ++delta; - } - while(delta<0 && i>0) { - U8_PREV_OR_FFFD(s, 0, i, c); - if(c<=0xffff) { - --pos; - ++delta; - } else if(delta<=-2) { - pos-=2; - delta+=2; - } else /* delta==-1 */ { - /* stop in the middle of a supplementary code point */ - i+=4; /* back to behind this supplementary code point for consistent state */ - iter->reservedField=c; - --pos; - break; /* delta=0; */ - } - } - } - - iter->start=i; - if(iter->index>=0) { - return iter->index=pos; - } else { - /* we started with index<0 (unknown) so pos is bogus */ - if(i<=1) { - return iter->index=i; /* reached the beginning */ - } else { - /* we still don't know the UTF-16 index */ - return UITER_UNKNOWN_INDEX; - } - } -} - -static UBool U_CALLCONV -utf8IteratorHasNext(UCharIterator *iter) { - return iter->startlimit || iter->reservedField!=0; -} - -static UBool U_CALLCONV -utf8IteratorHasPrevious(UCharIterator *iter) { - return iter->start>0; -} - -static UChar32 U_CALLCONV -utf8IteratorCurrent(UCharIterator *iter) { - if(iter->reservedField!=0) { - return U16_TRAIL(iter->reservedField); - } else if(iter->startlimit) { - const uint8_t *s=(const uint8_t *)iter->context; - UChar32 c; - int32_t i=iter->start; - - U8_NEXT_OR_FFFD(s, i, iter->limit, c); - if(c<=0xffff) { - return c; - } else { - return U16_LEAD(c); - } - } else { - return U_SENTINEL; - } -} - -static UChar32 U_CALLCONV -utf8IteratorNext(UCharIterator *iter) { - int32_t index; - - if(iter->reservedField!=0) { - UChar trail=U16_TRAIL(iter->reservedField); - iter->reservedField=0; - if((index=iter->index)>=0) { - iter->index=index+1; - } - return trail; - } else if(iter->startlimit) { - const uint8_t *s=(const uint8_t *)iter->context; - UChar32 c; - - U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c); - 
if((index=iter->index)>=0) { - iter->index=++index; - if(iter->length<0 && iter->start==iter->limit) { - iter->length= c<=0xffff ? index : index+1; - } - } else if(iter->start==iter->limit && iter->length>=0) { - iter->index= c<=0xffff ? iter->length : iter->length-1; - } - if(c<=0xffff) { - return c; - } else { - iter->reservedField=c; - return U16_LEAD(c); - } - } else { - return U_SENTINEL; - } -} - -static UChar32 U_CALLCONV -utf8IteratorPrevious(UCharIterator *iter) { - int32_t index; - - if(iter->reservedField!=0) { - UChar lead=U16_LEAD(iter->reservedField); - iter->reservedField=0; - iter->start-=4; /* we stayed behind the supplementary code point; go before it now */ - if((index=iter->index)>0) { - iter->index=index-1; - } - return lead; - } else if(iter->start>0) { - const uint8_t *s=(const uint8_t *)iter->context; - UChar32 c; - - U8_PREV_OR_FFFD(s, 0, iter->start, c); - if((index=iter->index)>0) { - iter->index=index-1; - } else if(iter->start<=1) { - iter->index= c<=0xffff ? iter->start : iter->start+1; - } - if(c<=0xffff) { - return c; - } else { - iter->start+=4; /* back to behind this supplementary code point for consistent state */ - iter->reservedField=c; - return U16_TRAIL(c); - } - } else { - return U_SENTINEL; - } -} - -static uint32_t U_CALLCONV -utf8IteratorGetState(const UCharIterator *iter) { - uint32_t state=(uint32_t)(iter->start<<1); - if(iter->reservedField!=0) { - state|=1; - } - return state; -} - -static void U_CALLCONV -utf8IteratorSetState(UCharIterator *iter, - uint32_t state, - UErrorCode *pErrorCode) -{ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - /* do nothing */ - } else if(iter==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } else if(state==utf8IteratorGetState(iter)) { - /* setting to the current state: no-op */ - } else { - int32_t index=(int32_t)(state>>1); /* UTF-8 index */ - state&=1; /* 1 if in surrogate pair, must be index>=4 */ - - if((state==0 ? 
index<0 : index<4) || iter->limitstart=index; /* restore UTF-8 byte index */ - if(index<=1) { - iter->index=index; - } else { - iter->index=-1; /* unknown UTF-16 index */ - } - if(state==0) { - iter->reservedField=0; - } else { - /* verified index>=4 above */ - UChar32 c; - U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c); - if(c<=0xffff) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - } else { - iter->reservedField=c; - } - } - } - } -} - -static const UCharIterator utf8Iterator={ - 0, 0, 0, 0, 0, 0, - utf8IteratorGetIndex, - utf8IteratorMove, - utf8IteratorHasNext, - utf8IteratorHasPrevious, - utf8IteratorCurrent, - utf8IteratorNext, - utf8IteratorPrevious, - NULL, - utf8IteratorGetState, - utf8IteratorSetState -}; - -U_CAPI void U_EXPORT2 -uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length) { - if(iter!=0) { - if(s!=0 && length>=-1) { - *iter=utf8Iterator; - iter->context=s; - if(length>=0) { - iter->limit=length; - } else { - iter->limit=(int32_t)uprv_strlen(s); - } - iter->length= iter->limit<=1 ? 
iter->limit : -1; - } else { - *iter=noopIterator; - } - } -} - -/* Helper functions --------------------------------------------------------- */ - -U_CAPI UChar32 U_EXPORT2 -uiter_current32(UCharIterator *iter) { - UChar32 c, c2; - - c=iter->current(iter); - if(U16_IS_SURROGATE(c)) { - if(U16_IS_SURROGATE_LEAD(c)) { - /* - * go to the next code unit - * we know that we are not at the limit because c!=U_SENTINEL - */ - iter->move(iter, 1, UITER_CURRENT); - if(U16_IS_TRAIL(c2=iter->current(iter))) { - c=U16_GET_SUPPLEMENTARY(c, c2); - } - - /* undo index movement */ - iter->move(iter, -1, UITER_CURRENT); - } else { - if(U16_IS_LEAD(c2=iter->previous(iter))) { - c=U16_GET_SUPPLEMENTARY(c2, c); - } - if(c2>=0) { - /* undo index movement */ - iter->move(iter, 1, UITER_CURRENT); - } - } - } - return c; -} - -U_CAPI UChar32 U_EXPORT2 -uiter_next32(UCharIterator *iter) { - UChar32 c, c2; - - c=iter->next(iter); - if(U16_IS_LEAD(c)) { - if(U16_IS_TRAIL(c2=iter->next(iter))) { - c=U16_GET_SUPPLEMENTARY(c, c2); - } else if(c2>=0) { - /* unmatched first surrogate, undo index movement */ - iter->move(iter, -1, UITER_CURRENT); - } - } - return c; -} - -U_CAPI UChar32 U_EXPORT2 -uiter_previous32(UCharIterator *iter) { - UChar32 c, c2; - - c=iter->previous(iter); - if(U16_IS_TRAIL(c)) { - if(U16_IS_LEAD(c2=iter->previous(iter))) { - c=U16_GET_SUPPLEMENTARY(c2, c); - } else if(c2>=0) { - /* unmatched second surrogate, undo index movement */ - iter->move(iter, 1, UITER_CURRENT); - } - } - return c; -} - -U_CAPI uint32_t U_EXPORT2 -uiter_getState(const UCharIterator *iter) { - if(iter==NULL || iter->getState==NULL) { - return UITER_NO_STATE; - } else { - return iter->getState(iter); - } -} - -U_CAPI void U_EXPORT2 -uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) { - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - /* do nothing */ - } else if(iter==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } else if(iter->setState==NULL) { - 
*pErrorCode=U_UNSUPPORTED_ERROR; - } else { - iter->setState(iter, state, pErrorCode); - } -} - -U_CDECL_END diff --git a/deps/node/deps/icu-small/source/common/ulayout_props_data.h b/deps/node/deps/icu-small/source/common/ulayout_props_data.h deleted file mode 100644 index f42d15fc..00000000 --- a/deps/node/deps/icu-small/source/common/ulayout_props_data.h +++ /dev/null @@ -1,722 +0,0 @@ -// © 2018 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// -// file name: ulayout_props_data.h -// -// machine-generated by: icu/tools/unicode/c/genprops/layoutpropsbuilder.cpp - - -#ifdef INCLUDED_FROM_UPROPS_CPP - -static const int32_t maxInPCValue = 14; - -static const uint16_t inpc_trieIndex[765]={ -0,0x40,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x80,0xc0,0xff,0x13f,0x17e,0x1be,0x17e,0x1fe,0x23e,0x27e,0x2bc,0x2fc, -0x33c,0x37b,0x23e,0x3bb,0x3fb,0x439,0x477,0x4ad,0x4e1,0x521,0x531,0x571,0x599,0x5d9,0x619,0x656, -0x2b7,0x2c6,0x2d2,0x2c6,0x2ed,0,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0,0x10,0x20, -0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20, -0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0x80,0x90,0xa0, -0xb0,0xc0,0xd0,0xe0,0xf0,0xff,0x10f,0x11f,0x12f,0x13f,0x14f,0x15f,0x16f,0x17e,0x18e,0x19e, -0x1ae,0x1be,0x1ce,0x1de,0x1ee,0x17e,0x18e,0x19e,0x1ae,0x1fe,0x20e,0x21e,0x22e,0x23e,0x24e,0x25e, -0x26e,0x27e,0x28e,0x29e,0x2ae,0x2bc,0x2cc,0x2dc,0x2ec,0x2fc,0x30c,0x31c,0x32c,0x33c,0x34c,0x35c, -0x36c,0x37b,0x38b,0x39b,0x3ab,0x23e,0x24e,0x25e,0x26e,0x3bb,0x3cb,0x3db,0x3eb,0x3fb,0x40b,0x41b, -0x42b,0x439,0x449,0x459,0x469,0x477,0x487,0x497,0x4a7,0x4ad,0x4bd,0x4cd,0x4dd,0x4e1,0x4f1,0x501, -0x511,0x521,0x531,0x541,0x551,0x531,0x541,0x551,0x561,0x571,0x581,0x591,0x5a1,0x599,0x5a9,0x5b9, -0x5c9,0x5d9,0x5e9,0x5f9,0x609,0x619,0x629,0x639,0x649,0x656,0x666,0x676,0x686,0,0,0x68b, -0x69a,0,0x6a9,0x6b8,0x6c7,0x6d5,0x6e5,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0x6f3,0,0x6f3, 
-0,0x701,0,0x701,0,0,0,0x70b,0x71b,0x729,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x739,0x749,0,0, -0,0,0,0,0,0x759,0x768,0,0,0,0x772,0,0,0,0x77e,0x78d, -0x79b,0,0,0,0,0,0,0,0,0x7ab,0,0,0x7b7,0x7c7,0,0x7cc, -0x52c,0x81,0,0x7dc,0,0,0,0x7ea,0x3fb,0,0,0x7fa,0x807,0,0,0, -0,0,0,0,0,0,0x817,0x827,0x835,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x2b3,0x83f,0,0x84c,0,0,0,0, -0,0x101,0,0,0x858,0x864,0,0x874,0x882,0,0,0x892,0,0x8a0,0x3fb,0, -0,0x80,0,0,0x8b0,0x8c0,0,0x2b9,0,0,0x8c7,0x8d6,0x8e3,0,0,0x8f1, -0,0,0,0x901,0x2bd,0,0x911,0x151,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0x921,0,0x930,0,0,0x940,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x950,0,0,0x958,0x966,0,0,0, -0x81,0,0,0x976,0,0,0,0,0x52d,0,0x981,0x991,0x3cb,0,0,0x659, -0x81,0,0,0x99e,0x9ae,0,0,0,0x9bb,0x9cb,0,0,0,0,0,0, -0,0,0,0x71,0x9db,0,0xff,0,0,0x9e6,0x9f6,0x14f,0xa04,0x52b,0,0, -0,0,0,0,0,0,0x99c,0xa14,0x16f,0,0,0,0,0,0xa24,0xa33, -0,0,0,0,0,0,0,0,0,0,0,0,0,0x2eb,0xa43,0xe3, -0x214,0,0,0,0xa53,0x2be,0,0,0,0,0,0xa63,0xa73,0,0,0, -0,0,0xa7b,0xa8b,0,0,0,0,0,0,0,0,0,0,0,0, -0,0xa97,0xaa6,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xab5, -0,0,0xac2,0,0xad1,0,0,0xadd,0xae7,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x2eb, -0xaf7,0,0,0,0,0,0xb07,0xb0f,0xb1e,0,0,0,0,0,0,0, -0xb2d,0xb3c,0,0,0,0xb44,0xb54,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0xb61,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0x45,0x4d,0x4d,0x4d,0x5d,0x7d,0x9d,0xbd,0xdd, -2,2,0xec,0x10a,0x129,0x149,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,0x169,0x188,2,2,2,2,2,2,2,2, -2,2,0x1a8,2,2,0x1c8,0x1e6,0x203,0x221,0x23f,0x25f,0x27d,0x297 -}; - -static const uint8_t inpc_trieData[2930]={ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -8,8,8,7,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,8,7,1,0,7,4, -7,1,1,1,1,8,8,8,8,7,7,7,7,1,4,7, -0,8,1,8,8,8,1,1,0,0,0,0,0,0,0,0, -0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -8,7,7,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,1,0,7,4,7, -1,1,1,1,0,0,4,4,0,0,5,5,1,0,0,0, -0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0, -0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,8, -8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,7,4,7,1, -1,0,0,0,0,8,8,0,0,8,8,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,8, -0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,1, -1,1,1,8,0,8,8,0xd,0,7,7,1,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,8,8,8,8,8,8,0,8, -7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,0,7,8,7,1, -1,1,1,0,0,4,0xb,0,0,5,0xc,1,0,0,0,0, -0,0,0,0,8,0xd,0,0,0,0,0,0,0,0,0,0, -1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,7,7,8,7,7,0, -0,0,4,4,4,0,5,5,5,8,0,0,0,0,0,0, -0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,8,7,7,7, -8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,8,8,7,7,7,7, -0,8,8,9,0,8,8,8,8,0,0,0,0,0,0,0, -8,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xd,7,7,7,7, -0,8,0xd,0xd,0,0xd,0xd,8,8,0,0,0,0,0,0,0, -7,7,0,0,0,0,0,0,0,0,0,0,0,1,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,8,8,7,7,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,8,8,0,7,7,7,1,1,0,4, 
-4,4,0,5,5,5,8,0,0,0,0,0,0,0,0,0, -7,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,7,7,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,8,0,0,0,0,7,7,7,8, -8,1,0,1,0,7,4,0xb,4,5,0xc,5,7,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7, -7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,7,8,7,7,8,8,8,8,1,1,1,0,0,0,0, -0,0xe,0xe,0xe,0xe,0xe,7,0,8,8,8,8,8,8,8,8, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,7,8,7,7,8,8,8,8,1,1,0,8,1,0,0, -0,0xe,0xe,0xe,0xe,0xe,0,0,0,8,8,8,8,8,8,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -1,0,8,0,0,0,0,7,4,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,8,9,1,1,9, -9,9,9,8,8,8,8,8,7,8,9,8,8,1,0,8, -8,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1, -1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,7,7,8,8,1,4,8,8,8,8, -8,1,7,0,8,7,0,1,1,0,0,0,0,0,0,7, -7,1,1,0,0,0,0,1,1,0,7,7,7,0,0,7, -7,7,7,7,7,7,0,0,8,8,8,8,0,0,0,0, -0,0,0,0,0,0,0,1,7,4,8,8,7,7,7,7, -7,7,1,0,7,0,0,0,0,0,0,0,0,0,0,7, -7,7,8,0,0,8,1,1,0,0,0,0,0,0,0,0, -0,0,0,8,1,0,0,0,0,0,0,0,0,0,0,0, -0,7,8,8,8,8,1,1,1,0xb,0xc,5,4,4,4,5, -5,8,7,7,8,8,8,8,8,8,8,0,8,0,0,0, -0,0,0,0,0,0,8,0,0,8,8,1,7,7,0xd,0xd, -8,8,7,7,7,0,0,0,0,7,7,1,7,7,7,7, -7,7,1,8,1,0,0,0,0,7,7,7,7,7,0xe,0xe, -0xe,7,7,0xe,7,7,7,7,7,0,0,0,0,0,0,0, -7,7,0,0,0,0,0,0,0,8,1,4,7,8,0,0, -0,0,0,4,1,7,8,8,8,1,1,1,1,0,7,8, -7,7,8,8,8,8,1,1,8,1,7,4,4,4,8,8, -8,8,8,8,8,8,8,8,0,0,1,8,8,8,8,7, -0,0,0,0,0,0,0,0,0,0,0,8,7,8,8,1, -1,1,3,9,0xa,4,4,5,5,8,0xd,7,0,0,0,0, 
-0,0,0,0,0,0,0,8,1,8,8,8,0,7,1,1, -8,1,4,7,8,8,7,0,1,1,0,0,0,0,0,0, -8,7,8,8,7,7,7,8,7,8,0,0,0,0,7,7, -7,4,4,0xb,7,7,1,8,8,8,8,4,4,8,1,0, -0,0,0,0,0,0,0,8,8,8,0,6,1,1,1,1, -1,8,8,1,1,1,1,8,7,6,6,6,6,6,6,6, -0,0,0,0,1,0,0,0,0,8,0,0,7,0,0,0, -0,0,0,0,0,8,0,0,0,0,8,0,0,0,0,7, -7,1,8,7,0,0,0,0,0,0,0,0,7,7,7,7, -7,7,7,7,7,7,7,7,1,8,0,0,0,0,0,0, -0,0,0,0,8,8,8,8,8,8,8,8,8,8,8,8, -8,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0, -0,8,0,0,0,0,0,0,0,0,0,0,0,1,1,1, -0,0,0,0,0,0,0,1,1,1,8,1,1,1,1,8, -0,0,0,8,7,7,8,8,1,1,4,4,8,7,7,2, -3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -8,8,8,8,1,8,4,8,1,7,4,1,1,0,0,0, -0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,8, -7,0,0,0,0,0,0,0,0,0,0,0,7,8,7,0, -0,8,7,8,8,1,0xe,0xe,8,8,0xe,7,0xe,0xe,7,8, -8,0,0,0,0,0,0,0,0,0,0,0,4,1,8,4, -7,0,0,0,7,7,8,7,7,1,7,7,0,7,1,0, -0,6,1,1,0,8,6,0,0,0,0,0,1,1,1,8, -0,0,0,0,0,0,0,0,8,1,1,0,0,0,0,0, -7,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0, -8,8,8,8,1,1,1,1,8,8,8,8,8,0,0,0, -0,0,0,0,0,0,7,4,7,1,1,8,8,7,7,1, -1,0,0,0,0,0,0,0,8,8,8,1,1,4,8,9, -9,8,1,1,0,8,0,0,0,0,0,0,0,0,0,0, -0,7,4,7,1,1,1,1,1,1,8,8,8,0xd,7,0, -0,0,0,0,0,0,0,1,0,8,1,0,0,0,0,0, -0,0,0,0,0,0,0,7,7,7,1,8,8,0xd,0xd,8, -7,8,8,0,0,0,0,0,0,8,0,7,4,7,1,1, -8,8,8,8,1,1,0,0,0,0,0,0,0,0,0,0, -0,1,1,0,7,7,8,7,7,7,7,0,0,4,4,0, -0,5,5,7,0,0,7,7,0,0,8,8,8,8,8,8, -8,0,0,0,7,7,1,8,8,7,1,0,0,0,0,0, -0,0,0,0,7,4,7,1,1,1,1,1,1,4,8,0xb, -5,7,5,8,7,1,1,0,0,0,0,0,0,0,0,0, -0,0,0,4,7,1,1,1,1,0,0,4,0xb,5,0xc,8, -8,7,1,7,7,7,1,1,1,1,1,1,8,8,7,7, -8,7,1,0,0,0,0,0,0,0,0,0,0,0,8,7, -8,4,7,1,1,8,8,8,8,7,1,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,8,7,7,8,8,1, -1,4,8,1,8,8,8,0,0,0,0,0,0,0,0,0, -0,0,0,7,4,7,1,1,1,8,8,8,8,8,7,1, -1,0,0,0,0,0,8,1,1,8,8,8,8,8,8,1, -0,0,0,0,0,1,1,8,8,8,8,7,0,1,1,1, -1,0,8,1,1,8,8,8,7,7,1,1,1,0,0,0, -0,0,0,0,0,0,0,1,1,1,1,1,1,8,7,8, -0,0,0,0,0,0,0,8,8,1,1,1,1,1,0,8, -8,8,8,8,8,7,1,0,0,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0,7,1,1,1,1,1,1,4, -1,8,7,8,8,0,0,0,0,0,0,0,0,0,8,8, -8,8,8,1,0,0,0,8,0,8,8,0,8,8,1,8, -1,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7, -7,7,7,0,8,8,0,7,7,8,7,0,0,0,0,0, -0,0,0,0,8,1,4,7,0,0,0,0,0,0,0,0, 
-0,0 -}; - -static const UCPTrie inpc_trie={ - inpc_trieIndex, - { inpc_trieData }, - 765, 2930, - 0x12000, 0x12, - 1, 2, - 0, 0, - 0x2, 0x0, - 0x0, -}; - -static const int32_t maxInSCValue = 35; - -static const uint16_t insc_trieIndex[834]={ -0,0x40,0x60,0x94,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0xd4,0x112,0x152,0x190,0x1cf,0x20d,0x24c,0x28a,0x2ca,0x308,0x346,0x384, -0x3c4,0x402,0x441,0x47f,0x4bf,0x4fd,0x53d,0x57d,0x5bc,0x5fc,0x63b,0x67b,0x69b,0x6db,0x71b,0x758, -0x2f8,0x30b,0x317,0x30b,0x332,0,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0x60,0x70,0x80, -0x90,0x94,0xa4,0xb4,0xc4,0x40,0x50,0x60,0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60, -0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60, -0x70,0x40,0x50,0x60,0x70,0xd4,0xe4,0xf4,0x104,0x112,0x122,0x132,0x142,0x152,0x162,0x172, -0x182,0x190,0x1a0,0x1b0,0x1c0,0x1cf,0x1df,0x1ef,0x1ff,0x20d,0x21d,0x22d,0x23d,0x24c,0x25c,0x26c, -0x27c,0x28a,0x29a,0x2aa,0x2ba,0x2ca,0x2da,0x2ea,0x2fa,0x308,0x318,0x328,0x338,0x346,0x356,0x366, -0x376,0x384,0x394,0x3a4,0x3b4,0x3c4,0x3d4,0x3e4,0x3f4,0x402,0x412,0x422,0x432,0x441,0x451,0x461, -0x471,0x47f,0x48f,0x49f,0x4af,0x4bf,0x4cf,0x4df,0x4ef,0x4fd,0x50d,0x51d,0x52d,0x53d,0x54d,0x55d, -0x56d,0x57d,0x58d,0x59d,0x5ad,0x5bc,0x5cc,0x5dc,0x5ec,0x5fc,0x60c,0x61c,0x62c,0x63b,0x64b,0x65b, -0x66b,0x67b,0x68b,0x69b,0x6ab,0x69b,0x6ab,0x6bb,0x6cb,0x6db,0x6eb,0x6fb,0x70b,0x71b,0x72b,0x73b, -0x74b,0x758,0x768,0x778,0x788,0xe9,0xe9,0x798,0x7a3,0x7b3,0x7c3,0x7d2,0x7e1,0x7ef,0x7ff,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x80f,0x81d,0xe6,0x81d,0xe6,0x82d,0x80f,0x83d,0xe9,0xe9,0x84d, -0x859,0x863,0x872,0x30,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x882,0x16c,0x892,0x8a2,0x22d,0xe9,0x8b2,0x8c2,0xe9,0xe9,0x374,0x8d2, 
-0x8e1,0x30,0x40,0x40,0xe9,0x8f1,0xe9,0xe9,0x901,0x90e,0x91e,0x92a,0x30,0x30,0x40,0x40, -0x40,0x40,0x40,0x40,0x93a,0xe6,0xe9,0x94a,0x956,0x30,0x40,0x40,0x966,0xe9,0x975,0x985, -0xe9,0xe9,0x995,0x9a5,0xe9,0xe9,0x9b5,0x9c2,0x9d2,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x9e2,0x9f0,0x9fe,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0xa08,0xa14,0xa24,0x40,0x40,0x40,0x40,0x40,0x75a,0xa32,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x74,0x40,0x40,0x40,0xa42,0xe9,0xa4f, -0x40,0xe9,0xa5f,0xa6d,0xa7c,0xd6,0xe7,0xe9,0xa8c,0xa98,0x30,0xaa8,0xab6,0xac6,0xe9,0xad4, -0xe9,0xae4,0xaf3,0x40,0x40,0xb03,0xe9,0xe9,0xb12,0x297,0x30,0xb22,0xb32,0xe3,0xe9,0x889, -0xb42,0xb52,0x30,0xe9,0xb61,0xe9,0xe9,0xe9,0xb71,0xb81,0x40,0xb91,0xba1,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xbb1,0xbc1,0xbce,0x30,0xbde,0xbee,0xe9, -0xbf8,0x31,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xc08,0xe6,0xe9, -0x88a,0xc18,0xc26,0xc30,0xc40,0xc50,0xe9,0xe9,0xc60,0x40,0x40,0x40,0x40,0xc70,0xe9,0x88b, -0xc80,0xc90,0xca0,0xe9,0xcad,0xd5,0xe8,0xe9,0xcbd,0xccd,0x30,0x6ba,0x35,0xe1,0x3eb,0x886, -0xcdd,0x40,0x40,0x40,0x40,0xced,0x16d,0xcfc,0xdf,0xe9,0xd0c,0xd1c,0x30,0xd2c,0x162,0x172, -0xd3c,0x308,0xd4c,0xd5c,0x9ed,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xdb,0xe9,0xe9, -0xd6c,0xd7a,0xd8a,0x40,0x40,0xd99,0xe9,0xe9,0x91f,0xda9,0x30,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0xdb,0xe9,0xff,0xdb9,0xdc9,0xdd1,0x40,0x40,0xdb,0xe9,0xe9, -0xde1,0xdf1,0x30,0x40,0x40,0xdf,0xe9,0xe01,0xe0e,0x30,0x40,0x40,0x40,0xe9,0xe1e,0xe2e, -0xe3e,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xdf,0xe9,0x886, -0xe4e,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xe5e,0xe9,0xe9, 
-0xe6b,0xe7b,0xe8b,0xe9,0xe9,0xe97,0xea1,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xeb1,0xe9,0xff, -0xec1,0xed1,0x6bb,0xee1,0x555,0xe9,0xeef,0x72b,0xeff,0x40,0x40,0x40,0x40,0xf0f,0xe9,0xe9, -0xf1e,0xf2e,0x30,0xf3e,0xe9,0xf4a,0xf57,0x30,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0xe9,0xf67,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, -0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x45,0x55,0x55,0x55,0x65,0x85,0xa5,0xc5, -0xe5,4,4,0xf5,0x114,0x134,0x154,4,0x174,4,0x17d,4,4,4,4,4, -4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, -4,4,4,4,4,4,4,4,4,4,4,0x19d,0x1bd,4,4,4, -4,4,4,4,4,4,4,0x1dd,4,4,0x1fd,0x21d,0x23d,0x25d,0x27d,0x29d, -0x2bd,0x2d8 -}; - -static const uint8_t insc_trieData[3960]={ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0xc,0,0, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0xc,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0x1c,0x1c,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0xc,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,2,2,2,0x20,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23, -0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x22,0x22, -0x17,1,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f, -0x22,0x22,0,4,4,0,0,0x22,0x22,0x22,5,5,5,5,5,5, -5,5,0x23,0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -0x18,0x18,0,0,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,5,5,5, -5,5,0xc,2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0, -0,0x23,0x23,0,0,0x23,0x23,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,0,5,5,5,5, -5,5,5,0,5,0,0,0,5,5,5,5,0,0,0x17,1, -0x22,0x22,0x22,0x22,0x22,0,0,0x22,0x22,0,0,0x22,0x22,0x1f,6,0, -0,0,0,0,0,0,0,0x22,0,0,0,0,5,5,0,5, 
-0x23,0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -5,5,0,0,0,0,0,0,0,0,0,0,2,0,0x1c,0, -2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0,0,0x23,0x23, -0,0,0x23,0x23,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,5, -0,5,5,0,5,5,0,5,5,0,0,0x17,0,0x22,0x22,0x22, -0,0,0,0,0x22,0x22,0,0,0x22,0x22,0x1f,0,0,0,4,0, -0,0,0,0,0,0,5,5,5,5,0,5,0,0,0,0, -0,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,2,0x12,0xc, -0xc,0,0xb,0,0,0,0,0,0,0,0,0,0,2,2,0x20, -0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0x23,0,0x23, -0x23,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,5, -0,5,5,5,5,5,0,0,0x17,1,0x22,0x22,0x22,0x22,0x22,0x22, -0,0x22,0x22,0x22,0,0x22,0x22,0x1f,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0x23,0x23,0x22,0x22,0,0, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0,0, -0,0,0,5,4,4,4,0x17,0x17,0x17,0,2,2,0x20,0,0x23, -0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0x23,0x23,0,0,0x23,0x23,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,0,5,5,5,5,5,5,5,0,5,5,0,5, -5,5,5,5,0,0,0x17,1,0x22,0x22,0x22,0x22,0x22,0,0,0x22, -0x22,0,0,0x22,0x22,0x1f,0,0,0,0,0,0,0,0,0x22,0x22, -0,0,0,0,5,5,0,5,0x23,0x23,0x22,0x22,0,0,0x18,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,5,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,2,0x15,0,0x23,0x23,0x23,0x23,0x23, -0x23,0,0,0,0x23,0x23,0x23,0,0x23,0x23,0x23,5,0,0,0,5, -5,0,5,0,5,5,0,0,0,5,5,0,0,0,5,5, -5,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, -0,0,0,0,0x22,0x22,0x22,0,0,0,0x22,0x22,0x22,0,0x22,0x22, -0x22,0x1f,0,0,0,0,0,0,0,0,0,0x22,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0x18,0x18,0x18,0x18,0x18,0x18, -0x18,0x18,0x18,0x18,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,2,2,2,0x20,2,0x23,0x23,0x23,0x23,0x23,0x23,0x23, -0x23,0,0x23,0x23,0x23,0,0x23,0x23,0x23,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0, -0,1,0x22,0x22,0x22,0x22,0x22,0,0x22,0x22,0x22,0,0x22,0x22,0x22,0x1f, -0,0,0,0,0,0,0,0x22,0x22,0,5,5,5,0,0,0, -0,0,0x23,0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, 
-0x18,0x18,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23, -0x23,0x23,0,0x23,0x23,0x23,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0,5,5,5,5,5, -5,5,5,5,5,0,5,5,5,5,5,0,0,0x17,1,0x22, -0x22,0x22,0x22,0x22,0,0x22,0x22,0x22,0,0x22,0x22,0x22,0x1f,0,0,0, -0,0,0,0,0x22,0x22,0,0,0,0,0,0,0,5,0,0x23, -0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0, -0x11,0x11,0,0,0,0,0,0,0,0,0,0,0,0,0,2, -2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0x23, -0,0x23,0x23,0x23,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,0x1a,0x1a,1,0x22,0x22,0x22, -0x22,0x22,0,0x22,0x22,0x22,0,0x22,0x22,0x22,0x1f,0xd,0,0,0,0, -0,6,6,6,0x22,0,0,0,0,0,0,0,0x23,0x23,0x23,0x22, -0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0, -0,0,0,0,0,0,0,6,6,6,6,6,6,0,0,2, -0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23, -0x23,0x23,0x23,0x23,0,0,0,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, -5,5,5,5,5,5,5,5,5,0,5,0,0,5,5,5, -5,5,5,5,0,0,0,0x1f,0,0,0,0,0x22,0x22,0x22,0x22, -0x22,0x22,0,0x22,0,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0,0,0, -0,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0x22, -0x22,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,0,0x22,0x22,0x22,0x22, -0x22,0x22,0x22,0x22,0x22,0x22,0x1a,0,0,0,0,0,0x22,0x22,0x22,0x22, -0x22,0x22,0,0x22,0x1e,0x1e,0x1e,0x1e,0xa,2,0x1a,0,0x18,0x18,0x18,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,5,5,0,5, -0,0,5,5,0,5,0,0,5,0,0,0,0,0,0,5, -5,5,5,0,5,5,5,5,5,5,5,0,5,5,5,0, -5,0,5,0,0,5,5,0,5,5,0,0x22,0x22,0x22,0x22,0x22, -0x22,0x22,0x22,0x22,0x22,0,0x22,0xb,0xb,0,0,0x22,0x22,0x22,0x22,0x22, -0,0,0,0x1e,0x1e,0x1e,0x1e,0,2,0,0,0x18,0x18,0x18,0x18,0x18, -0x18,0x18,0x18,0x18,0x18,0,0,5,5,5,5,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
-0,0,0,0,0,0,0,0,0,0,0,0x18,0x18,0x18,0x18,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0, -0x1c,0,0x1c,0,0x17,0,0,0,0,0,0,5,5,5,5,5, -5,5,5,0,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,5,5,5,5,5,5,0,0,0,0,0x22,0x22,0x22,0x22, -0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x20,0x22,0x22,2,2,0x1a, -1,0,0,8,8,8,8,8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, -0xf,0xf,0xf,0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, -0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, -0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0,0,0,0,0,0,0x1c,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,5,0x23,0x23,0x23,0x23,0x23,0x23,0x23, -0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,2,0x1e,0x20,0x13,0x1a,0xb,0xb, -0xb,0xb,5,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0xc,0, -0,0xc,0,5,5,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,5,5,5, -5,0xb,0xb,5,0x22,0x1e,0x1e,5,5,0x22,0x22,0x1e,0x1e,0x1e,0x1e,0x1e, -5,5,0x22,0x22,0x22,0x22,5,5,5,5,5,5,5,5,5,5, -5,0xb,0x22,0x22,0x22,0x22,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,5,0x1e,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x1e,0x1e,0x22,0x22,0,0,0x23, -0x23,0x23,5,5,5,5,5,5,5,5,5,5,0,5,5,0x22, -0x22,0x1a,0,0,0,0,0,0,0,0,0,0,0,5,5,0x22, -0x22,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0x22, -0x22,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5, -0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0x22, -0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x20,0x22,0x1b,0x1b,0x1c,0x10, -0xa,0x1c,0x1c,0x1a,0x13,0x1c,0,0,0,0,0,0,0,0,1,0x1c, -0,0,0xc,5,5,5,5,5,5,5,5,5,5,5,5,5, -5,5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0xf,0xf,0xf,0,0, -0,0,7,7,2,7,7,7,7,7,7,7,0x22,0x1c,0,0, -0,0,5,5,5,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21, -0,0,0x1d,0x1d,0x1d,0x1d,0x1d,0,0,0,0,0,0,0,0,0, -0,0,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22, -0x22,0x22,7,7,7,7,7,7,7,0x1e,0x1e,0,0,0,0,0, 
-0,5,5,5,5,5,5,5,0x22,0x22,0x22,0x22,0x22,0,0,0, -0,5,5,5,5,5,5,5,5,5,5,5,5,5,0x23,0x23, -0x23,5,5,0xb,0xb,0xf,7,7,9,0xf,0xf,0xf,0xf,0,0x13,0x22, -0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x1e, -0x1e,0x1e,0x1e,0x1e,0x1a,0x1c,0x1c,0,0,0x1c,2,2,2,0x10,0x20,0x23, -0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,5,0x17,0x22, -0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,5,5,5,5,5, -5,5,0,0,0,0,2,0x10,0x20,0x23,0x23,0x23,0x23,0x23,0x23,0x23, -5,5,5,5,5,5,0xf,0xf,0xf,0x22,0x22,0x22,0x22,0x22,0x22,0x1a, -0x13,0xf,0xf,5,5,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,1, -5,5,5,7,7,5,5,5,5,0x23,0x23,0x17,0x22,0x22,0x22,0x22, -0x22,0x22,0x22,0x22,0x22,7,7,0x1a,0x1a,0,0,0,0,0,0,0, -0,0,0,0,0,5,5,5,5,0xf,0xf,0x22,0x22,0x22,0x22,0x22, -0x22,0x22,7,7,7,7,2,2,0x1c,0x17,0,0,0,0,0,0, -0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,5, -5,5,4,4,4,0,4,4,4,4,4,4,4,4,4,4, -4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x20,0x20,4,0x11,0x11,4,4,4,0,0,0,0,0,0,0,0, -0,0,0,0x1c,0,0,0,0,0,0,0,0,0,0,0,0, -0x16,0x14,0,0,0xc,0xc,0xc,0xc,0xc,0,0,0,0,0,0,0, -0,0,0,0,0x1c,0x1c,0x1c,0,0,0,0,0,0,0,0,0, -0,0,0x23,0x23,0,0x23,0x23,0x23,0x1a,5,5,5,5,2,5,5, -5,5,0x22,0x22,0x22,0x22,0x22,0,0,0,0,0,0,0,0,5, -5,5,5,5,5,5,5,5,5,5,5,5,5,0x21,0x21,5, -5,5,5,0x21,0xf,0xf,5,5,5,5,5,5,5,0xf,5,2, -0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, -0xb,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,2,0,0, -0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4, -4,4,4,4,4,4,4,4,2,2,0,0,0,0,0,0, -0,0,0,0,0x23,0x22,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -5,5,5,5,5,5,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x1e, -0x1e,0x1e,0,0,5,5,5,5,5,5,5,0x22,0x22,0x22,0x22,0x22, -0x22,0x22,0x22,7,7,7,0x1a,0,0,0,0,0,0,0,0,0, -0,0,0,2,2,0x10,0x20,0x23,0x23,0x23,0x23,0x23,5,5,5,0x23, -0x23,0x23,5,5,5,0x17,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0xf, -0xb,0xb,5,5,5,5,5,0x22,0,5,5,5,5,5,5,5, -5,5,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,5,5,5,5, 
-5,0,0x22,0x22,0x22,0xb,0xb,0xb,0xb,0,0,0,0,0,0,0, -0,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7, -0,0,5,5,5,0xc,0xc,0xc,0,0,0,5,0x1e,0x1e,0x1e,5, -5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22, -0x1e,0x1d,0x1e,0x1d,0,0,0,0,0,0,0,0,0,0,0,0, -0,0x23,0x23,5,5,5,5,5,5,5,5,5,0x22,0x22,0x22,0x22, -0x22,0,0,0,0,0,0x20,0x13,0,0,0,0,0,0,0,0, -0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x23, -0x23,5,0x23,5,5,5,5,5,5,5,5,5,7,7,7,7, -7,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0,0x1e,0x1a,0,0,5,0x22, -0x22,0x22,0,0x22,0x22,0,0,0,0,0,0x22,0x22,2,0x20,5,5, -5,5,0,5,5,5,0,5,5,5,5,5,5,5,0,0, -0x17,0x17,0x17,0,0,0,0,0x13,2,2,0x20,0x11,0x11,0x23,0x23,0x23, -0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,0, -0,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3, -3,3,3,3,3,3,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x19, -2,2,0x20,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5, -0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,0x17,0,0,0,0,0, -2,2,0x20,0x23,0x23,0x23,0x23,5,5,5,5,5,5,5,5,5, -0x22,0x22,0x22,0x13,0x1a,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, -0,0,0,0,5,0x22,0x22,0,0,0,0,0,0,0,0,0, -0x21,0x21,0x21,0x21,0x21,5,5,5,5,5,5,5,5,5,5,5, -0x17,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5, -0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,1,0xe, -0xe,0,0,0,0,0,0x1c,0x17,0x22,0x22,0,0,0,0x22,0x22,0x22, -0x22,2,0x1f,0x17,0x12,0,0,0,0,0,0,4,0,0x23,0x23,0x23, -0x23,5,5,5,0,5,0,5,5,5,5,0,5,5,5,5, -5,5,5,5,5,0,0,0,0,0,0,0,5,5,5,5, -5,5,5,5,5,5,5,5,5,5,5,2,0x22,0x22,0x22,0x22, -0x22,0x22,0x22,0x22,0x22,0x17,0x1a,0,0,0,0,0,2,2,2,0x20, -0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0x23,5,0,5,5, -0,5,5,5,5,5,0,0x17,0x17,1,0x22,0x22,0,0,0,0, -0,0,0,0x22,0,0,0,0,0,0,2,2,0x23,0x23,0x22,0x22, -0,0,4,4,4,4,4,4,4,0,0,0,5,5,5,5, -5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,2,2,0x20, -0x17,1,0,0,0,0,0,0,0,0,0x18,0x18,0x18,0x18,0x18,0x18, 
-0x18,0x18,0x18,0x18,0,0,0,0,0x1c,0,0x23,0x23,0x23,0x23,0x23,0x23, -0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,2,0x20,0x1f,0x17,1,0,0, -0,0,0,0,0,0,0,0,0,0x22,0x22,0x22,0x22,0x22,0x22,0, -0,0x22,0x22,0x22,0x22,2,2,0x20,0x1f,0x17,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0x23,0x23,0x23,0x23,0x22,0x22,0, -0,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x20, -0x1f,0x22,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,5,5,5,5,5,5,5,5,5,5,5,2,0x20,0x22,0x22, -0x22,0x22,0x22,0x22,0x1f,0x17,0,0,0,0,0,0,0,0,5,5, -5,5,5,5,5,5,5,5,5,0,0,0xb,0xb,0xb,0x22,0x22, -0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1a,0,0,0,0,0x18,0x18, -0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0x22,0x22, -0x22,0x22,0x22,0x22,0x22,2,0x20,0x1f,0x17,0,0,0,0,0,0x23,0x22, -0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,5,5,5,5,5,0x1c,0x1a, -2,2,2,2,0x20,0xe,0xb,0xb,0xb,0xb,0xc,0,0,0,0,0, -0xc,0,0x13,0,0,0,0,0,0,0,0,0x23,0x22,0x22,0x22,0x22, -0x22,0x22,0x22,0x22,0x22,0x22,0x22,5,5,5,5,0,0,0xe,0xe,0xe, -0xe,7,7,7,7,7,7,2,0x20,0x12,0x13,0,0,0,1,0, -0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0x23,0x23,5, -5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0,0x22,0x22,0x22,0x22,2,2,0x20, -0x1f,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0, -0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x22, -0x22,0x22,0x22,0x22,2,2,0,0,0,0,0,0,0,0,0,0x23, -0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0,0x23,5,5,5,5,0x22, -0x22,0x22,0x22,0x22,0x22,0,0,0,0x22,0,0x22,0x22,0,0x22,2,0x20, -0x17,0x22,0x1a,0x13,0xd,0xb,0,0,0,0,0,0,0,0,0x23,0x23, -0x23,0x23,0x23,0x23,0,0x23,0x23,0,0x23,0x23,5,5,5,5,5,5, -5,5,5,5,0x22,0x22,0x22,0x22,0x22,0,0x22,0x22,2,0x20,0x13,0, -0,0,0,0,0,0,0,5,5,0xc,0x22,0x22,0x22,0x22,0,0, -0,0,0,0,0,0,0,0 -}; - -static const UCPTrie insc_trie={ - insc_trieIndex, - { insc_trieData }, - 834, 3960, - 0x12000, 0x12, - 1, 2, - 0, 0, - 0x4, 0x40, - 0x0, -}; - -static const int32_t maxVoValue = 3; - -static const uint16_t vo_trieIndex[1100]={ 
-0,0x40,0x59,0x98,0,0,0,0,0,0,0,0xd0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x33b,0x355,0x363,0x379,0x399,0x3b7,0x3d2,0x3ec,0x355,0x355,0x355,0x40c,0x355,0x355,0x355,0x40c, -0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c, -0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c, -0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x355,0x355,0x355,0x40c, -0x355,0x355,0x355,0x40c,0,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0x59,0x69,0x79,0x89, -0x98,0xa8,0xb8,0xc8,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30, -0,0x10,0x20,0x30,0xd0,0xe0,0xf0,0x100,0,0x10,0x20,0x30,0,0x10,0x20,0x30, -0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30, -0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x10f,0x110,0x110,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0x110,0x110,0x110,0x110,0x110,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0xa9,0x96,0x11e,0x12c,0xae,0xaa,0,0,0,0,0, -0,0x103,0x13c,0,0x14c,0x158,0x166,0x10b,0x175,0x110,0x110,0x110,0x184,0,0,0, -0,0,0,0,0x72,0,0xf6,0,0,0,0,0,0,0,0,0, -0,0,0,0x190,0x110,0x198,0,0,0,0,0x103,0x110,0x115,0,0xec,0x1a8, -0x1b6,0x10e,0x110,0x110,0x1c6,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0,0,0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110, -0x110,0x110,0x116,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x118,0x10a,0x110,0x1d2,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0x10e,0x110,0,0, -0x116,0,0,0,0,0,0x108,0x110,0x1e2,0x114,0x110,0,0,0,0,0, 
-0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x1f1,0x1ff,0x110,0x20e,0x21d, -0x110,0x22a,0x110,0x237,0x246,0x256,0x110,0x22a,0x110,0x237,0x261,0x110,0x110,0x26e,0x110,0x110, -0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x27e,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x27e,0x27e,0x27e,0x27e,0x27e, -0x286,0x110,0x28e,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0x110,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0x110,0x110,0,0,0,0,0, -0,0,0,0x110,0,0x110,0x117,0x29b,0x2aa,0,0,0,0,0,0,0, -0,0,0x2ba,0x2c9,0x110,0x2d9,0x110,0x2e9,0x2f8,0,0,0,0,0,0,0, -0x308,0x318,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0x110,0x110,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0, -0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0,0,0,0,0,0,0,0,0x328,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, -0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x112,0x84,0x98,0xa8,0xa8,0xa8, -0xa8,0xa8,0xa8,0xc8,0xc,0xe8,0x100,0x115,0xc,0xc,0xc,0x134,0x153,0x172,0x191,0xc, -0x1ab,0xc,0x1cb,0x1eb,0x20b,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223, -0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223, 
-0x223,0x223,0x223,0x223,0x223,0xfb,0xc,0x243,0xc,0x223,0x223,0x223,0x223,0x223,0x223,0x223, -0x223,0x223,0x223,0x223,0x223,0xc,0xc,0xc,0xc,0x223,0x223,0x223,0x223,0x223,0x223,0x223, -0x223,0x223,0x223,0x223,0x223,0x223,0xf8,0xc,0x262,0xc,0xc,0xc,0xc,0x282,0xc,0xc, -0xc,0xc,0xc,0x29c,0xc,0xc,0xfd,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc, -0xc,0x223,0x223,0x2b9,0xc,0xc,0xc,0xc,0xc,0x223,0x100,0xc,0xc,0xc,0xc,0xc, -0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0x2bc,0x223, -0x223,0x223,0x223,0x223,0x223,0x223,0x223,0xf8,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc, -0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0x2da,0xf8,0xc,0xc,0xc,0xc, -0xc,0xc,0xc,0xc,0x223,0x2fa,0xc,0xc,0x223,0xfd,0xc,0xc,0xc,0xc,0xc,0xc, -0xc,0xc,0xc,0xc,0x223,0x31a,0x223,0x223,0xc8,0x2b5,0xc,0xc,0x223,0x223,0x223,0x223, -0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223, -0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x31b,0xc,0xc,0xc,0xc, -0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc, -0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc -}; - -static const uint8_t vo_trieData[828]={ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,3,0,0,0,0,3,0,0,3,0,0,0,0,0, -0,0,0,0,0,3,3,3,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,3,3,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, -0,0,0,0,0,0,0,0,0,3,3,0,0,0,3,0, -0,0,0,3,3,3,0,0,0,0,0,0,3,0,3,3, -3,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3, -3,3,3,3,3,3,0,0,0,0,0,3,3,0,3,3, -0,0,0,0,0,0,3,3,3,3,0,3,0,3,0,3, -0,0,0,0,3,0,0,0,0,0,3,3,3,3,3,3, -0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,0, 
-3,3,3,3,3,3,3,3,0,0,0,0,3,3,3,3, -3,1,1,3,0,0,0,0,3,3,3,3,3,3,3,3, -3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3, -3,3,0,0,0,0,3,3,3,0,3,3,3,3,3,3, -3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0, -0,0,3,3,0,3,3,3,3,3,3,3,3,3,3,3, -3,3,2,2,3,3,3,3,3,1,1,1,1,1,1,1, -1,3,3,1,1,1,1,1,1,1,1,1,1,1,1,3, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3, -2,3,2,3,2,3,2,3,3,3,3,3,3,2,3,3, -3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3, -3,3,3,3,3,2,3,3,3,3,3,2,2,3,3,3, -3,2,2,3,3,3,1,2,3,2,3,2,3,2,3,2, -3,3,3,3,3,3,2,2,3,3,3,3,3,1,3,3, -3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2, -2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3, -3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3, -3,3,3,0,1,1,1,1,1,1,3,3,3,0,0,0, -0,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3, -3,3,1,1,3,3,2,0,2,3,3,3,3,3,3,3, -3,3,3,1,1,0,0,0,2,3,3,3,3,3,3,3, -3,3,3,3,1,3,1,3,1,3,3,3,3,3,3,3, -3,3,3,3,1,1,1,1,1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,3,3,3,1,3,3,3,3, -0,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3, -3,0,0,0,3,3,0,0,2,2,3,3,3,3,3,3, -3,3,3,3,3,3,3,3,0,0,0,0 -}; - -static const UCPTrie vo_trie={ - vo_trieIndex, - { vo_trieData }, - 1100, 828, - 0x110000, 0x110, - 1, 2, - 0, 0, - 0xc, 0x0, - 0x0, -}; - -#endif // INCLUDED_FROM_UPROPS_CPP diff --git a/deps/node/deps/icu-small/source/common/ulist.cpp b/deps/node/deps/icu-small/source/common/ulist.cpp deleted file mode 100644 index 07cbcc83..00000000 --- a/deps/node/deps/icu-small/source/common/ulist.cpp +++ /dev/null @@ -1,270 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2009-2016, International Business Machines -* Corporation and others. All Rights Reserved. 
-****************************************************************************** -*/ - -#include "ulist.h" -#include "cmemory.h" -#include "cstring.h" -#include "uenumimp.h" - -typedef struct UListNode UListNode; -struct UListNode { - void *data; - - UListNode *next; - UListNode *previous; - - /* When data is created with uprv_malloc, needs to be freed during deleteList function. */ - UBool forceDelete; -}; - -struct UList { - UListNode *curr; - UListNode *head; - UListNode *tail; - - int32_t size; -}; - -static void ulist_addFirstItem(UList *list, UListNode *newItem); - -U_CAPI UList *U_EXPORT2 ulist_createEmptyList(UErrorCode *status) { - UList *newList = NULL; - - if (U_FAILURE(*status)) { - return NULL; - } - - newList = (UList *)uprv_malloc(sizeof(UList)); - if (newList == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - newList->curr = NULL; - newList->head = NULL; - newList->tail = NULL; - newList->size = 0; - - return newList; -} - -/* - * Function called by addItemEndList or addItemBeginList when the first item is added to the list. - * This function properly sets the pointers for the first item added. - */ -static void ulist_addFirstItem(UList *list, UListNode *newItem) { - newItem->next = NULL; - newItem->previous = NULL; - list->head = newItem; - list->tail = newItem; -} - -static void ulist_removeItem(UList *list, UListNode *p) { - if (p->previous == NULL) { - // p is the list head. - list->head = p->next; - } else { - p->previous->next = p->next; - } - if (p->next == NULL) { - // p is the list tail. 
- list->tail = p->previous; - } else { - p->next->previous = p->previous; - } - if (p == list->curr) { - list->curr = p->next; - } - --list->size; - if (p->forceDelete) { - uprv_free(p->data); - } - uprv_free(p); -} - -U_CAPI void U_EXPORT2 ulist_addItemEndList(UList *list, const void *data, UBool forceDelete, UErrorCode *status) { - UListNode *newItem = NULL; - - if (U_FAILURE(*status) || list == NULL || data == NULL) { - if (forceDelete) { - uprv_free((void *)data); - } - return; - } - - newItem = (UListNode *)uprv_malloc(sizeof(UListNode)); - if (newItem == NULL) { - if (forceDelete) { - uprv_free((void *)data); - } - *status = U_MEMORY_ALLOCATION_ERROR; - return; - } - newItem->data = (void *)(data); - newItem->forceDelete = forceDelete; - - if (list->size == 0) { - ulist_addFirstItem(list, newItem); - } else { - newItem->next = NULL; - newItem->previous = list->tail; - list->tail->next = newItem; - list->tail = newItem; - } - - list->size++; -} - -U_CAPI void U_EXPORT2 ulist_addItemBeginList(UList *list, const void *data, UBool forceDelete, UErrorCode *status) { - UListNode *newItem = NULL; - - if (U_FAILURE(*status) || list == NULL || data == NULL) { - if (forceDelete) { - uprv_free((void *)data); - } - return; - } - - newItem = (UListNode *)uprv_malloc(sizeof(UListNode)); - if (newItem == NULL) { - if (forceDelete) { - uprv_free((void *)data); - } - *status = U_MEMORY_ALLOCATION_ERROR; - return; - } - newItem->data = (void *)(data); - newItem->forceDelete = forceDelete; - - if (list->size == 0) { - ulist_addFirstItem(list, newItem); - } else { - newItem->previous = NULL; - newItem->next = list->head; - list->head->previous = newItem; - list->head = newItem; - } - - list->size++; -} - -U_CAPI UBool U_EXPORT2 ulist_containsString(const UList *list, const char *data, int32_t length) { - if (list != NULL) { - const UListNode *pointer; - for (pointer = list->head; pointer != NULL; pointer = pointer->next) { - if (length == (int32_t)uprv_strlen((const char 
*)pointer->data)) { - if (uprv_memcmp(data, pointer->data, length) == 0) { - return TRUE; - } - } - } - } - return FALSE; -} - -U_CAPI UBool U_EXPORT2 ulist_removeString(UList *list, const char *data) { - if (list != NULL) { - UListNode *pointer; - for (pointer = list->head; pointer != NULL; pointer = pointer->next) { - if (uprv_strcmp(data, (const char *)pointer->data) == 0) { - ulist_removeItem(list, pointer); - // Remove only the first occurrence, like Java LinkedList.remove(Object). - return TRUE; - } - } - } - return FALSE; -} - -U_CAPI void *U_EXPORT2 ulist_getNext(UList *list) { - UListNode *curr = NULL; - - if (list == NULL || list->curr == NULL) { - return NULL; - } - - curr = list->curr; - list->curr = curr->next; - - return curr->data; -} - -U_CAPI int32_t U_EXPORT2 ulist_getListSize(const UList *list) { - if (list != NULL) { - return list->size; - } - - return -1; -} - -U_CAPI void U_EXPORT2 ulist_resetList(UList *list) { - if (list != NULL) { - list->curr = list->head; - } -} - -U_CAPI void U_EXPORT2 ulist_deleteList(UList *list) { - UListNode *listHead = NULL; - - if (list != NULL) { - listHead = list->head; - while (listHead != NULL) { - UListNode *listPointer = listHead->next; - - if (listHead->forceDelete) { - uprv_free(listHead->data); - } - - uprv_free(listHead); - listHead = listPointer; - } - uprv_free(list); - list = NULL; - } -} - -U_CAPI void U_EXPORT2 ulist_close_keyword_values_iterator(UEnumeration *en) { - if (en != NULL) { - ulist_deleteList((UList *)(en->context)); - uprv_free(en); - } -} - -U_CAPI int32_t U_EXPORT2 ulist_count_keyword_values(UEnumeration *en, UErrorCode *status) { - if (U_FAILURE(*status)) { - return -1; - } - - return ulist_getListSize((UList *)(en->context)); -} - -U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration *en, int32_t *resultLength, UErrorCode *status) { - const char *s; - if (U_FAILURE(*status)) { - return NULL; - } - - s = (const char *)ulist_getNext((UList *)(en->context)); - if (s != 
NULL && resultLength != NULL) { - *resultLength = static_cast(uprv_strlen(s)); - } - return s; -} - -U_CAPI void U_EXPORT2 ulist_reset_keyword_values_iterator(UEnumeration *en, UErrorCode *status) { - if (U_FAILURE(*status)) { - return ; - } - - ulist_resetList((UList *)(en->context)); -} - -U_CAPI UList * U_EXPORT2 ulist_getListFromEnum(UEnumeration *en) { - return (UList *)(en->context); -} diff --git a/deps/node/deps/icu-small/source/common/ulist.h b/deps/node/deps/icu-small/source/common/ulist.h deleted file mode 100644 index de58a4ad..00000000 --- a/deps/node/deps/icu-small/source/common/ulist.h +++ /dev/null @@ -1,50 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2009-2016, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -*/ - -#ifndef ULIST_H -#define ULIST_H - -#include "unicode/utypes.h" -#include "unicode/uenum.h" - -struct UList; -typedef struct UList UList; - -U_CAPI UList * U_EXPORT2 ulist_createEmptyList(UErrorCode *status); - -U_CAPI void U_EXPORT2 ulist_addItemEndList(UList *list, const void *data, UBool forceDelete, UErrorCode *status); - -U_CAPI void U_EXPORT2 ulist_addItemBeginList(UList *list, const void *data, UBool forceDelete, UErrorCode *status); - -U_CAPI UBool U_EXPORT2 ulist_containsString(const UList *list, const char *data, int32_t length); - -U_CAPI UBool U_EXPORT2 ulist_removeString(UList *list, const char *data); - -U_CAPI void *U_EXPORT2 ulist_getNext(UList *list); - -U_CAPI int32_t U_EXPORT2 ulist_getListSize(const UList *list); - -U_CAPI void U_EXPORT2 ulist_resetList(UList *list); - -U_CAPI void U_EXPORT2 ulist_deleteList(UList *list); - -/* - * The following are for use when creating UEnumeration object backed by UList. 
- */ -U_CAPI void U_EXPORT2 ulist_close_keyword_values_iterator(UEnumeration *en); - -U_CAPI int32_t U_EXPORT2 ulist_count_keyword_values(UEnumeration *en, UErrorCode *status); - -U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration* en, int32_t *resultLength, UErrorCode* status); - -U_CAPI void U_EXPORT2 ulist_reset_keyword_values_iterator(UEnumeration* en, UErrorCode* status); - -U_CAPI UList * U_EXPORT2 ulist_getListFromEnum(UEnumeration *en); - -#endif diff --git a/deps/node/deps/icu-small/source/common/uloc.cpp b/deps/node/deps/icu-small/source/common/uloc.cpp deleted file mode 100644 index 81b6e0f6..00000000 --- a/deps/node/deps/icu-small/source/common/uloc.cpp +++ /dev/null @@ -1,2672 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File ULOC.CPP -* -* Modification History: -* -* Date Name Description -* 04/01/97 aliu Creation. -* 08/21/98 stephen JDK 1.2 sync -* 12/08/98 rtg New Locale implementation and C API -* 03/15/99 damiba overhaul. -* 04/06/99 stephen changed setDefault() to realloc and copy -* 06/14/99 stephen Changed calls to ures_open for new params -* 07/21/99 stephen Modified setDefault() to propagate to C++ -* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs, -* brought canonicalization code into line with spec -*****************************************************************************/ - -/* - POSIX's locale format, from putil.c: [no spaces] - - ll [ _CC ] [ . 
MM ] [ @ VV] - - l = lang, C = ctry, M = charmap, V = variant -*/ - -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "unicode/uloc.h" - -#include "putilimp.h" -#include "ustr_imp.h" -#include "ulocimp.h" -#include "umutex.h" -#include "cstring.h" -#include "cmemory.h" -#include "locmap.h" -#include "uarrsort.h" -#include "uenumimp.h" -#include "uassert.h" -#include "charstr.h" - -#include /* for sprintf */ - -U_NAMESPACE_USE - -/* ### Declarations **************************************************/ - -/* Locale stuff from locid.cpp */ -U_CFUNC void locale_set_default(const char *id); -U_CFUNC const char *locale_get_default(void); -U_CFUNC int32_t -locale_getKeywords(const char *localeID, - char prev, - char *keywords, int32_t keywordCapacity, - char *values, int32_t valuesCapacity, int32_t *valLen, - UBool valuesToo, - UErrorCode *status); - -/* ### Data tables **************************************************/ - -/** - * Table of language codes, both 2- and 3-letter, with preference - * given to 2-letter codes where possible. Includes 3-letter codes - * that lack a 2-letter equivalent. - * - * This list must be in sorted order. This list is returned directly - * to the user by some API. - * - * This list must be kept in sync with LANGUAGES_3, with corresponding - * entries matched. - * - * This table should be terminated with a NULL entry, followed by a - * second list, and another NULL entry. The first list is visible to - * user code when this array is returned by API. The second list - * contains codes we support, but do not expose through user API. - * - * Notes - * - * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to - * include the revisions up to 2001/7/27 *CWB* - * - * The 3 character codes are the terminology codes like RFC 3066. 
This - * is compatible with prior ICU codes - * - * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the - * table but now at the end of the table because 3 character codes are - * duplicates. This avoids bad searches going from 3 to 2 character - * codes. - * - * The range qaa-qtz is reserved for local use - */ -/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ -/* ISO639 table version is 20150505 */ -/* Subsequent hand addition of selected languages */ -static const char * const LANGUAGES[] = { - "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb", - "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale", - "aln", "alt", "am", "an", "ang", "anp", "ar", "arc", - "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as", - "asa", "ase", "ast", "av", "avk", "awa", "ay", "az", - "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj", - "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg", - "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla", - "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh", - "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv", - "ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg", - "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp", - "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh", - "cs", "csb", "cu", "cv", "cy", - "da", "dak", "dar", "dav", "de", "del", "den", "dgr", - "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv", - "dyo", "dyu", "dz", "dzg", - "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx", - "en", "enm", "eo", "es", "esu", "et", "eu", "ewo", - "ext", - "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj", - "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr", - "frs", "fur", "fy", - "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd", - "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom", - "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc", - "gur", "guz", "gv", "gwi", - "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil", - "hit", "hmn", "ho", "hr", "hsb", 
"hsn", "ht", "hu", - "hup", "hy", "hz", - "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik", - "ilo", "inh", "io", "is", "it", "iu", "izh", - "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut", - "jv", - "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd", - "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp", - "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk", - "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi", - "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl", - "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut", - "kv", "kw", "ky", - "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn", - "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo", - "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui", - "lun", "luo", "lus", "luy", "lv", "lzh", "lzz", - "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", - "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga", - "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk", - "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj", - "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv", - "my", "mye", "myv", "mzn", - "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne", - "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn", - "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso", - "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", - "oc", "oj", "om", "or", "os", "osa", "ota", - "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc", - "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt", - "pon", "prg", "pro", "ps", "pt", - "qu", "quc", "qug", - "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro", - "rof", "rom", "rtm", "ru", "rue", "rug", "rup", - "rw", "rwk", - "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz", - "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh", - "se", "see", "seh", "sei", "sel", "ses", "sg", "sga", - "sgs", "shi", "shn", "shu", "si", "sid", "sk", - "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms", - "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr", - "ss", "ssy", "st", "stq", 
"su", "suk", "sus", "sux", - "sv", "sw", "swb", "swc", "syc", "syr", "szl", - "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg", - "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl", - "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi", - "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt", - "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm", - "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz", - "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo", - "vot", "vro", "vun", - "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu", - "xal", "xh", "xmf", "xog", - "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue", - "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu", - "zun", "zxx", "zza", -NULL, - "in", "iw", "ji", "jw", "sh", /* obsolete language codes */ -NULL -}; - -static const char* const DEPRECATED_LANGUAGES[]={ - "in", "iw", "ji", "jw", NULL, NULL -}; -static const char* const REPLACEMENT_LANGUAGES[]={ - "id", "he", "yi", "jv", NULL, NULL -}; - -/** - * Table of 3-letter language codes. - * - * This is a lookup table used to convert 3-letter language codes to - * their 2-letter equivalent, where possible. It must be kept in sync - * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the - * same language as LANGUAGES_3[i]. The commented-out lines are - * copied from LANGUAGES to make eyeballing this baby easier. - * - * Where a 3-letter language code has no 2-letter equivalent, the - * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i]. - * - * This table should be terminated with a NULL entry, followed by a - * second list, and another NULL entry. The two lists correspond to - * the two lists in LANGUAGES. 
- */ -/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ -/* ISO639 table version is 20150505 */ -/* Subsequent hand addition of selected languages */ -static const char * const LANGUAGES_3[] = { - "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb", - "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale", - "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc", - "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm", - "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze", - "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj", - "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul", - "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla", - "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh", - "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv", - "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg", - "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp", - "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh", - "ces", "csb", "chu", "chv", "cym", - "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr", - "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div", - "dyo", "dyu", "dzo", "dzg", - "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx", - "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo", - "ext", - "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij", - "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr", - "frs", "fur", "fry", - "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla", - "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom", - "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc", - "gur", "guz", "glv", "gwi", - "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil", - "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun", - "hup", "hye", "her", - "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk", - "ilo", "inh", "ido", "isl", "ita", "iku", "izh", - "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut", - "jav", - "kat", "kaa", "kab", "kac", "kaj", "kam", 
"kaw", "kbd", - "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp", - "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz", - "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi", - "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl", - "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut", - "kom", "cor", "kir", - "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn", - "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao", - "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui", - "lun", "luo", "lus", "luy", "lav", "lzh", "lzz", - "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", - "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga", - "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd", - "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj", - "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv", - "mya", "mye", "myv", "mzn", - "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep", - "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno", - "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso", - "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", - "oci", "oji", "orm", "ori", "oss", "osa", "ota", - "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc", - "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt", - "pon", "prg", "pro", "pus", "por", - "que", "quc", "qug", - "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron", - "rof", "rom", "rtm", "rus", "rue", "rug", "rup", - "kin", "rwk", - "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz", - "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh", - "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga", - "sgs", "shi", "shn", "shu", "sin", "sid", "slk", - "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms", - "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr", - "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux", - "swe", "swa", "swb", "swc", "syc", "syr", "szl", - "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk", - "tha", "tir", "tig", "tiv", 
"tuk", "tkl", "tkr", "tgl", - "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi", - "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt", - "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm", - "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb", - "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol", - "vot", "vro", "vun", - "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu", - "xal", "xho", "xmf", "xog", - "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue", - "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul", - "zun", "zxx", "zza", -NULL, -/* "in", "iw", "ji", "jw", "sh", */ - "ind", "heb", "yid", "jaw", "srp", -NULL -}; - -/** - * Table of 2-letter country codes. - * - * This list must be in sorted order. This list is returned directly - * to the user by some API. - * - * This list must be kept in sync with COUNTRIES_3, with corresponding - * entries matched. - * - * This table should be terminated with a NULL entry, followed by a - * second list, and another NULL entry. The first list is visible to - * user code when this array is returned by API. The second list - * contains codes we support, but do not expose through user API. 
- * - * Notes: - * - * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per - * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added - * new codes keeping the old ones for compatibility updated to include - * 1999/12/03 revisions *CWB* - * - * RO(ROM) is now RO(ROU) according to - * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html - */ -static const char * const COUNTRIES[] = { - "AD", "AE", "AF", "AG", "AI", "AL", "AM", - "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", - "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", - "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", - "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", - "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", - "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", - "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", - "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", - "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", - "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", - "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", - "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", - "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", - "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", - "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", - "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", - "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", - "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", - "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", - "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", - "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", - "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", - "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", - "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", - "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", - "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", - "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", - "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", - "WS", "YE", "YT", "ZA", "ZM", "ZW", -NULL, - 
"AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */ -NULL -}; - -static const char* const DEPRECATED_COUNTRIES[] = { - "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */ -}; -static const char* const REPLACEMENT_COUNTRIES[] = { -/* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */ - "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */ -}; - -/** - * Table of 3-letter country codes. - * - * This is a lookup table used to convert 3-letter country codes to - * their 2-letter equivalent. It must be kept in sync with COUNTRIES. - * For all valid i, COUNTRIES[i] must refer to the same country as - * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES - * to make eyeballing this baby easier. - * - * This table should be terminated with a NULL entry, followed by a - * second list, and another NULL entry. The two lists correspond to - * the two lists in COUNTRIES. 
- */ -static const char * const COUNTRIES_3[] = { -/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */ - "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", -/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */ - "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE", -/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */ - "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", -/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */ - "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT", -/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */ - "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", -/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ - "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", -/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */ - "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", -/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ - "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", -/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ - "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", -/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ - "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", -/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */ - "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", -/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ - "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", -/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ - "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", -/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ - "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", -/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ - "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO", -/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */ - "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX", -/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", 
*/ - "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", -/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */ - "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", -/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */ - "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM", -/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */ - "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", -/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */ - "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG", -/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */ - "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT", -/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */ - "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU", -/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */ - "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", -/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */ - "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV", -/* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */ - "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK", -/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */ - "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV", -/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */ - "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", -/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ - "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", -/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */ - "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", -NULL, -/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */ - "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR", -NULL -}; - -typedef struct CanonicalizationMap { - const char *id; /* input ID */ - const char *canonicalID; /* canonicalized output ID */ - const char *keyword; /* keyword, or NULL if none */ - const char *value; /* keyword value, or NULL if kw==NULL */ -} CanonicalizationMap; - -/** - * A map 
to canonicalize locale IDs. This handles a variety of - * different semantic kinds of transformations. - */ -static const CanonicalizationMap CANONICALIZE_MAP[] = { - { "", "en_US_POSIX", NULL, NULL }, /* .NET name */ - { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */ - { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */ - { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */ - { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */ - { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */ - { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" }, - { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */ - { "de_AT_PREEURO", "de_AT", "currency", "ATS" }, - { "de_DE_PREEURO", "de_DE", "currency", "DEM" }, - { "de_LU_PREEURO", "de_LU", "currency", "LUF" }, - { "el_GR_PREEURO", "el_GR", "currency", "GRD" }, - { "en_BE_PREEURO", "en_BE", "currency", "BEF" }, - { "en_IE_PREEURO", "en_IE", "currency", "IEP" }, - { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */ - { "es_ES_PREEURO", "es_ES", "currency", "ESP" }, - { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" }, - { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" }, - { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" }, - { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" }, - { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" }, - { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" }, - { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" }, - { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */ - { "it_IT_PREEURO", "it_IT", "currency", "ITL" }, - { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */ - { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */ - { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" }, - { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" }, - { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" }, - { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */ - { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */ - 
{ "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */ - { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */ - { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */ - { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */ - { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */ - { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */ - { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */ - { "zh_GAN", "gan", NULL, NULL }, /* registered name */ - { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */ - { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */ - { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */ - { "zh_WUU", "wuu", NULL, NULL }, /* registered name */ - { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */ - { "zh_YUE", "yue", NULL, NULL }, /* registered name */ -}; - -typedef struct VariantMap { - const char *variant; /* input ID */ - const char *keyword; /* keyword, or NULL if none */ - const char *value; /* keyword value, or NULL if kw==NULL */ -} VariantMap; - -static const VariantMap VARIANT_MAP[] = { - { "EURO", "currency", "EUR" }, - { "PINYIN", "collation", "pinyin" }, /* Solaris variant */ - { "STROKE", "collation", "stroke" } /* Solaris variant */ -}; - -/* ### BCP47 Conversion *******************************************/ -/* Test if the locale id has BCP47 u extension and does not have '@' */ -#define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) -/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */ -#define _ConvertBCP47(finalID, id, buffer, length,err) \ - if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \ - U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \ - finalID=id; \ - if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \ - } else { \ - finalID=buffer; \ - } -/* Gets the size of the shortest subtag in the given localeID. 
*/ -static int32_t getShortestSubtagLength(const char *localeID) { - int32_t localeIDLength = static_cast(uprv_strlen(localeID)); - int32_t length = localeIDLength; - int32_t tmpLength = 0; - int32_t i; - UBool reset = TRUE; - - for (i = 0; i < localeIDLength; i++) { - if (localeID[i] != '_' && localeID[i] != '-') { - if (reset) { - tmpLength = 0; - reset = FALSE; - } - tmpLength++; - } else { - if (tmpLength != 0 && tmpLength < length) { - length = tmpLength; - } - reset = TRUE; - } - } - - return length; -} - -/* ### Keywords **************************************************/ -#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) -#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) -/* Punctuation/symbols allowed in legacy key values */ -#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/') - -#define ULOC_KEYWORD_BUFFER_LEN 25 -#define ULOC_MAX_NO_KEYWORDS 25 - -U_CAPI const char * U_EXPORT2 -locale_getKeywordsStart(const char *localeID) { - const char *result = NULL; - if((result = uprv_strchr(localeID, '@')) != NULL) { - return result; - } -#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) - else { - /* We do this because the @ sign is variant, and the @ sign used on one - EBCDIC machine won't be compiled the same way on other EBCDIC based - machines. 
*/ - static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; - const uint8_t *charToFind = ebcdicSigns; - while(*charToFind) { - if((result = uprv_strchr(localeID, *charToFind)) != NULL) { - return result; - } - charToFind++; - } - } -#endif - return NULL; -} - -/** - * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN] - * @param keywordName incoming name to be canonicalized - * @param status return status (keyword too long) - * @return length of the keyword name - */ -static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) -{ - int32_t keywordNameLen = 0; - - for (; *keywordName != 0; keywordName++) { - if (!UPRV_ISALPHANUM(*keywordName)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ - return 0; - } - if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) { - buf[keywordNameLen++] = uprv_tolower(*keywordName); - } else { - /* keyword name too long for internal buffer */ - *status = U_INTERNAL_PROGRAM_ERROR; - return 0; - } - } - if (keywordNameLen == 0) { - *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */ - return 0; - } - buf[keywordNameLen] = 0; /* terminate */ - - return keywordNameLen; -} - -typedef struct { - char keyword[ULOC_KEYWORD_BUFFER_LEN]; - int32_t keywordLen; - const char *valueStart; - int32_t valueLen; -} KeywordStruct; - -static int32_t U_CALLCONV -compareKeywordStructs(const void * /*context*/, const void *left, const void *right) { - const char* leftString = ((const KeywordStruct *)left)->keyword; - const char* rightString = ((const KeywordStruct *)right)->keyword; - return uprv_strcmp(leftString, rightString); -} - -/** - * Both addKeyword and addValue must already be in canonical form. - * Either both addKeyword and addValue are NULL, or neither is NULL. - * If they are not NULL they must be zero terminated. - * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword. 
- */ -static int32_t -_getKeywords(const char *localeID, - char prev, - char *keywords, int32_t keywordCapacity, - char *values, int32_t valuesCapacity, int32_t *valLen, - UBool valuesToo, - const char* addKeyword, - const char* addValue, - UErrorCode *status) -{ - KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS]; - - int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS; - int32_t numKeywords = 0; - const char* pos = localeID; - const char* equalSign = NULL; - const char* semicolon = NULL; - int32_t i = 0, j, n; - int32_t keywordsLen = 0; - int32_t valuesLen = 0; - - if(prev == '@') { /* start of keyword definition */ - /* we will grab pairs, trim spaces, lowercase keywords, sort and return */ - do { - UBool duplicate = FALSE; - /* skip leading spaces */ - while(*pos == ' ') { - pos++; - } - if (!*pos) { /* handle trailing "; " */ - break; - } - if(numKeywords == maxKeywords) { - *status = U_INTERNAL_PROGRAM_ERROR; - return 0; - } - equalSign = uprv_strchr(pos, '='); - semicolon = uprv_strchr(pos, ';'); - /* lack of '=' [foo@currency] is illegal */ - /* ';' before '=' [foo@currency;collation=pinyin] is illegal */ - if(!equalSign || (semicolon && semicolon= ULOC_KEYWORD_BUFFER_LEN) { - /* keyword name too long for internal buffer */ - *status = U_INTERNAL_PROGRAM_ERROR; - return 0; - } - for(i = 0, n = 0; i < equalSign - pos; ++i) { - if (pos[i] != ' ') { - keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]); - } - } - - /* zero-length keyword is an error. */ - if (n == 0) { - *status = U_INVALID_FORMAT_ERROR; - return 0; - } - - keywordList[numKeywords].keyword[n] = 0; - keywordList[numKeywords].keywordLen = n; - /* now grab the value part. 
First we skip the '=' */ - equalSign++; - /* then we leading spaces */ - while(*equalSign == ' ') { - equalSign++; - } - - /* Premature end or zero-length value */ - if (!*equalSign || equalSign == semicolon) { - *status = U_INVALID_FORMAT_ERROR; - return 0; - } - - keywordList[numKeywords].valueStart = equalSign; - - pos = semicolon; - i = 0; - if(pos) { - while(*(pos - i - 1) == ' ') { - i++; - } - keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i); - pos++; - } else { - i = (int32_t)uprv_strlen(equalSign); - while(i && equalSign[i-1] == ' ') { - i--; - } - keywordList[numKeywords].valueLen = i; - } - /* If this is a duplicate keyword, then ignore it */ - for (j=0; j startSearchHere && *(keyValueTail-1) == ' ') { - keyValueTail--; - } - /* now keyValueTail points to first char after the keyName */ - /* copy & normalize keyName from locale */ - if (startSearchHere == keyValueTail) { - *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */ - return 0; - } - keyValueLen = 0; - while (startSearchHere < keyValueTail) { - if (!UPRV_ISALPHANUM(*startSearchHere)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ - return 0; - } - if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) { - localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++); - } else { - /* keyword name too long for internal buffer */ - *status = U_INTERNAL_PROGRAM_ERROR; - return 0; - } - } - localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */ - - startSearchHere = uprv_strchr(nextSeparator, ';'); - - if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) { - /* current entry matches the keyword. */ - nextSeparator++; /* skip '=' */ - /* First strip leading & trailing spaces (TC decided to tolerate these) */ - while(*nextSeparator == ' ') { - nextSeparator++; - } - keyValueTail = (startSearchHere)? 
startSearchHere: nextSeparator + uprv_strlen(nextSeparator); - while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') { - keyValueTail--; - } - /* Now copy the value, but check well-formedness */ - if (nextSeparator == keyValueTail) { - *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */ - return 0; - } - keyValueLen = 0; - while (nextSeparator < keyValueTail) { - if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */ - return 0; - } - if (keyValueLen < bufferCapacity) { - /* Should we lowercase value to return here? Tests expect as-is. */ - buffer[keyValueLen++] = *nextSeparator++; - } else { /* keep advancing so we return correct length in case of overflow */ - keyValueLen++; - nextSeparator++; - } - } - result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status); - return result; - } - } - } - return 0; -} - -U_CAPI int32_t U_EXPORT2 -uloc_setKeywordValue(const char* keywordName, - const char* keywordValue, - char* buffer, int32_t bufferCapacity, - UErrorCode* status) -{ - /* TODO: sorting. removal. 
*/ - int32_t keywordNameLen; - int32_t keywordValueLen; - int32_t bufLen; - int32_t needLen = 0; - char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; - char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1]; - char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; - int32_t rc; - char* nextSeparator = NULL; - char* nextEqualsign = NULL; - char* startSearchHere = NULL; - char* keywordStart = NULL; - CharString updatedKeysAndValues; - int32_t updatedKeysAndValuesLen; - UBool handledInputKeyAndValue = FALSE; - char keyValuePrefix = '@'; - - if(U_FAILURE(*status)) { - return -1; - } - if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - bufLen = (int32_t)uprv_strlen(buffer); - if(bufferCapacity= bufferCapacity) { - *status = U_BUFFER_OVERFLOW_ERROR; - return needLen; /* no change */ - } - *startSearchHere++ = '@'; - uprv_strcpy(startSearchHere, keywordNameBuffer); - startSearchHere += keywordNameLen; - *startSearchHere++ = '='; - uprv_strcpy(startSearchHere, keywordValueBuffer); - return needLen; - } /* end shortcut - no @ */ - - keywordStart = startSearchHere; - /* search for keyword */ - while(keywordStart) { - const char* keyValueTail; - int32_t keyValueLen; - - keywordStart++; /* skip @ or ; */ - nextEqualsign = uprv_strchr(keywordStart, '='); - if (!nextEqualsign) { - *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */ - return 0; - } - /* strip leading & trailing spaces (TC decided to tolerate these) */ - while(*keywordStart == ' ') { - keywordStart++; - } - keyValueTail = nextEqualsign; - while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') { - keyValueTail--; - } - /* now keyValueTail points to first char after the keyName */ - /* copy & normalize keyName from locale */ - if (keywordStart == keyValueTail) { - *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */ - return 0; - } - keyValueLen = 0; - while (keywordStart < keyValueTail) { - if 
(!UPRV_ISALPHANUM(*keywordStart)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ - return 0; - } - if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) { - localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++); - } else { - /* keyword name too long for internal buffer */ - *status = U_INTERNAL_PROGRAM_ERROR; - return 0; - } - } - localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */ - - nextSeparator = uprv_strchr(nextEqualsign, ';'); - - /* start processing the value part */ - nextEqualsign++; /* skip '=' */ - /* First strip leading & trailing spaces (TC decided to tolerate these) */ - while(*nextEqualsign == ' ') { - nextEqualsign++; - } - keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign); - while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') { - keyValueTail--; - } - if (nextEqualsign == keyValueTail) { - *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */ - return 0; - } - - rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer); - if(rc == 0) { - /* Current entry matches the input keyword. 
Update the entry */ - if(keywordValueLen > 0) { /* updating a value */ - updatedKeysAndValues.append(keyValuePrefix, *status); - keyValuePrefix = ';'; /* for any subsequent key-value pair */ - updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); - updatedKeysAndValues.append('=', *status); - updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); - } /* else removing this entry, don't emit anything */ - handledInputKeyAndValue = TRUE; - } else { - /* input keyword sorts earlier than current entry, add before current entry */ - if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) { - /* insert new entry at this location */ - updatedKeysAndValues.append(keyValuePrefix, *status); - keyValuePrefix = ';'; /* for any subsequent key-value pair */ - updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); - updatedKeysAndValues.append('=', *status); - updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); - handledInputKeyAndValue = TRUE; - } - /* copy the current entry */ - updatedKeysAndValues.append(keyValuePrefix, *status); - keyValuePrefix = ';'; /* for any subsequent key-value pair */ - updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status); - updatedKeysAndValues.append('=', *status); - updatedKeysAndValues.append(nextEqualsign, static_cast(keyValueTail-nextEqualsign), *status); - } - if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) { - /* append new entry at the end, it sorts later than existing entries */ - updatedKeysAndValues.append(keyValuePrefix, *status); - /* skip keyValuePrefix update, no subsequent key-value pair */ - updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); - updatedKeysAndValues.append('=', *status); - updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); - handledInputKeyAndValue = TRUE; - } - keywordStart = nextSeparator; - } /* end loop searching */ - - /* Any error from 
updatedKeysAndValues.append above would be internal and not due to - * problems with the passed-in locale. So if we did encounter problems with the - * passed-in locale above, those errors took precedence and overrode any error - * status from updatedKeysAndValues.append, and also caused a return of 0. If there - * are errors here they are from updatedKeysAndValues.append; they do cause an - * error return but the passed-in locale is unmodified and the original bufLen is - * returned. - */ - if (!handledInputKeyAndValue || U_FAILURE(*status)) { - /* if input key/value specified removal of a keyword not present in locale, or - * there was an error in CharString.append, leave original locale alone. */ - return bufLen; - } - - updatedKeysAndValuesLen = updatedKeysAndValues.length(); - /* needLen = length of the part before '@' + length of updated key-value part including '@' */ - needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen; - if(needLen >= bufferCapacity) { - *status = U_BUFFER_OVERFLOW_ERROR; - return needLen; /* no change */ - } - if (updatedKeysAndValuesLen > 0) { - uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen); - } - buffer[needLen]=0; - return needLen; -} - -/* ### ID parsing implementation **************************************************/ - -#define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I')) - -/*returns TRUE if one of the special prefixes is here (s=string) - 'x-' or 'i-' */ -#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1])) - -/* Dot terminates it because of POSIX form where dot precedes the codepage - * except for variant - */ -#define _isTerminator(a) ((a==0)||(a=='.')||(a=='@')) - -static char* _strnchr(const char* str, int32_t len, char c) { - U_ASSERT(str != 0 && len >= 0); - while (len-- != 0) { - char d = *str; - if (d == c) { - return (char*) str; - } else if (d == 0) { - break; - } - ++str; - } - return NULL; -} - -/** - * Lookup 'key' in the array 
'list'. The array 'list' should contain - * a NULL entry, followed by more entries, and a second NULL entry. - * - * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or - * COUNTRIES_3. - */ -static int16_t _findIndex(const char* const* list, const char* key) -{ - const char* const* anchor = list; - int32_t pass = 0; - - /* Make two passes through two NULL-terminated arrays at 'list' */ - while (pass++ < 2) { - while (*list) { - if (uprv_strcmp(key, *list) == 0) { - return (int16_t)(list - anchor); - } - list++; - } - ++list; /* skip final NULL *CWB*/ - } - return -1; -} - -/* count the length of src while copying it to dest; return strlen(src) */ -static inline int32_t -_copyCount(char *dest, int32_t destCapacity, const char *src) { - const char *anchor; - char c; - - anchor=src; - for(;;) { - if((c=*src)==0) { - return (int32_t)(src-anchor); - } - if(destCapacity<=0) { - return (int32_t)((src-anchor)+uprv_strlen(src)); - } - ++src; - *dest++=c; - --destCapacity; - } -} - -U_CFUNC const char* -uloc_getCurrentCountryID(const char* oldID){ - int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID); - if (offset >= 0) { - return REPLACEMENT_COUNTRIES[offset]; - } - return oldID; -} -U_CFUNC const char* -uloc_getCurrentLanguageID(const char* oldID){ - int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID); - if (offset >= 0) { - return REPLACEMENT_LANGUAGES[offset]; - } - return oldID; -} -/* - * the internal functions _getLanguage(), _getCountry(), _getVariant() - * avoid duplicating code to handle the earlier locale ID pieces - * in the functions for the later ones by - * setting the *pEnd pointer to where they stopped parsing - * - * TODO try to use this in Locale - */ -U_CFUNC int32_t -ulocimp_getLanguage(const char *localeID, - char *language, int32_t languageCapacity, - const char **pEnd) { - int32_t i=0; - int32_t offset; - char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */ - - /* if it starts with i- or x- 
then copy that prefix */ - if(_isIDPrefix(localeID)) { - if(i=0); - lang[i]=(char)uprv_tolower(*localeID); - } - i++; - localeID++; - } - - if(i==3) { - /* convert 3 character code to 2 character code if possible *CWB*/ - offset=_findIndex(LANGUAGES_3, lang); - if(offset>=0) { - i=_copyCount(language, languageCapacity, LANGUAGES[offset]); - } - } - - if(pEnd!=NULL) { - *pEnd=localeID; - } - return i; -} - -U_CFUNC int32_t -ulocimp_getScript(const char *localeID, - char *script, int32_t scriptCapacity, - const char **pEnd) -{ - int32_t idLen = 0; - - if (pEnd != NULL) { - *pEnd = localeID; - } - - /* copy the second item as far as possible and count its length */ - while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen]) - && uprv_isASCIILetter(localeID[idLen])) { - idLen++; - } - - /* If it's exactly 4 characters long, then it's a script and not a country. */ - if (idLen == 4) { - int32_t i; - if (pEnd != NULL) { - *pEnd = localeID+idLen; - } - if(idLen > scriptCapacity) { - idLen = scriptCapacity; - } - if (idLen >= 1) { - script[0]=(char)uprv_toupper(*(localeID++)); - } - for (i = 1; i < idLen; i++) { - script[i]=(char)uprv_tolower(*(localeID++)); - } - } - else { - idLen = 0; - } - return idLen; -} - -U_CFUNC int32_t -ulocimp_getCountry(const char *localeID, - char *country, int32_t countryCapacity, - const char **pEnd) -{ - int32_t idLen=0; - char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 }; - int32_t offset; - - /* copy the country as far as possible and count its length */ - while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { - if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/ - cnty[idLen]=(char)uprv_toupper(localeID[idLen]); - } - idLen++; - } - - /* the country should be either length 2 or 3 */ - if (idLen == 2 || idLen == 3) { - UBool gotCountry = FALSE; - /* convert 3 character code to 2 character code if possible *CWB*/ - if(idLen==3) { - offset=_findIndex(COUNTRIES_3, cnty); - if(offset>=0) { - 
idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]); - gotCountry = TRUE; - } - } - if (!gotCountry) { - int32_t i = 0; - for (i = 0; i < idLen; i++) { - if (i < countryCapacity) { - country[i]=(char)uprv_toupper(localeID[i]); - } - } - } - localeID+=idLen; - } else { - idLen = 0; - } - - if(pEnd!=NULL) { - *pEnd=localeID; - } - - return idLen; -} - -/** - * @param needSeparator if true, then add leading '_' if any variants - * are added to 'variant' - */ -static int32_t -_getVariantEx(const char *localeID, - char prev, - char *variant, int32_t variantCapacity, - UBool needSeparator) { - int32_t i=0; - - /* get one or more variant tags and separate them with '_' */ - if(_isIDSeparator(prev)) { - /* get a variant string after a '-' or '_' */ - while(!_isTerminator(*localeID)) { - if (needSeparator) { - if (i "FOO_BAR". - * @param variants the source string of one or more variants, - * separated by '_'. This will be MODIFIED IN PLACE. Not zero - * terminated; if it is, trailing zero will NOT be maintained. - * @param variantsLen length of variants - * @param toDelete variant to delete, without separators, e.g. 
"EURO" - * or "PREEURO"; not zero terminated - * @param toDeleteLen length of toDelete - * @return number of characters deleted from variants - */ -static int32_t -_deleteVariant(char* variants, int32_t variantsLen, - const char* toDelete, int32_t toDeleteLen) -{ - int32_t delta = 0; /* number of chars deleted */ - for (;;) { - UBool flag = FALSE; - if (variantsLen < toDeleteLen) { - return delta; - } - if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 && - (variantsLen == toDeleteLen || - (flag=(variants[toDeleteLen] == '_')) != 0)) - { - int32_t d = toDeleteLen + (flag?1:0); - variantsLen -= d; - delta += d; - if (variantsLen > 0) { - uprv_memmove(variants, variants+d, variantsLen); - } - } else { - char* p = _strnchr(variants, variantsLen, '_'); - if (p == NULL) { - return delta; - } - ++p; - variantsLen -= (int32_t)(p - variants); - variants = p; - } - } -} - -/* Keyword enumeration */ - -typedef struct UKeywordsContext { - char* keywords; - char* current; -} UKeywordsContext; - -U_CDECL_BEGIN - -static void U_CALLCONV -uloc_kw_closeKeywords(UEnumeration *enumerator) { - uprv_free(((UKeywordsContext *)enumerator->context)->keywords); - uprv_free(enumerator->context); - uprv_free(enumerator); -} - -static int32_t U_CALLCONV -uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) { - char *kw = ((UKeywordsContext *)en->context)->keywords; - int32_t result = 0; - while(*kw) { - result++; - kw += uprv_strlen(kw)+1; - } - return result; -} - -static const char * U_CALLCONV -uloc_kw_nextKeyword(UEnumeration* en, - int32_t* resultLength, - UErrorCode* /*status*/) { - const char* result = ((UKeywordsContext *)en->context)->current; - int32_t len = 0; - if(*result) { - len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current); - ((UKeywordsContext *)en->context)->current += len+1; - } else { - result = NULL; - } - if (resultLength) { - *resultLength = len; - } - return result; -} - -static void U_CALLCONV -uloc_kw_resetKeywords(UEnumeration* 
en, - UErrorCode* /*status*/) { - ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords; -} - -U_CDECL_END - - -static const UEnumeration gKeywordsEnum = { - NULL, - NULL, - uloc_kw_closeKeywords, - uloc_kw_countKeywords, - uenum_unextDefault, - uloc_kw_nextKeyword, - uloc_kw_resetKeywords -}; - -U_CAPI UEnumeration* U_EXPORT2 -uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status) -{ - UKeywordsContext *myContext = NULL; - UEnumeration *result = NULL; - - if(U_FAILURE(*status)) { - return NULL; - } - result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); - /* Null pointer test */ - if (result == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration)); - myContext = static_cast(uprv_malloc(sizeof(UKeywordsContext))); - if (myContext == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - uprv_free(result); - return NULL; - } - myContext->keywords = (char *)uprv_malloc(keywordListSize+1); - uprv_memcpy(myContext->keywords, keywordList, keywordListSize); - myContext->keywords[keywordListSize] = 0; - myContext->current = myContext->keywords; - result->context = myContext; - return result; -} - -U_CAPI UEnumeration* U_EXPORT2 -uloc_openKeywords(const char* localeID, - UErrorCode* status) -{ - int32_t i=0; - char keywords[256]; - int32_t keywordsCapacity = 256; - char tempBuffer[ULOC_FULLNAME_CAPACITY]; - const char* tmpLocaleID; - - if(status==NULL || U_FAILURE(*status)) { - return 0; - } - - if (_hasBCP47Extension(localeID)) { - _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); - } else { - if (localeID==NULL) { - localeID=uloc_getDefault(); - } - tmpLocaleID=localeID; - } - - /* Skip the language */ - ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); - if(_isIDSeparator(*tmpLocaleID)) { - const char *scriptID; - /* Skip the script if available */ - ulocimp_getScript(tmpLocaleID+1, NULL, 0, 
&scriptID); - if(scriptID != tmpLocaleID+1) { - /* Found optional script */ - tmpLocaleID = scriptID; - } - /* Skip the Country */ - if (_isIDSeparator(*tmpLocaleID)) { - ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID); - if(_isIDSeparator(*tmpLocaleID)) { - _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0); - } - } - } - - /* keywords are located after '@' */ - if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) { - i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status); - } - - if(i) { - return uloc_openKeywordList(keywords, i, status); - } else { - return NULL; - } -} - - -/* bit-flags for 'options' parameter of _canonicalize */ -#define _ULOC_STRIP_KEYWORDS 0x2 -#define _ULOC_CANONICALIZE 0x1 - -#define OPTION_SET(options, mask) ((options & mask) != 0) - -static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}; -#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default) - -/** - * Canonicalize the given localeID, to level 1 or to level 2, - * depending on the options. To specify level 1, pass in options=0. - * To specify level 2, pass in options=_ULOC_CANONICALIZE. - * - * This is the code underlying uloc_getName and uloc_canonicalize. 
- */ -static int32_t -_canonicalize(const char* localeID, - char* result, - int32_t resultCapacity, - uint32_t options, - UErrorCode* err) { - int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity; - char localeBuffer[ULOC_FULLNAME_CAPACITY]; - char tempBuffer[ULOC_FULLNAME_CAPACITY]; - const char* origLocaleID; - const char* tmpLocaleID; - const char* keywordAssign = NULL; - const char* separatorIndicator = NULL; - const char* addKeyword = NULL; - const char* addValue = NULL; - char* name; - char* variant = NULL; /* pointer into name, or NULL */ - - if (U_FAILURE(*err)) { - return 0; - } - - if (_hasBCP47Extension(localeID)) { - _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); - } else { - if (localeID==NULL) { - localeID=uloc_getDefault(); - } - tmpLocaleID=localeID; - } - - origLocaleID=tmpLocaleID; - - /* if we are doing a full canonicalization, then put results in - localeBuffer, if necessary; otherwise send them to result. */ - if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/ - (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) { - name = localeBuffer; - nameCapacity = (int32_t)sizeof(localeBuffer); - } else { - name = result; - nameCapacity = resultCapacity; - } - - /* get all pieces, one after another, and separate with '_' */ - len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID); - - if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) { - const char *d = uloc_getDefault(); - - len = (int32_t)uprv_strlen(d); - - if (name != NULL) { - uprv_strncpy(name, d, len); - } - } else if(_isIDSeparator(*tmpLocaleID)) { - const char *scriptID; - - ++fieldCount; - if(len 0) { - /* Found optional script */ - tmpLocaleID = scriptID; - ++fieldCount; - len+=scriptSize; - if (_isIDSeparator(*tmpLocaleID)) { - /* If there is something else, then we add the _ */ - if(len 0) { - /* Found optional country */ - tmpLocaleID = cntryID; - len+=cntrySize; - } - 
if(_isIDSeparator(*tmpLocaleID)) { - /* If there is something else, then we add the _ if we found country before. */ - if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) { - ++fieldCount; - if(len 0) { - variant = len 0)) { - do { - if(len 0)); - if (posixVariantSize > 0) { - if (variant == NULL) { - variant = name+len; - } - len += posixVariantSize; - variantSize += posixVariantSize; - } - } - - /* Handle generic variants first */ - if (variant) { - for (j=0; j 0) { - if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */ - --len; - } - addKeyword = VARIANT_MAP[j].keyword; - addValue = VARIANT_MAP[j].value; - break; - } - } - if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */ - --len; - } - } - - /* Look up the ID in the canonicalization map */ - for (j=0; j keywordAssign)) { - if(len 0) - { - // Windows found an LCID, return that - return lcid; - } - - uloc_getLanguage(localeID, langID, sizeof(langID), &status); - if (U_FAILURE(status)) { - return 0; - } - - if (uprv_strchr(localeID, '@')) { - // uprv_convertToLCID does not support keywords other than collation. - // Remove all keywords except collation. 
- int32_t len; - char collVal[ULOC_KEYWORDS_CAPACITY]; - char tmpLocaleID[ULOC_FULLNAME_CAPACITY]; - - len = uloc_getKeywordValue(localeID, "collation", collVal, - UPRV_LENGTHOF(collVal) - 1, &status); - - if (U_SUCCESS(status) && len > 0) { - collVal[len] = 0; - - len = uloc_getBaseName(localeID, tmpLocaleID, - UPRV_LENGTHOF(tmpLocaleID) - 1, &status); - - if (U_SUCCESS(status) && len > 0) { - tmpLocaleID[len] = 0; - - len = uloc_setKeywordValue("collation", collVal, tmpLocaleID, - UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status); - - if (U_SUCCESS(status) && len > 0) { - tmpLocaleID[len] = 0; - return uprv_convertToLCID(langID, tmpLocaleID, &status); - } - } - } - - // fall through - all keywords are simply ignored - status = U_ZERO_ERROR; - } - - return uprv_convertToLCID(langID, localeID, &status); -} - -U_CAPI int32_t U_EXPORT2 -uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity, - UErrorCode *status) -{ - return uprv_convertToPosix(hostid, locale, localeCapacity, status); -} - -/* ### Default locale **************************************************/ - -U_CAPI const char* U_EXPORT2 -uloc_getDefault() -{ - return locale_get_default(); -} - -U_CAPI void U_EXPORT2 -uloc_setDefault(const char* newDefaultLocale, - UErrorCode* err) -{ - if (U_FAILURE(*err)) - return; - /* the error code isn't currently used for anything by this function*/ - - /* propagate change to C++ */ - locale_set_default(newDefaultLocale); -} - -/** - * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer - * to an array of pointers to arrays of char. All of these pointers are owned - * by ICU-- do not delete them, and do not write through them. The array is - * terminated with a null pointer. - */ -U_CAPI const char* const* U_EXPORT2 -uloc_getISOLanguages() -{ - return LANGUAGES; -} - -/** - * Returns a list of all 2-letter country codes defined in ISO 639. This is a - * pointer to an array of pointers to arrays of char. 
All of these pointers are - * owned by ICU-- do not delete them, and do not write through them. The array is - * terminated with a null pointer. - */ -U_CAPI const char* const* U_EXPORT2 -uloc_getISOCountries() -{ - return COUNTRIES; -} - - -/* this function to be moved into cstring.c later */ -static char gDecimal = 0; - -static /* U_CAPI */ -double -/* U_EXPORT2 */ -_uloc_strtod(const char *start, char **end) { - char *decimal; - char *myEnd; - char buf[30]; - double rv; - if (!gDecimal) { - char rep[5]; - /* For machines that decide to change the decimal on you, - and try to be too smart with localization. - This normally should be just a '.'. */ - sprintf(rep, "%+1.1f", 1.0); - gDecimal = rep[2]; - } - - if(gDecimal == '.') { - return uprv_strtod(start, end); /* fall through to OS */ - } else { - uprv_strncpy(buf, start, 29); - buf[29]=0; - decimal = uprv_strchr(buf, '.'); - if(decimal) { - *decimal = gDecimal; - } else { - return uprv_strtod(start, end); /* no decimal point */ - } - rv = uprv_strtod(buf, &myEnd); - if(end) { - *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) 
*/ - } - return rv; - } -} - -typedef struct { - float q; - int32_t dummy; /* to avoid uninitialized memory copy from qsort */ - char locale[ULOC_FULLNAME_CAPACITY+1]; -} _acceptLangItem; - -static int32_t U_CALLCONV -uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b) -{ - const _acceptLangItem *aa = (const _acceptLangItem*)a; - const _acceptLangItem *bb = (const _acceptLangItem*)b; - - int32_t rc = 0; - if(bb->q < aa->q) { - rc = -1; /* A > B */ - } else if(bb->q > aa->q) { - rc = 1; /* A < B */ - } else { - rc = 0; /* A = B */ - } - - if(rc==0) { - rc = uprv_stricmp(aa->locale, bb->locale); - } - -#if defined(ULOC_DEBUG) - /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n", - aa->locale, aa->q, - bb->locale, bb->q, - rc);*/ -#endif - - return rc; -} - -/* -mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53 -*/ - -U_CAPI int32_t U_EXPORT2 -uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult, - const char *httpAcceptLanguage, - UEnumeration* availableLocales, - UErrorCode *status) -{ - MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items. 
- char tmp[ULOC_FULLNAME_CAPACITY +1]; - int32_t n = 0; - const char *itemEnd; - const char *paramEnd; - const char *s; - const char *t; - int32_t res; - int32_t i; - int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage); - - if(U_FAILURE(*status)) { - return -1; - } - - for(s=httpAcceptLanguage;s&&*s;) { - while(isspace(*s)) /* eat space at the beginning */ - s++; - itemEnd=uprv_strchr(s,','); - paramEnd=uprv_strchr(s,';'); - if(!itemEnd) { - itemEnd = httpAcceptLanguage+l; /* end of string */ - } - if(paramEnd && paramEnds)&&isspace(*t);t--) - ; - int32_t slen = static_cast(((t+1)-s)); - if(slen > ULOC_FULLNAME_CAPACITY) { - *status = U_BUFFER_OVERFLOW_ERROR; - return -1; // too big - } - uprv_strncpy(items[n].locale, s, slen); - items[n].locale[slen]=0; // terminate - int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status); - if(U_FAILURE(*status)) return -1; - if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) { - // canonicalization had an effect- copy back - uprv_strncpy(items[n].locale, tmp, clen); - items[n].locale[clen] = 0; // terminate - } -#if defined(ULOC_DEBUG) - /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/ -#endif - n++; - s = itemEnd; - while(*s==',') { /* eat duplicate commas */ - s++; - } - if(n>=items.getCapacity()) { // If we need more items - if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) { - *status = U_MEMORY_ALLOCATION_ERROR; - return -1; - } -#if defined(ULOC_DEBUG) - fprintf(stderr,"malloced at size %d\n", items.getCapacity()); -#endif - } - } - uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); - if (U_FAILURE(*status)) { - return -1; - } - LocalMemory strs(NULL); - if (strs.allocateInsteadAndReset(n) == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return -1; - } - for(i=0;i q <%g>\n", i, j[i].locale, j[i].q);*/ -#endif - strs[i]=items[i].locale; - } - res = uloc_acceptLanguage(result, 
resultAvailable, outResult, - strs.getAlias(), n, availableLocales, status); - return res; -} - - -U_CAPI int32_t U_EXPORT2 -uloc_acceptLanguage(char *result, int32_t resultAvailable, - UAcceptResult *outResult, const char **acceptList, - int32_t acceptListCount, - UEnumeration* availableLocales, - UErrorCode *status) -{ - int32_t i,j; - int32_t len; - int32_t maxLen=0; - char tmp[ULOC_FULLNAME_CAPACITY+1]; - const char *l; - char **fallbackList; - if(U_FAILURE(*status)) { - return -1; - } - fallbackList = static_cast(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount))); - if(fallbackList==NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return -1; - } - for(i=0;i0) { - uprv_strncpy(result, l, uprv_min(len, resultAvailable)); - } - for(j=0;jmaxLen) { - maxLen = len; - } - } - uenum_reset(availableLocales, status); - /* save off parent info */ - if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) { - fallbackList[i] = uprv_strdup(tmp); - } else { - fallbackList[i]=0; - } - } - - for(maxLen--;maxLen>0;maxLen--) { - for(i=0;i0) { - uprv_strncpy(result, l, uprv_min(len, resultAvailable)); - } - for(j=0;jtypeMap != NULL) { - uhash_close(keyData->typeMap); - } - uprv_free(keyData); -} - -static void U_CALLCONV -uloc_deleteTypeEntry(void* obj) { - uprv_free(obj); -} - -U_CDECL_END - - -static void U_CALLCONV -initFromResourceBundle(UErrorCode& sts) { - U_NAMESPACE_USE - ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup); - - gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts); - - LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(NULL, "keyTypeData", &sts)); - LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", NULL, &sts)); - LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", NULL, &sts)); - - if (U_FAILURE(sts)) { - return; - } - - UErrorCode tmpSts = U_ZERO_ERROR; - LocalUResourceBundlePointer 
typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", NULL, &tmpSts)); - tmpSts = U_ZERO_ERROR; - LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", NULL, &tmpSts)); - - // initialize vectors storing dynamically allocated objects - gKeyTypeStringPool = new UVector(uloc_deleteKeyTypeStringPoolEntry, NULL, sts); - if (gKeyTypeStringPool == NULL) { - if (U_SUCCESS(sts)) { - sts = U_MEMORY_ALLOCATION_ERROR; - } - } - if (U_FAILURE(sts)) { - return; - } - gLocExtKeyDataEntries = new UVector(uloc_deleteKeyDataEntry, NULL, sts); - if (gLocExtKeyDataEntries == NULL) { - if (U_SUCCESS(sts)) { - sts = U_MEMORY_ALLOCATION_ERROR; - } - } - if (U_FAILURE(sts)) { - return; - } - gLocExtTypeEntries = new UVector(uloc_deleteTypeEntry, NULL, sts); - if (gLocExtTypeEntries == NULL) { - if (U_SUCCESS(sts)) { - sts = U_MEMORY_ALLOCATION_ERROR; - } - } - if (U_FAILURE(sts)) { - return; - } - - // iterate through keyMap resource - LocalUResourceBundlePointer keyMapEntry; - - while (ures_hasNext(keyMapRes.getAlias())) { - keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts)); - if (U_FAILURE(sts)) { - break; - } - const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias()); - int32_t bcpKeyIdLen = 0; - const UChar* uBcpKeyId = ures_getString(keyMapEntry.getAlias(), &bcpKeyIdLen, &sts); - if (U_FAILURE(sts)) { - break; - } - - // empty value indicates that BCP key is same with the legacy key. 
- const char* bcpKeyId = legacyKeyId; - if (bcpKeyIdLen > 0) { - char* bcpKeyIdBuf = (char*)uprv_malloc(bcpKeyIdLen + 1); - if (bcpKeyIdBuf == NULL) { - sts = U_MEMORY_ALLOCATION_ERROR; - break; - } - u_UCharsToChars(uBcpKeyId, bcpKeyIdBuf, bcpKeyIdLen); - bcpKeyIdBuf[bcpKeyIdLen] = 0; - gKeyTypeStringPool->addElement(bcpKeyIdBuf, sts); - if (U_FAILURE(sts)) { - break; - } - bcpKeyId = bcpKeyIdBuf; - } - - UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0; - - UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts); - if (U_FAILURE(sts)) { - break; - } - uint32_t specialTypes = SPECIALTYPE_NONE; - - LocalUResourceBundlePointer typeAliasResByKey; - LocalUResourceBundlePointer bcpTypeAliasResByKey; - - if (typeAliasRes.isValid()) { - tmpSts = U_ZERO_ERROR; - typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, NULL, &tmpSts)); - if (U_FAILURE(tmpSts)) { - typeAliasResByKey.orphan(); - } - } - if (bcpTypeAliasRes.isValid()) { - tmpSts = U_ZERO_ERROR; - bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, NULL, &tmpSts)); - if (U_FAILURE(tmpSts)) { - bcpTypeAliasResByKey.orphan(); - } - } - - // look up type map for the key, and walk through the mapping data - tmpSts = U_ZERO_ERROR; - LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &tmpSts)); - if (U_FAILURE(tmpSts)) { - // type map for each key must exist - U_ASSERT(FALSE); - } else { - LocalUResourceBundlePointer typeMapEntry; - - while (ures_hasNext(typeMapResByKey.getAlias())) { - typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts)); - if (U_FAILURE(sts)) { - break; - } - const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias()); - - // special types - if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) { - specialTypes |= SPECIALTYPE_CODEPOINTS; - continue; - } - if (uprv_strcmp(legacyTypeId, "REORDER_CODE") 
== 0) { - specialTypes |= SPECIALTYPE_REORDER_CODE; - continue; - } - if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE") == 0) { - specialTypes |= SPECIALTYPE_RG_KEY_VALUE; - continue; - } - - if (isTZ) { - // a timezone key uses a colon instead of a slash in the resource. - // e.g. America:Los_Angeles - if (uprv_strchr(legacyTypeId, ':') != NULL) { - int32_t legacyTypeIdLen = static_cast(uprv_strlen(legacyTypeId)); - char* legacyTypeIdBuf = (char*)uprv_malloc(legacyTypeIdLen + 1); - if (legacyTypeIdBuf == NULL) { - sts = U_MEMORY_ALLOCATION_ERROR; - break; - } - const char* p = legacyTypeId; - char* q = legacyTypeIdBuf; - while (*p) { - if (*p == ':') { - *q++ = '/'; - } else { - *q++ = *p; - } - p++; - } - *q = 0; - - gKeyTypeStringPool->addElement(legacyTypeIdBuf, sts); - if (U_FAILURE(sts)) { - break; - } - legacyTypeId = legacyTypeIdBuf; - } - } - - int32_t bcpTypeIdLen = 0; - const UChar* uBcpTypeId = ures_getString(typeMapEntry.getAlias(), &bcpTypeIdLen, &sts); - if (U_FAILURE(sts)) { - break; - } - - // empty value indicates that BCP type is same with the legacy type. - const char* bcpTypeId = legacyTypeId; - if (bcpTypeIdLen > 0) { - char* bcpTypeIdBuf = (char*)uprv_malloc(bcpTypeIdLen + 1); - if (bcpTypeIdBuf == NULL) { - sts = U_MEMORY_ALLOCATION_ERROR; - break; - } - u_UCharsToChars(uBcpTypeId, bcpTypeIdBuf, bcpTypeIdLen); - bcpTypeIdBuf[bcpTypeIdLen] = 0; - gKeyTypeStringPool->addElement(bcpTypeIdBuf, sts); - if (U_FAILURE(sts)) { - break; - } - bcpTypeId = bcpTypeIdBuf; - } - - // Note: legacy type value should never be - // equivalent to bcp type value of a different - // type under the same key. So we use a single - // map for lookup. 
- LocExtType* t = (LocExtType*)uprv_malloc(sizeof(LocExtType)); - if (t == NULL) { - sts = U_MEMORY_ALLOCATION_ERROR; - break; - } - t->bcpId = bcpTypeId; - t->legacyId = legacyTypeId; - gLocExtTypeEntries->addElement((void*)t, sts); - if (U_FAILURE(sts)) { - break; - } - - uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts); - if (bcpTypeId != legacyTypeId) { - // different type value - uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts); - } - if (U_FAILURE(sts)) { - break; - } - - // also put aliases in the map - if (typeAliasResByKey.isValid()) { - LocalUResourceBundlePointer typeAliasDataEntry; - - ures_resetIterator(typeAliasResByKey.getAlias()); - while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) { - int32_t toLen; - typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts)); - const UChar* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts); - if (U_FAILURE(sts)) { - break; - } - // check if this is an alias of canoncal legacy type - if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) { - const char* from = ures_getKey(typeAliasDataEntry.getAlias()); - if (isTZ) { - // replace colon with slash if necessary - if (uprv_strchr(from, ':') != NULL) { - int32_t fromLen = static_cast(uprv_strlen(from)); - char* fromBuf = (char*)uprv_malloc(fromLen + 1); - if (fromBuf == NULL) { - sts = U_MEMORY_ALLOCATION_ERROR; - break; - } - const char* p = from; - char* q = fromBuf; - while (*p) { - if (*p == ':') { - *q++ = '/'; - } else { - *q++ = *p; - } - p++; - } - *q = 0; - - gKeyTypeStringPool->addElement(fromBuf, sts); - if (U_FAILURE(sts)) { - break; - } - from = fromBuf; - } - } - uhash_put(typeDataMap, (void*)from, t, &sts); - } - } - if (U_FAILURE(sts)) { - break; - } - } - - if (bcpTypeAliasResByKey.isValid()) { - LocalUResourceBundlePointer bcpTypeAliasDataEntry; - - ures_resetIterator(bcpTypeAliasResByKey.getAlias()); - while 
(ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) { - int32_t toLen; - bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts)); - const UChar* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts); - if (U_FAILURE(sts)) { - break; - } - // check if this is an alias of bcp type - if (uprv_compareInvWithUChar(NULL, bcpTypeId, -1, to, toLen) == 0) { - const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias()); - uhash_put(typeDataMap, (void*)from, t, &sts); - } - } - if (U_FAILURE(sts)) { - break; - } - } - } - } - if (U_FAILURE(sts)) { - break; - } - - LocExtKeyData* keyData = (LocExtKeyData*)uprv_malloc(sizeof(LocExtKeyData)); - if (keyData == NULL) { - sts = U_MEMORY_ALLOCATION_ERROR; - break; - } - keyData->bcpId = bcpKeyId; - keyData->legacyId = legacyKeyId; - keyData->specialTypes = specialTypes; - keyData->typeMap = typeDataMap; - - gLocExtKeyDataEntries->addElement((void*)keyData, sts); - if (U_FAILURE(sts)) { - break; - } - - uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts); - if (legacyKeyId != bcpKeyId) { - // different key value - uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts); - } - if (U_FAILURE(sts)) { - break; - } - } -} - -static UBool -init() { - UErrorCode sts = U_ZERO_ERROR; - umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts); - if (U_FAILURE(sts)) { - return FALSE; - } - return TRUE; -} - -static UBool -isSpecialTypeCodepoints(const char* val) { - int32_t subtagLen = 0; - const char* p = val; - while (*p) { - if (*p == '-') { - if (subtagLen < 4 || subtagLen > 6) { - return FALSE; - } - subtagLen = 0; - } else if ((*p >= '0' && *p <= '9') || - (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous - (*p >= 'a' && *p <= 'f')) { // also in EBCDIC - subtagLen++; - } else { - return FALSE; - } - p++; - } - return (subtagLen >= 4 && subtagLen <= 6); -} - -static UBool -isSpecialTypeReorderCode(const char* val) 
{ - int32_t subtagLen = 0; - const char* p = val; - while (*p) { - if (*p == '-') { - if (subtagLen < 3 || subtagLen > 8) { - return FALSE; - } - subtagLen = 0; - } else if (uprv_isASCIILetter(*p)) { - subtagLen++; - } else { - return FALSE; - } - p++; - } - return (subtagLen >=3 && subtagLen <=8); -} - -static UBool -isSpecialTypeRgKeyValue(const char* val) { - int32_t subtagLen = 0; - const char* p = val; - while (*p) { - if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) || - (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) { - subtagLen++; - } else { - return FALSE; - } - p++; - } - return (subtagLen == 6); -} - -U_CFUNC const char* -ulocimp_toBcpKey(const char* key) { - if (!init()) { - return NULL; - } - - LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key); - if (keyData != NULL) { - return keyData->bcpId; - } - return NULL; -} - -U_CFUNC const char* -ulocimp_toLegacyKey(const char* key) { - if (!init()) { - return NULL; - } - - LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key); - if (keyData != NULL) { - return keyData->legacyId; - } - return NULL; -} - -U_CFUNC const char* -ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) { - if (isKnownKey != NULL) { - *isKnownKey = FALSE; - } - if (isSpecialType != NULL) { - *isSpecialType = FALSE; - } - - if (!init()) { - return NULL; - } - - LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key); - if (keyData != NULL) { - if (isKnownKey != NULL) { - *isKnownKey = TRUE; - } - LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap, type); - if (t != NULL) { - return t->bcpId; - } - if (keyData->specialTypes != SPECIALTYPE_NONE) { - UBool matched = FALSE; - if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) { - matched = isSpecialTypeCodepoints(type); - } - if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) { - matched = isSpecialTypeReorderCode(type); - } - if (!matched && keyData->specialTypes & 
SPECIALTYPE_RG_KEY_VALUE) { - matched = isSpecialTypeRgKeyValue(type); - } - if (matched) { - if (isSpecialType != NULL) { - *isSpecialType = TRUE; - } - return type; - } - } - } - return NULL; -} - - -U_CFUNC const char* -ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) { - if (isKnownKey != NULL) { - *isKnownKey = FALSE; - } - if (isSpecialType != NULL) { - *isSpecialType = FALSE; - } - - if (!init()) { - return NULL; - } - - LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key); - if (keyData != NULL) { - if (isKnownKey != NULL) { - *isKnownKey = TRUE; - } - LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap, type); - if (t != NULL) { - return t->legacyId; - } - if (keyData->specialTypes != SPECIALTYPE_NONE) { - UBool matched = FALSE; - if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) { - matched = isSpecialTypeCodepoints(type); - } - if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) { - matched = isSpecialTypeReorderCode(type); - } - if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) { - matched = isSpecialTypeRgKeyValue(type); - } - if (matched) { - if (isSpecialType != NULL) { - *isSpecialType = TRUE; - } - return type; - } - } - } - return NULL; -} diff --git a/deps/node/deps/icu-small/source/common/uloc_tag.cpp b/deps/node/deps/icu-small/source/common/uloc_tag.cpp deleted file mode 100644 index 8120331c..00000000 --- a/deps/node/deps/icu-small/source/common/uloc_tag.cpp +++ /dev/null @@ -1,2884 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2009-2015, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -*/ - -#include "unicode/utypes.h" -#include "unicode/ures.h" -#include "unicode/putil.h" -#include "unicode/uloc.h" -#include "ustr_imp.h" -#include "charstr.h" -#include "cmemory.h" -#include "cstring.h" -#include "putilimp.h" -#include "uinvchar.h" -#include "ulocimp.h" -#include "uvector.h" -#include "uassert.h" - - -/* struct holding a single variant */ -typedef struct VariantListEntry { - const char *variant; - struct VariantListEntry *next; -} VariantListEntry; - -/* struct holding a single attribute value */ -typedef struct AttributeListEntry { - const char *attribute; - struct AttributeListEntry *next; -} AttributeListEntry; - -/* struct holding a single extension */ -typedef struct ExtensionListEntry { - const char *key; - const char *value; - struct ExtensionListEntry *next; -} ExtensionListEntry; - -#define MAXEXTLANG 3 -typedef struct ULanguageTag { - char *buf; /* holding parsed subtags */ - const char *language; - const char *extlang[MAXEXTLANG]; - const char *script; - const char *region; - VariantListEntry *variants; - ExtensionListEntry *extensions; - const char *privateuse; - const char *grandfathered; -} ULanguageTag; - -#define MINLEN 2 -#define SEP '-' -#define PRIVATEUSE 'x' -#define LDMLEXT 'u' - -#define LOCALE_SEP '_' -#define LOCALE_EXT_SEP '@' -#define LOCALE_KEYWORD_SEP ';' -#define LOCALE_KEY_TYPE_SEP '=' - -#define ISALPHA(c) uprv_isASCIILetter(c) -#define ISNUMERIC(c) ((c)>='0' && (c)<='9') - -static const char EMPTY[] = ""; -static const char LANG_UND[] = "und"; -static const char PRIVATEUSE_KEY[] = "x"; -static const char _POSIX[] = "_POSIX"; -static const char POSIX_KEY[] = "va"; -static const char POSIX_VALUE[] = "posix"; -static const char LOCALE_ATTRIBUTE_KEY[] = "attribute"; -static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant"; -static const char LOCALE_TYPE_YES[] = "yes"; - -#define LANG_UND_LEN 3 - -/* - Updated on 2018-09-12 from - 
https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . - - This table has 2 parts. The parts for Grandfathered tags is generated by the - following scripts from the IANA language tag registry. - - curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\ - egrep -A 7 'Type: grandfathered' | \ - egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \ - awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\ - tr 'A-Z' 'a-z' - - - The 2nd part is made of five ICU-specific entries. They're kept for - the backward compatibility for now, even though there are no preferred - values. They may have to be removed for the strict BCP 47 compliance. - -*/ -static const char* const GRANDFATHERED[] = { -/* grandfathered preferred */ - "art-lojban", "jbo", - "en-gb-oed", "en-gb-oxendict", - "i-ami", "ami", - "i-bnn", "bnn", - "i-hak", "hak", - "i-klingon", "tlh", - "i-lux", "lb", - "i-navajo", "nv", - "i-pwn", "pwn", - "i-tao", "tao", - "i-tay", "tay", - "i-tsu", "tsu", - "no-bok", "nb", - "no-nyn", "nn", - "sgn-be-fr", "sfb", - "sgn-be-nl", "vgt", - "sgn-ch-de", "sgg", - "zh-guoyu", "cmn", - "zh-hakka", "hak", - "zh-min-nan", "nan", - "zh-xiang", "hsn", - - // Grandfathered tags with no preferred value in the IANA - // registry. Kept for now for the backward compatibility - // because ICU has mapped them this way. - "cel-gaulish", "xtg-x-cel-gaulish", - "i-default", "en-x-i-default", - "i-enochian", "und-x-i-enochian", - "i-mingo", "see-x-i-mingo", - "zh-min", "nan-x-zh-min", -}; - -/* - Updated on 2018-09-12 from - https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . - - The table lists redundant tags with preferred value in the IANA languate tag registry. 
- It's generated with the following command: - - curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\ - grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \ - awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \ - tr 'A-Z' 'a-z' - - In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because - a variant tag 'hepburn-heploc' has the preferred subtag, 'alaic97'. -*/ - -static const char* const REDUNDANT[] = { -// redundant preferred - "sgn-br", "bzs", - "sgn-co", "csn", - "sgn-de", "gsg", - "sgn-dk", "dsl", - "sgn-es", "ssp", - "sgn-fr", "fsl", - "sgn-gb", "bfi", - "sgn-gr", "gss", - "sgn-ie", "isg", - "sgn-it", "ise", - "sgn-jp", "jsl", - "sgn-mx", "mfs", - "sgn-ni", "ncs", - "sgn-nl", "dse", - "sgn-no", "nsl", - "sgn-pt", "psr", - "sgn-se", "swl", - "sgn-us", "ase", - "sgn-za", "sfs", - "zh-cmn", "cmn", - "zh-cmn-hans", "cmn-hans", - "zh-cmn-hant", "cmn-hant", - "zh-gan", "gan", - "zh-wuu", "wuu", - "zh-yue", "yue", - - // variant tag with preferred value - "ja-latn-hepburn-heploc", "ja-latn-alalc97", -}; - -/* - Updated on 2018-09-12 from - https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . - - grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \ - grep -B1 'Preferred' | grep -v '^--' | \ - awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' - - Make sure that 2-letter language subtags come before 3-letter subtags. 
-*/ -static const char DEPRECATEDLANGS[][4] = { -/* deprecated new */ - "in", "id", - "iw", "he", - "ji", "yi", - "jw", "jv", - "mo", "ro", - "aam", "aas", - "adp", "dz", - "aue", "ktz", - "ayx", "nun", - "bgm", "bcg", - "bjd", "drl", - "ccq", "rki", - "cjr", "mom", - "cka", "cmr", - "cmk", "xch", - "coy", "pij", - "cqu", "quh", - "drh", "khk", - "drw", "prs", - "gav", "dev", - "gfx", "vaj", - "ggn", "gvr", - "gti", "nyc", - "guv", "duz", - "hrr", "jal", - "ibi", "opa", - "ilw", "gal", - "jeg", "oyb", - "kgc", "tdf", - "kgh", "kml", - "koj", "kwv", - "krm", "bmf", - "ktr", "dtp", - "kvs", "gdj", - "kwq", "yam", - "kxe", "tvd", - "kzj", "dtp", - "kzt", "dtp", - "lii", "raq", - "lmm", "rmx", - "meg", "cir", - "mst", "mry", - "mwj", "vaj", - "myt", "mry", - "nad", "xny", - "ncp", "kdz", - "nnx", "ngv", - "nts", "pij", - "oun", "vaj", - "pcr", "adx", - "pmc", "huw", - "pmu", "phr", - "ppa", "bfy", - "ppr", "lcq", - "pry", "prt", - "puz", "pub", - "sca", "hle", - "skk", "oyb", - "tdu", "dtp", - "thc", "tpo", - "thx", "oyb", - "tie", "ras", - "tkk", "twm", - "tlw", "weo", - "tmp", "tyj", - "tne", "kak", - "tnf", "prs", - "tsf", "taj", - "uok", "ema", - "xba", "cax", - "xia", "acn", - "xkh", "waw", - "xsj", "suj", - "ybd", "rki", - "yma", "lrr", - "ymt", "mtm", - "yos", "zom", - "yuu", "yug", -}; - -/* - Updated on 2018-04-24 from - - curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \ - grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \ - grep -B1 'Preferred' | \ - awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' -*/ -static const char DEPRECATEDREGIONS[][3] = { -/* deprecated new */ - "BU", "MM", - "DD", "DE", - "FX", "FR", - "TP", "TL", - "YD", "YE", - "ZR", "CD", -}; - -/* -* ------------------------------------------------- -* -* These ultag_ functions may be exposed as APIs later -* -* ------------------------------------------------- -*/ - -static ULanguageTag* -ultag_parse(const char* tag, 
int32_t tagLen, int32_t* parsedLen, UErrorCode* status); - -static void -ultag_close(ULanguageTag* langtag); - -static const char* -ultag_getLanguage(const ULanguageTag* langtag); - -#if 0 -static const char* -ultag_getJDKLanguage(const ULanguageTag* langtag); -#endif - -static const char* -ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); - -static int32_t -ultag_getExtlangSize(const ULanguageTag* langtag); - -static const char* -ultag_getScript(const ULanguageTag* langtag); - -static const char* -ultag_getRegion(const ULanguageTag* langtag); - -static const char* -ultag_getVariant(const ULanguageTag* langtag, int32_t idx); - -static int32_t -ultag_getVariantsSize(const ULanguageTag* langtag); - -static const char* -ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); - -static const char* -ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); - -static int32_t -ultag_getExtensionsSize(const ULanguageTag* langtag); - -static const char* -ultag_getPrivateUse(const ULanguageTag* langtag); - -#if 0 -static const char* -ultag_getGrandfathered(const ULanguageTag* langtag); -#endif - -namespace { - -// Helper class to memory manage CharString objects. -// Only ever stack-allocated, does not need to inherit UMemory. 
-class CharStringPool { -public: - CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {} - ~CharStringPool() = default; - - CharStringPool(const CharStringPool&) = delete; - CharStringPool& operator=(const CharStringPool&) = delete; - - icu::CharString* create() { - if (U_FAILURE(status)) { - return nullptr; - } - icu::CharString* const obj = new icu::CharString; - if (obj == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - pool.addElement(obj, status); - if (U_FAILURE(status)) { - delete obj; - return nullptr; - } - return obj; - } - -private: - static void U_CALLCONV deleter(void* obj) { - delete static_cast(obj); - } - - UErrorCode status; - icu::UVector pool; -}; - -} // namespace - -/* -* ------------------------------------------------- -* -* Language subtag syntax validation functions -* -* ------------------------------------------------- -*/ - -static UBool -_isAlphaString(const char* s, int32_t len) { - int32_t i; - for (i = 0; i < len; i++) { - if (!ISALPHA(*(s + i))) { - return FALSE; - } - } - return TRUE; -} - -static UBool -_isNumericString(const char* s, int32_t len) { - int32_t i; - for (i = 0; i < len; i++) { - if (!ISNUMERIC(*(s + i))) { - return FALSE; - } - } - return TRUE; -} - -static UBool -_isAlphaNumericString(const char* s, int32_t len) { - int32_t i; - for (i = 0; i < len; i++) { - if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { - return FALSE; - } - } - return TRUE; -} - -static UBool -_isLanguageSubtag(const char* s, int32_t len) { - /* - * language = 2*3ALPHA ; shortest ISO 639 code - * ["-" extlang] ; sometimes followed by - * ; extended language subtags - * / 4ALPHA ; or reserved for future use - * / 5*8ALPHA ; or registered language subtag - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isExtlangSubtag(const char* s, int32_t len) { - /* - * extlang = 3ALPHA ; 
selected ISO 639 codes - * *2("-" 3ALPHA) ; permanently reserved - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 3 && _isAlphaString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isScriptSubtag(const char* s, int32_t len) { - /* - * script = 4ALPHA ; ISO 15924 code - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 4 && _isAlphaString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isRegionSubtag(const char* s, int32_t len) { - /* - * region = 2ALPHA ; ISO 3166-1 code - * / 3DIGIT ; UN M.49 code - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 2 && _isAlphaString(s, len)) { - return TRUE; - } - if (len == 3 && _isNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isVariantSubtag(const char* s, int32_t len) { - /* - * variant = 5*8alphanum ; registered variants - * / (DIGIT 3alphanum) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) { - return TRUE; - } - if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isPrivateuseVariantSubtag(const char* s, int32_t len) { - /* - * variant = 1*8alphanum ; registered variants - * / (DIGIT 3alphanum) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isExtensionSingleton(const char* s, int32_t len) { - /* - * extension = singleton 1*("-" (2*8alphanum)) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isExtensionSubtag(const char* s, int32_t len) { - /* - * extension = singleton 1*("-" (2*8alphanum)) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 2 && len <= 8 && 
_isAlphaNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isExtensionSubtags(const char* s, int32_t len) { - const char *p = s; - const char *pSubtag = NULL; - - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - - while ((p - s) < len) { - if (*p == SEP) { - if (pSubtag == NULL) { - return FALSE; - } - if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { - return FALSE; - } - pSubtag = NULL; - } else if (pSubtag == NULL) { - pSubtag = p; - } - p++; - } - if (pSubtag == NULL) { - return FALSE; - } - return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); -} - -static UBool -_isPrivateuseValueSubtag(const char* s, int32_t len) { - /* - * privateuse = "x" 1*("-" (1*8alphanum)) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isPrivateuseValueSubtags(const char* s, int32_t len) { - const char *p = s; - const char *pSubtag = NULL; - - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - - while ((p - s) < len) { - if (*p == SEP) { - if (pSubtag == NULL) { - return FALSE; - } - if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { - return FALSE; - } - pSubtag = NULL; - } else if (pSubtag == NULL) { - pSubtag = p; - } - p++; - } - if (pSubtag == NULL) { - return FALSE; - } - return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); -} - -U_CFUNC UBool -ultag_isUnicodeLocaleKey(const char* s, int32_t len) { - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 2 && _isAlphaNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -U_CFUNC UBool -ultag_isUnicodeLocaleType(const char*s, int32_t len) { - const char* p; - int32_t subtagLen = 0; - - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - - for (p = s; len > 0; p++, len--) { - if (*p == SEP) { - if (subtagLen < 3) { - return FALSE; - } - subtagLen = 0; - } else if (ISALPHA(*p) || ISNUMERIC(*p)) { - 
subtagLen++; - if (subtagLen > 8) { - return FALSE; - } - } else { - return FALSE; - } - } - - return (subtagLen >= 3); -} -/* -* ------------------------------------------------- -* -* Helper functions -* -* ------------------------------------------------- -*/ - -static UBool -_addVariantToList(VariantListEntry **first, VariantListEntry *var) { - UBool bAdded = TRUE; - - if (*first == NULL) { - var->next = NULL; - *first = var; - } else { - VariantListEntry *prev, *cur; - int32_t cmp; - - /* variants order should be preserved */ - prev = NULL; - cur = *first; - while (TRUE) { - if (cur == NULL) { - prev->next = var; - var->next = NULL; - break; - } - - /* Checking for duplicate variant */ - cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); - if (cmp == 0) { - /* duplicated variant */ - bAdded = FALSE; - break; - } - prev = cur; - cur = cur->next; - } - } - - return bAdded; -} - -static UBool -_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { - UBool bAdded = TRUE; - - if (*first == NULL) { - attr->next = NULL; - *first = attr; - } else { - AttributeListEntry *prev, *cur; - int32_t cmp; - - /* reorder variants in alphabetical order */ - prev = NULL; - cur = *first; - while (TRUE) { - if (cur == NULL) { - prev->next = attr; - attr->next = NULL; - break; - } - cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); - if (cmp < 0) { - if (prev == NULL) { - *first = attr; - } else { - prev->next = attr; - } - attr->next = cur; - break; - } - if (cmp == 0) { - /* duplicated variant */ - bAdded = FALSE; - break; - } - prev = cur; - cur = cur->next; - } - } - - return bAdded; -} - - -static UBool -_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { - UBool bAdded = TRUE; - - if (*first == NULL) { - ext->next = NULL; - *first = ext; - } else { - ExtensionListEntry *prev, *cur; - int32_t cmp; - - /* reorder variants in alphabetical order */ - prev = NULL; - cur = *first; - while 
(TRUE) { - if (cur == NULL) { - prev->next = ext; - ext->next = NULL; - break; - } - if (localeToBCP) { - /* special handling for locale to bcp conversion */ - int32_t len, curlen; - - len = (int32_t)uprv_strlen(ext->key); - curlen = (int32_t)uprv_strlen(cur->key); - - if (len == 1 && curlen == 1) { - if (*(ext->key) == *(cur->key)) { - cmp = 0; - } else if (*(ext->key) == PRIVATEUSE) { - cmp = 1; - } else if (*(cur->key) == PRIVATEUSE) { - cmp = -1; - } else { - cmp = *(ext->key) - *(cur->key); - } - } else if (len == 1) { - cmp = *(ext->key) - LDMLEXT; - } else if (curlen == 1) { - cmp = LDMLEXT - *(cur->key); - } else { - cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); - /* Both are u extension keys - we need special handling for 'attribute' */ - if (cmp != 0) { - if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) { - cmp = 1; - } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { - cmp = -1; - } - } - } - } else { - cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); - } - if (cmp < 0) { - if (prev == NULL) { - *first = ext; - } else { - prev->next = ext; - } - ext->next = cur; - break; - } - if (cmp == 0) { - /* duplicated extension key */ - bAdded = FALSE; - break; - } - prev = cur; - cur = cur->next; - } - } - - return bAdded; -} - -static void -_initializeULanguageTag(ULanguageTag* langtag) { - int32_t i; - - langtag->buf = NULL; - - langtag->language = EMPTY; - for (i = 0; i < MAXEXTLANG; i++) { - langtag->extlang[i] = NULL; - } - - langtag->script = EMPTY; - langtag->region = EMPTY; - - langtag->variants = NULL; - langtag->extensions = NULL; - - langtag->grandfathered = EMPTY; - langtag->privateuse = EMPTY; -} - -static int32_t -_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { - char buf[ULOC_LANG_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len, i; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = 
uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - len = 0; - } - - /* Note: returned language code is in lower case letters */ - - if (len == 0) { - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); - } - reslen += LANG_UND_LEN; - } else if (!_isLanguageSubtag(buf, len)) { - /* invalid language code */ - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); - } - reslen += LANG_UND_LEN; - } else { - /* resolve deprecated */ - for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) { - // 2-letter deprecated subtags are listede before 3-letter - // ones in DEPRECATEDLANGS[]. Get out of loop on coming - // across the 1st 3-letter subtag, if the input is a 2-letter code. - // to avoid continuing to try when there's no match. 
- if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break; - if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { - uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); - len = (int32_t)uprv_strlen(buf); - break; - } - } - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - u_terminateChars(appendAt, capacity, reslen, status); - return reslen; -} - -static int32_t -_appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { - char buf[ULOC_SCRIPT_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } - - if (len > 0) { - if (!_isScriptSubtag(buf, len)) { - /* invalid script code */ - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } else { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } - u_terminateChars(appendAt, capacity, reslen, status); - return reslen; -} - -static int32_t -_appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { - char buf[ULOC_COUNTRY_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } - - if (len > 0) { - if (!_isRegionSubtag(buf, len)) { - /* invalid region code */ - if (strict) { - 
*status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } else { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - /* resolve deprecated */ - for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) { - if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) { - uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]); - len = (int32_t)uprv_strlen(buf); - break; - } - } - - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } - u_terminateChars(appendAt, capacity, reslen, status); - return reslen; -} - -static int32_t -_appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) { - char buf[ULOC_FULLNAME_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len, i; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } - - if (len > 0) { - char *p, *pVar; - UBool bNext = TRUE; - VariantListEntry *var; - VariantListEntry *varFirst = NULL; - - pVar = NULL; - p = buf; - while (bNext) { - if (*p == SEP || *p == LOCALE_SEP || *p == 0) { - if (*p == 0) { - bNext = FALSE; - } else { - *p = 0; /* terminate */ - } - if (pVar == NULL) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - /* ignore empty variant */ - } else { - /* ICU uses upper case letters for variants, but - the canonical format is lowercase in BCP47 */ - for (i = 0; *(pVar + i) != 0; i++) { - *(pVar + i) = uprv_tolower(*(pVar + i)); - } - - /* validate */ - if (_isVariantSubtag(pVar, -1)) { - if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) { - /* emit the variant to the list */ - var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); - if (var == 
NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - var->variant = pVar; - if (!_addVariantToList(&varFirst, var)) { - /* duplicated variant */ - uprv_free(var); - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - } else { - /* Special handling for POSIX variant, need to remember that we had it and then */ - /* treat it like an extension later. */ - *hadPosix = TRUE; - } - } else if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } else if (_isPrivateuseValueSubtag(pVar, -1)) { - /* Handle private use subtags separately */ - break; - } - } - /* reset variant starting position */ - pVar = NULL; - } else if (pVar == NULL) { - pVar = p; - } - p++; - } - - if (U_SUCCESS(*status)) { - if (varFirst != NULL) { - int32_t varLen; - - /* write out validated/normalized variants to the target */ - var = varFirst; - while (var != NULL) { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - varLen = (int32_t)uprv_strlen(var->variant); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen)); - } - reslen += varLen; - var = var->next; - } - } - } - - /* clean up */ - var = varFirst; - while (var != NULL) { - VariantListEntry *tmpVar = var->next; - uprv_free(var); - var = tmpVar; - } - - if (U_FAILURE(*status)) { - return 0; - } - } - - u_terminateChars(appendAt, capacity, reslen, status); - return reslen; -} - -static int32_t -_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { - char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; - int32_t attrBufLength = 0; - UEnumeration *keywordEnum = NULL; - int32_t reslen = 0; - - keywordEnum = uloc_openKeywords(localeID, status); - if (U_FAILURE(*status) && !hadPosix) { - uenum_close(keywordEnum); - return 0; - } - if (keywordEnum != NULL || hadPosix) { - /* reorder extensions */ - int32_t len; - const char *key; - ExtensionListEntry 
*firstExt = NULL; - ExtensionListEntry *ext; - AttributeListEntry *firstAttr = NULL; - AttributeListEntry *attr; - char *attrValue; - CharStringPool extBufPool; - const char *bcpKey=nullptr, *bcpValue=nullptr; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t keylen; - UBool isBcpUExt; - - while (TRUE) { - icu::CharString buf; - key = uenum_next(keywordEnum, NULL, status); - if (key == NULL) { - break; - } - char* buffer; - int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY; - - for (;;) { - buffer = buf.getAppendBuffer( - /*minCapacity=*/resultCapacity, - /*desiredCapacityHint=*/resultCapacity, - resultCapacity, - tmpStatus); - - if (U_FAILURE(tmpStatus)) { - break; - } - - len = uloc_getKeywordValue( - localeID, key, buffer, resultCapacity, &tmpStatus); - - if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - resultCapacity = len; - tmpStatus = U_ZERO_ERROR; - } - - if (U_FAILURE(tmpStatus)) { - if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - /* ignore this keyword */ - tmpStatus = U_ZERO_ERROR; - continue; - } - - buf.append(buffer, len, tmpStatus); - if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString. 
- } - - keylen = (int32_t)uprv_strlen(key); - isBcpUExt = (keylen > 1); - - /* special keyword used for representing Unicode locale attributes */ - if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { - if (len > 0) { - int32_t i = 0; - while (TRUE) { - attrBufLength = 0; - for (; i < len; i++) { - if (buf[i] != '-') { - attrBuf[attrBufLength++] = buf[i]; - } else { - i++; - break; - } - } - if (attrBufLength > 0) { - attrBuf[attrBufLength] = 0; - - } else if (i >= len){ - break; - } - - /* create AttributeListEntry */ - attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); - if (attr == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - attrValue = (char*)uprv_malloc(attrBufLength + 1); - if (attrValue == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - uprv_strcpy(attrValue, attrBuf); - attr->attribute = attrValue; - - if (!_addAttributeToList(&firstAttr, attr)) { - uprv_free(attr); - uprv_free(attrValue); - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - } - /* for a place holder ExtensionListEntry */ - bcpKey = LOCALE_ATTRIBUTE_KEY; - bcpValue = NULL; - } - } else if (isBcpUExt) { - bcpKey = uloc_toUnicodeLocaleKey(key); - if (bcpKey == NULL) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - - /* we've checked buf is null-terminated above */ - bcpValue = uloc_toUnicodeLocaleType(key, buf.data()); - if (bcpValue == NULL) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - if (bcpValue == buf.data()) { - /* - When uloc_toUnicodeLocaleType(key, buf) returns the - input value as is, the value is well-formed, but has - no known mapping. 
This implementation normalizes the - value to lower case - */ - icu::CharString* extBuf = extBufPool.create(); - if (extBuf == nullptr) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - int32_t bcpValueLen = static_cast(uprv_strlen(bcpValue)); - int32_t resultCapacity; - char* pExtBuf = extBuf->getAppendBuffer( - /*minCapacity=*/bcpValueLen, - /*desiredCapacityHint=*/bcpValueLen, - resultCapacity, - tmpStatus); - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - break; - } - - uprv_strcpy(pExtBuf, bcpValue); - T_CString_toLowerCase(pExtBuf); - - extBuf->append(pExtBuf, bcpValueLen, tmpStatus); - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - break; - } - - bcpValue = extBuf->data(); - } - } else { - if (*key == PRIVATEUSE) { - if (!_isPrivateuseValueSubtags(buf.data(), len)) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - } else { - if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - } - bcpKey = key; - icu::CharString* extBuf = extBufPool.create(); - if (extBuf == nullptr) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - extBuf->append(buf.data(), len, tmpStatus); - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - break; - } - bcpValue = extBuf->data(); - } - - /* create ExtensionListEntry */ - ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (ext == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - ext->key = bcpKey; - ext->value = bcpValue; - - if (!_addExtensionToList(&firstExt, ext, TRUE)) { - uprv_free(ext); - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - } - - /* Special handling for POSIX variant - add the keywords for POSIX */ - if (hadPosix) { - /* create ExtensionListEntry for POSIX */ - ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (ext == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - 
goto cleanup; - } - ext->key = POSIX_KEY; - ext->value = POSIX_VALUE; - - if (!_addExtensionToList(&firstExt, ext, TRUE)) { - uprv_free(ext); - } - } - - if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { - UBool startLDMLExtension = FALSE; - for (ext = firstExt; ext; ext = ext->next) { - if (!startLDMLExtension && uprv_strlen(ext->key) > 1) { - /* first LDML u singlton extension */ - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - if (reslen < capacity) { - *(appendAt + reslen) = LDMLEXT; - } - reslen++; - - startLDMLExtension = TRUE; - } - - /* write out the sorted BCP47 attributes, extensions and private use */ - if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { - /* write the value for the attributes */ - for (attr = firstAttr; attr; attr = attr->next) { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - len = (int32_t)uprv_strlen(attr->attribute); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } else { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - len = (int32_t)uprv_strlen(ext->key); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); - } - reslen += len; - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - len = (int32_t)uprv_strlen(ext->value); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } - } -cleanup: - /* clean up */ - ext = firstExt; - while (ext != NULL) { - ExtensionListEntry *tmpExt = ext->next; - uprv_free(ext); - ext = tmpExt; - } - - attr = firstAttr; - while (attr != NULL) { - AttributeListEntry *tmpAttr = attr->next; - char *pValue = (char *)attr->attribute; - uprv_free(pValue); - uprv_free(attr); - attr = tmpAttr; - } - - uenum_close(keywordEnum); - - if (U_FAILURE(*status)) { - return 0; - } 
- } - - return u_terminateChars(appendAt, capacity, reslen, status); -} - -/** - * Append keywords parsed from LDML extension value - * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} - * Note: char* buf is used for storing keywords - */ -static void -_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { - const char *pTag; /* beginning of current subtag */ - const char *pKwds; /* beginning of key-type pairs */ - UBool variantExists = *posixVariant; - - ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ - ExtensionListEntry *kwd, *nextKwd; - - AttributeListEntry *attrFirst = NULL; /* first attribute */ - AttributeListEntry *attr, *nextAttr; - - int32_t len; - int32_t bufIdx = 0; - - char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; - int32_t attrBufIdx = 0; - - /* Reset the posixVariant value */ - *posixVariant = FALSE; - - pTag = ldmlext; - pKwds = NULL; - - /* Iterate through u extension attributes */ - while (*pTag) { - /* locate next separator char */ - for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); - - if (ultag_isUnicodeLocaleKey(pTag, len)) { - pKwds = pTag; - break; - } - - /* add this attribute to the list */ - attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); - if (attr == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - - if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { - uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); - attrBuf[attrBufIdx + len] = 0; - attr->attribute = &attrBuf[attrBufIdx]; - attrBufIdx += (len + 1); - } else { - *status = U_ILLEGAL_ARGUMENT_ERROR; - uprv_free(attr); - goto cleanup; - } - - if (!_addAttributeToList(&attrFirst, attr)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - uprv_free(attr); - goto cleanup; - } - - /* next tag */ - pTag += len; - if (*pTag) { - /* next to the separator */ - pTag++; - } - } - - if (attrFirst) { - /* emit 
attributes as an LDML keyword, e.g. attribute=attr1-attr2 */ - - if (attrBufIdx > bufSize) { - /* attrBufIdx == + 1 */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - - kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - - kwd->key = LOCALE_ATTRIBUTE_KEY; - kwd->value = buf; - - /* attribute subtags sorted in alphabetical order as type */ - attr = attrFirst; - while (attr != NULL) { - nextAttr = attr->next; - - /* buffer size check is done above */ - if (attr != attrFirst) { - *(buf + bufIdx) = SEP; - bufIdx++; - } - - len = static_cast(uprv_strlen(attr->attribute)); - uprv_memcpy(buf + bufIdx, attr->attribute, len); - bufIdx += len; - - attr = nextAttr; - } - *(buf + bufIdx) = 0; - bufIdx++; - - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - uprv_free(kwd); - goto cleanup; - } - - /* once keyword entry is created, delete the attribute list */ - attr = attrFirst; - while (attr != NULL) { - nextAttr = attr->next; - uprv_free(attr); - attr = nextAttr; - } - attrFirst = NULL; - } - - if (pKwds) { - const char *pBcpKey = NULL; /* u extenstion key subtag */ - const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ - int32_t bcpKeyLen = 0; - int32_t bcpTypeLen = 0; - UBool isDone = FALSE; - - pTag = pKwds; - /* BCP47 representation of LDML key/type pairs */ - while (!isDone) { - const char *pNextBcpKey = NULL; - int32_t nextBcpKeyLen = 0; - UBool emitKeyword = FALSE; - - if (*pTag) { - /* locate next separator char */ - for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); - - if (ultag_isUnicodeLocaleKey(pTag, len)) { - if (pBcpKey) { - emitKeyword = TRUE; - pNextBcpKey = pTag; - nextBcpKeyLen = len; - } else { - pBcpKey = pTag; - bcpKeyLen = len; - } - } else { - U_ASSERT(pBcpKey != NULL); - /* within LDML type subtags */ - if (pBcpType) { - bcpTypeLen += (len + 1); - } else { - pBcpType = 
pTag; - bcpTypeLen = len; - } - } - - /* next tag */ - pTag += len; - if (*pTag) { - /* next to the separator */ - pTag++; - } - } else { - /* processing last one */ - emitKeyword = TRUE; - isDone = TRUE; - } - - if (emitKeyword) { - const char *pKey = NULL; /* LDML key */ - const char *pType = NULL; /* LDML type */ - - char bcpKeyBuf[9]; /* BCP key length is always 2 for now */ - - U_ASSERT(pBcpKey != NULL); - - if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) { - /* the BCP key is invalid */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - - uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen); - bcpKeyBuf[bcpKeyLen] = 0; - - /* u extension key to LDML key */ - pKey = uloc_toLegacyKey(bcpKeyBuf); - if (pKey == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - if (pKey == bcpKeyBuf) { - /* - The key returned by toLegacyKey points to the input buffer. - We normalize the result key to lower case. - */ - T_CString_toLowerCase(bcpKeyBuf); - if (bufSize - bufIdx - 1 >= bcpKeyLen) { - uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen); - pKey = buf + bufIdx; - bufIdx += bcpKeyLen; - *(buf + bufIdx) = 0; - bufIdx++; - } else { - *status = U_BUFFER_OVERFLOW_ERROR; - goto cleanup; - } - } - - if (pBcpType) { - char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */ - if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) { - /* the BCP type is too long */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - - uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen); - bcpTypeBuf[bcpTypeLen] = 0; - - /* BCP type to locale type */ - pType = uloc_toLegacyType(pKey, bcpTypeBuf); - if (pType == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - if (pType == bcpTypeBuf) { - /* - The type returned by toLegacyType points to the input buffer. - We normalize the result type to lower case. 
- */ - /* normalize to lower case */ - T_CString_toLowerCase(bcpTypeBuf); - if (bufSize - bufIdx - 1 >= bcpTypeLen) { - uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen); - pType = buf + bufIdx; - bufIdx += bcpTypeLen; - *(buf + bufIdx) = 0; - bufIdx++; - } else { - *status = U_BUFFER_OVERFLOW_ERROR; - goto cleanup; - } - } - } else { - /* typeless - default type value is "yes" */ - pType = LOCALE_TYPE_YES; - } - - /* Special handling for u-va-posix, since we want to treat this as a variant, - not as a keyword */ - if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { - *posixVariant = TRUE; - } else { - /* create an ExtensionListEntry for this keyword */ - kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - - kwd->key = pKey; - kwd->value = pType; - - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - // duplicate keyword is allowed, Only the first - // is honored. - uprv_free(kwd); - } - } - - pBcpKey = pNextBcpKey; - bcpKeyLen = pNextBcpKey != NULL ? 
nextBcpKeyLen : 0; - pBcpType = NULL; - bcpTypeLen = 0; - } - } - } - - kwd = kwdFirst; - while (kwd != NULL) { - nextKwd = kwd->next; - _addExtensionToList(appendTo, kwd, FALSE); - kwd = nextKwd; - } - - return; - -cleanup: - attr = attrFirst; - while (attr != NULL) { - nextAttr = attr->next; - uprv_free(attr); - attr = nextAttr; - } - - kwd = kwdFirst; - while (kwd != NULL) { - nextKwd = kwd->next; - uprv_free(kwd); - kwd = nextKwd; - } -} - - -static int32_t -_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { - int32_t reslen = 0; - int32_t i, n; - int32_t len; - ExtensionListEntry *kwdFirst = NULL; - ExtensionListEntry *kwd; - const char *key, *type; - char *kwdBuf = NULL; - int32_t kwdBufLength = capacity; - UBool posixVariant = FALSE; - - if (U_FAILURE(*status)) { - return 0; - } - - kwdBuf = (char*)uprv_malloc(kwdBufLength); - if (kwdBuf == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - /* Determine if variants already exists */ - if (ultag_getVariantsSize(langtag)) { - posixVariant = TRUE; - } - - n = ultag_getExtensionsSize(langtag); - - /* resolve locale keywords and reordering keys */ - for (i = 0; i < n; i++) { - key = ultag_getExtensionKey(langtag, i); - type = ultag_getExtensionValue(langtag, i); - if (*key == LDMLEXT) { - _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status); - if (U_FAILURE(*status)) { - break; - } - } else { - kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - kwd->key = key; - kwd->value = type; - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - uprv_free(kwd); - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - } - - if (U_SUCCESS(*status)) { - type = ultag_getPrivateUse(langtag); - if ((int32_t)uprv_strlen(type) > 0) { - /* add private use as a keyword */ - kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if 
(kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - } else { - kwd->key = PRIVATEUSE_KEY; - kwd->value = type; - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - uprv_free(kwd); - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - } - } - } - - /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ - - if (U_SUCCESS(*status) && posixVariant) { - len = (int32_t) uprv_strlen(_POSIX); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - - if (U_SUCCESS(*status) && kwdFirst != NULL) { - /* write out the sorted keywords */ - UBool firstValue = TRUE; - kwd = kwdFirst; - do { - if (reslen < capacity) { - if (firstValue) { - /* '@' */ - *(appendAt + reslen) = LOCALE_EXT_SEP; - firstValue = FALSE; - } else { - /* ';' */ - *(appendAt + reslen) = LOCALE_KEYWORD_SEP; - } - } - reslen++; - - /* key */ - len = (int32_t)uprv_strlen(kwd->key); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); - } - reslen += len; - - /* '=' */ - if (reslen < capacity) { - *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; - } - reslen++; - - /* type */ - len = (int32_t)uprv_strlen(kwd->value); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); - } - reslen += len; - - kwd = kwd->next; - } while (kwd); - } - - /* clean up */ - kwd = kwdFirst; - while (kwd != NULL) { - ExtensionListEntry *tmpKwd = kwd->next; - uprv_free(kwd); - kwd = tmpKwd; - } - - uprv_free(kwdBuf); - - if (U_FAILURE(*status)) { - return 0; - } - - return u_terminateChars(appendAt, capacity, reslen, status); -} - -static int32_t -_appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { - (void)hadPosix; - char buf[ULOC_FULLNAME_CAPACITY]; - char tmpAppend[ULOC_FULLNAME_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len, i; - 
int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } - - if (len > 0) { - char *p, *pPriv; - UBool bNext = TRUE; - UBool firstValue = TRUE; - UBool writeValue; - - pPriv = NULL; - p = buf; - while (bNext) { - writeValue = FALSE; - if (*p == SEP || *p == LOCALE_SEP || *p == 0) { - if (*p == 0) { - bNext = FALSE; - } else { - *p = 0; /* terminate */ - } - if (pPriv != NULL) { - /* Private use in the canonical format is lowercase in BCP47 */ - for (i = 0; *(pPriv + i) != 0; i++) { - *(pPriv + i) = uprv_tolower(*(pPriv + i)); - } - - /* validate */ - if (_isPrivateuseValueSubtag(pPriv, -1)) { - if (firstValue) { - if (!_isVariantSubtag(pPriv, -1)) { - writeValue = TRUE; - } - } else { - writeValue = TRUE; - } - } else if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } else { - break; - } - - if (writeValue) { - if (reslen < capacity) { - tmpAppend[reslen++] = SEP; - } - - if (firstValue) { - if (reslen < capacity) { - tmpAppend[reslen++] = *PRIVATEUSE_KEY; - } - - if (reslen < capacity) { - tmpAppend[reslen++] = SEP; - } - - len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); - if (reslen < capacity) { - uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); - } - reslen += len; - - if (reslen < capacity) { - tmpAppend[reslen++] = SEP; - } - - firstValue = FALSE; - } - - len = (int32_t)uprv_strlen(pPriv); - if (reslen < capacity) { - uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } - /* reset private use starting position */ - pPriv = NULL; - } else if (pPriv == NULL) { - pPriv = p; - } - p++; - } - - if (U_FAILURE(*status)) { - return 0; - } - } - - if (U_SUCCESS(*status)) { - len = reslen; - if (reslen < capacity) { - 
uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen)); - } - } - - u_terminateChars(appendAt, capacity, reslen, status); - - return reslen; -} - -/* -* ------------------------------------------------- -* -* ultag_ functions -* -* ------------------------------------------------- -*/ - -/* Bit flags used by the parser */ -#define LANG 0x0001 -#define EXTL 0x0002 -#define SCRT 0x0004 -#define REGN 0x0008 -#define VART 0x0010 -#define EXTS 0x0020 -#define EXTV 0x0040 -#define PRIV 0x0080 - -/** - * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing - * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ ) - * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above. - */ -#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) -#pragma optimize( "", off ) -#endif - -static ULanguageTag* -ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { - ULanguageTag *t; - char *tagBuf; - int16_t next; - char *pSubtag, *pNext, *pLastGoodPosition; - int32_t subtagLen; - int32_t extlangIdx; - ExtensionListEntry *pExtension; - char *pExtValueSubtag, *pExtValueSubtagEnd; - int32_t i; - UBool privateuseVar = FALSE; - int32_t grandfatheredLen = 0; - - if (parsedLen != NULL) { - *parsedLen = 0; - } - - if (U_FAILURE(*status)) { - return NULL; - } - - if (tagLen < 0) { - tagLen = (int32_t)uprv_strlen(tag); - } - - /* copy the entire string */ - tagBuf = (char*)uprv_malloc(tagLen + 1); - if (tagBuf == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(tagBuf, tag, tagLen); - *(tagBuf + tagLen) = 0; - - /* create a ULanguageTag */ - t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); - if (t == NULL) { - uprv_free(tagBuf); - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - _initializeULanguageTag(t); - t->buf = tagBuf; - - if 
(tagLen < MINLEN) { - /* the input tag is too short - return empty ULanguageTag */ - return t; - } - - /* check if the tag is grandfathered */ - for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) { - if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { - int32_t newTagLength; - - grandfatheredLen = tagLen; /* back up for output parsedLen */ - newTagLength = static_cast(uprv_strlen(GRANDFATHERED[i+1])); - if (tagLen < newTagLength) { - uprv_free(tagBuf); - tagBuf = (char*)uprv_malloc(newTagLength + 1); - if (tagBuf == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - ultag_close(t); - return NULL; - } - t->buf = tagBuf; - tagLen = newTagLength; - } - uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); - break; - } - } - - size_t parsedLenDelta = 0; - if (grandfatheredLen == 0) { - for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) { - const char* redundantTag = REDUNDANT[i]; - size_t redundantTagLen = uprv_strlen(redundantTag); - // The preferred tag for a redundant tag is always shorter than redundant - // tag. A redundant tag may or may not be followed by other subtags. - // (i.e. "zh-yue" or "zh-yue-u-co-pinyin"). - if (uprv_strnicmp(redundantTag, tagBuf, static_cast(redundantTagLen)) == 0) { - const char* redundantTagEnd = tagBuf + redundantTagLen; - if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) { - const char* preferredTag = REDUNDANT[i + 1]; - size_t preferredTagLen = uprv_strlen(preferredTag); - uprv_strncpy(t->buf, preferredTag, preferredTagLen); - if (*redundantTagEnd == SEP) { - uprv_memmove(tagBuf + preferredTagLen, - redundantTagEnd, - tagLen - redundantTagLen + 1); - } else { - tagBuf[preferredTagLen] = '\0'; - } - // parsedLen should be the length of the input - // before redundantTag is replaced by preferredTag. - // Save the delta to add it back later. 
- parsedLenDelta = redundantTagLen - preferredTagLen; - break; - } - } - } - } - - /* - * langtag = language - * ["-" script] - * ["-" region] - * *("-" variant) - * *("-" extension) - * ["-" privateuse] - */ - - next = LANG | PRIV; - pNext = pLastGoodPosition = tagBuf; - extlangIdx = 0; - pExtension = NULL; - pExtValueSubtag = NULL; - pExtValueSubtagEnd = NULL; - - while (pNext) { - char *pSep; - - pSubtag = pNext; - - /* locate next separator char */ - pSep = pSubtag; - while (*pSep) { - if (*pSep == SEP) { - break; - } - pSep++; - } - if (*pSep == 0) { - /* last subtag */ - pNext = NULL; - } else { - pNext = pSep + 1; - } - subtagLen = (int32_t)(pSep - pSubtag); - - if (next & LANG) { - if (_isLanguageSubtag(pSubtag, subtagLen)) { - *pSep = 0; /* terminate */ - // TODO: move deprecated language code handling here. - t->language = T_CString_toLowerCase(pSubtag); - - pLastGoodPosition = pSep; - next = SCRT | REGN | VART | EXTS | PRIV; - if (subtagLen <= 3) - next |= EXTL; - continue; - } - } - if (next & EXTL) { - if (_isExtlangSubtag(pSubtag, subtagLen)) { - *pSep = 0; - t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); - - pLastGoodPosition = pSep; - if (extlangIdx < 3) { - next = EXTL | SCRT | REGN | VART | EXTS | PRIV; - } else { - next = SCRT | REGN | VART | EXTS | PRIV; - } - continue; - } - } - if (next & SCRT) { - if (_isScriptSubtag(pSubtag, subtagLen)) { - char *p = pSubtag; - - *pSep = 0; - - /* to title case */ - *p = uprv_toupper(*p); - p++; - for (; *p; p++) { - *p = uprv_tolower(*p); - } - - t->script = pSubtag; - - pLastGoodPosition = pSep; - next = REGN | VART | EXTS | PRIV; - continue; - } - } - if (next & REGN) { - if (_isRegionSubtag(pSubtag, subtagLen)) { - *pSep = 0; - // TODO: move deprecated region code handling here. 
- t->region = T_CString_toUpperCase(pSubtag); - - pLastGoodPosition = pSep; - next = VART | EXTS | PRIV; - continue; - } - } - if (next & VART) { - if (_isVariantSubtag(pSubtag, subtagLen) || - (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { - VariantListEntry *var; - UBool isAdded; - - var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); - if (var == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto error; - } - *pSep = 0; - var->variant = T_CString_toUpperCase(pSubtag); - isAdded = _addVariantToList(&(t->variants), var); - if (!isAdded) { - /* duplicated variant entry */ - uprv_free(var); - break; - } - pLastGoodPosition = pSep; - next = VART | EXTS | PRIV; - continue; - } - } - if (next & EXTS) { - if (_isExtensionSingleton(pSubtag, subtagLen)) { - if (pExtension != NULL) { - if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { - /* the previous extension is incomplete */ - uprv_free(pExtension); - pExtension = NULL; - break; - } - - /* terminate the previous extension value */ - *pExtValueSubtagEnd = 0; - pExtension->value = T_CString_toLowerCase(pExtValueSubtag); - - /* insert the extension to the list */ - if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { - pLastGoodPosition = pExtValueSubtagEnd; - } else { - /* stop parsing here */ - uprv_free(pExtension); - pExtension = NULL; - break; - } - } - - /* create a new extension */ - pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (pExtension == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto error; - } - *pSep = 0; - pExtension->key = T_CString_toLowerCase(pSubtag); - pExtension->value = NULL; /* will be set later */ - - /* - * reset the start and the end location of extension value - * subtags for this extension - */ - pExtValueSubtag = NULL; - pExtValueSubtagEnd = NULL; - - next = EXTV; - continue; - } - } - if (next & EXTV) { - if (_isExtensionSubtag(pSubtag, subtagLen)) { - if (pExtValueSubtag == NULL) { - /* if 
the start postion of this extension's value is not yet, - this one is the first value subtag */ - pExtValueSubtag = pSubtag; - } - - /* Mark the end of this subtag */ - pExtValueSubtagEnd = pSep; - next = EXTS | EXTV | PRIV; - - continue; - } - } - if (next & PRIV) { - if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) { - char *pPrivuseVal; - - if (pExtension != NULL) { - /* Process the last extension */ - if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { - /* the previous extension is incomplete */ - uprv_free(pExtension); - pExtension = NULL; - break; - } else { - /* terminate the previous extension value */ - *pExtValueSubtagEnd = 0; - pExtension->value = T_CString_toLowerCase(pExtValueSubtag); - - /* insert the extension to the list */ - if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { - pLastGoodPosition = pExtValueSubtagEnd; - pExtension = NULL; - } else { - /* stop parsing here */ - uprv_free(pExtension); - pExtension = NULL; - break; - } - } - } - - /* The rest of part will be private use value subtags */ - if (pNext == NULL) { - /* empty private use subtag */ - break; - } - /* back up the private use value start position */ - pPrivuseVal = pNext; - - /* validate private use value subtags */ - while (pNext) { - pSubtag = pNext; - pSep = pSubtag; - while (*pSep) { - if (*pSep == SEP) { - break; - } - pSep++; - } - if (*pSep == 0) { - /* last subtag */ - pNext = NULL; - } else { - pNext = pSep + 1; - } - subtagLen = (int32_t)(pSep - pSubtag); - - if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { - *pSep = 0; - next = VART; - privateuseVar = TRUE; - break; - } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { - pLastGoodPosition = pSep; - } else { - break; - } - } - - if (next == VART) { - continue; - } - - if (pLastGoodPosition - pPrivuseVal > 0) { - *pLastGoodPosition = 0; - t->privateuse = T_CString_toLowerCase(pPrivuseVal); - } - /* No more subtags, exiting the parse 
loop */ - break; - } - break; - } - - /* If we fell through here, it means this subtag is illegal - quit parsing */ - break; - } - - if (pExtension != NULL) { - /* Process the last extension */ - if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { - /* the previous extension is incomplete */ - uprv_free(pExtension); - } else { - /* terminate the previous extension value */ - *pExtValueSubtagEnd = 0; - pExtension->value = T_CString_toLowerCase(pExtValueSubtag); - /* insert the extension to the list */ - if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { - pLastGoodPosition = pExtValueSubtagEnd; - } else { - uprv_free(pExtension); - } - } - } - - if (parsedLen != NULL) { - *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : - (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta); - } - - return t; - -error: - ultag_close(t); - return NULL; -} - -/** -* Ticket #12705 - Turn optimization back on. -*/ -#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) -#pragma optimize( "", on ) -#endif - -static void -ultag_close(ULanguageTag* langtag) { - - if (langtag == NULL) { - return; - } - - uprv_free(langtag->buf); - - if (langtag->variants) { - VariantListEntry *curVar = langtag->variants; - while (curVar) { - VariantListEntry *nextVar = curVar->next; - uprv_free(curVar); - curVar = nextVar; - } - } - - if (langtag->extensions) { - ExtensionListEntry *curExt = langtag->extensions; - while (curExt) { - ExtensionListEntry *nextExt = curExt->next; - uprv_free(curExt); - curExt = nextExt; - } - } - - uprv_free(langtag); -} - -static const char* -ultag_getLanguage(const ULanguageTag* langtag) { - return langtag->language; -} - -#if 0 -static const char* -ultag_getJDKLanguage(const ULanguageTag* langtag) { - int32_t i; - for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { - if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) { - return DEPRECATEDLANGS[i + 1]; - } - } - return 
langtag->language; -} -#endif - -static const char* -ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { - if (idx >= 0 && idx < MAXEXTLANG) { - return langtag->extlang[idx]; - } - return NULL; -} - -static int32_t -ultag_getExtlangSize(const ULanguageTag* langtag) { - int32_t size = 0; - int32_t i; - for (i = 0; i < MAXEXTLANG; i++) { - if (langtag->extlang[i]) { - size++; - } - } - return size; -} - -static const char* -ultag_getScript(const ULanguageTag* langtag) { - return langtag->script; -} - -static const char* -ultag_getRegion(const ULanguageTag* langtag) { - return langtag->region; -} - -static const char* -ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { - const char *var = NULL; - VariantListEntry *cur = langtag->variants; - int32_t i = 0; - while (cur) { - if (i == idx) { - var = cur->variant; - break; - } - cur = cur->next; - i++; - } - return var; -} - -static int32_t -ultag_getVariantsSize(const ULanguageTag* langtag) { - int32_t size = 0; - VariantListEntry *cur = langtag->variants; - while (TRUE) { - if (cur == NULL) { - break; - } - size++; - cur = cur->next; - } - return size; -} - -static const char* -ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { - const char *key = NULL; - ExtensionListEntry *cur = langtag->extensions; - int32_t i = 0; - while (cur) { - if (i == idx) { - key = cur->key; - break; - } - cur = cur->next; - i++; - } - return key; -} - -static const char* -ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { - const char *val = NULL; - ExtensionListEntry *cur = langtag->extensions; - int32_t i = 0; - while (cur) { - if (i == idx) { - val = cur->value; - break; - } - cur = cur->next; - i++; - } - return val; -} - -static int32_t -ultag_getExtensionsSize(const ULanguageTag* langtag) { - int32_t size = 0; - ExtensionListEntry *cur = langtag->extensions; - while (TRUE) { - if (cur == NULL) { - break; - } - size++; - cur = cur->next; - } - return size; -} - -static const char* 
-ultag_getPrivateUse(const ULanguageTag* langtag) { - return langtag->privateuse; -} - -#if 0 -static const char* -ultag_getGrandfathered(const ULanguageTag* langtag) { - return langtag->grandfathered; -} -#endif - - -/* -* ------------------------------------------------- -* -* Locale/BCP47 conversion APIs, exposed as uloc_* -* -* ------------------------------------------------- -*/ -U_CAPI int32_t U_EXPORT2 -uloc_toLanguageTag(const char* localeID, - char* langtag, - int32_t langtagCapacity, - UBool strict, - UErrorCode* status) { - icu::CharString canonical; - int32_t reslen; - UErrorCode tmpStatus = U_ZERO_ERROR; - UBool hadPosix = FALSE; - const char* pKeywordStart; - - /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ - int32_t resultCapacity = static_cast(uprv_strlen(localeID)); - if (resultCapacity > 0) { - char* buffer; - - for (;;) { - buffer = canonical.getAppendBuffer( - /*minCapacity=*/resultCapacity, - /*desiredCapacityHint=*/resultCapacity, - resultCapacity, - tmpStatus); - - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - return 0; - } - - reslen = - uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus); - - if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - resultCapacity = reslen; - tmpStatus = U_ZERO_ERROR; - } - - if (U_FAILURE(tmpStatus)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - canonical.append(buffer, reslen, tmpStatus); - if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString. 
- } - - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - return 0; - } - } - - reslen = 0; - - /* For handling special case - private use only tag */ - pKeywordStart = locale_getKeywordsStart(canonical.data()); - if (pKeywordStart == canonical.data()) { - UEnumeration *kwdEnum; - int kwdCnt = 0; - UBool done = FALSE; - - kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus); - if (kwdEnum != NULL) { - kwdCnt = uenum_count(kwdEnum, &tmpStatus); - if (kwdCnt == 1) { - const char *key; - int32_t len = 0; - - key = uenum_next(kwdEnum, &len, &tmpStatus); - if (len == 1 && *key == PRIVATEUSE) { - char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; - buf[0] = PRIVATEUSE; - buf[1] = SEP; - len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus); - if (U_SUCCESS(tmpStatus)) { - if (_isPrivateuseValueSubtags(&buf[2], len)) { - /* return private use only tag */ - reslen = len + 2; - uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity)); - u_terminateChars(langtag, langtagCapacity, reslen, status); - done = TRUE; - } else if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - done = TRUE; - } - /* if not strict mode, then "und" will be returned */ - } else { - *status = U_ILLEGAL_ARGUMENT_ERROR; - done = TRUE; - } - } - } - uenum_close(kwdEnum); - if (done) { - return reslen; - } - } - } - - reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status); - reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status); - reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status); - reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); - reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); - reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + 
reslen, langtagCapacity - reslen, strict, hadPosix, status); - - return reslen; -} - - -U_CAPI int32_t U_EXPORT2 -uloc_forLanguageTag(const char* langtag, - char* localeID, - int32_t localeIDCapacity, - int32_t* parsedLength, - UErrorCode* status) { - return ulocimp_forLanguageTag( - langtag, - -1, - localeID, - localeIDCapacity, - parsedLength, - status); -} - - -U_CAPI int32_t U_EXPORT2 -ulocimp_forLanguageTag(const char* langtag, - int32_t tagLen, - char* localeID, - int32_t localeIDCapacity, - int32_t* parsedLength, - UErrorCode* status) { - ULanguageTag *lt; - int32_t reslen = 0; - const char *subtag, *p; - int32_t len; - int32_t i, n; - UBool noRegion = TRUE; - - lt = ultag_parse(langtag, tagLen, parsedLength, status); - if (U_FAILURE(*status)) { - return 0; - } - - /* language */ - subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); - if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { - len = (int32_t)uprv_strlen(subtag); - if (len > 0) { - if (reslen < localeIDCapacity) { - uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); - } - reslen += len; - } - } - - /* script */ - subtag = ultag_getScript(lt); - len = (int32_t)uprv_strlen(subtag); - if (len > 0) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = LOCALE_SEP; - } - reslen++; - - /* write out the script in title case */ - p = subtag; - while (*p) { - if (reslen < localeIDCapacity) { - if (p == subtag) { - *(localeID + reslen) = uprv_toupper(*p); - } else { - *(localeID + reslen) = *p; - } - } - reslen++; - p++; - } - } - - /* region */ - subtag = ultag_getRegion(lt); - len = (int32_t)uprv_strlen(subtag); - if (len > 0) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = LOCALE_SEP; - } - reslen++; - /* write out the retion in upper case */ - p = subtag; - while (*p) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = uprv_toupper(*p); - } - reslen++; - p++; - } - noRegion = FALSE; - } - - /* variants */ - n 
= ultag_getVariantsSize(lt); - if (n > 0) { - if (noRegion) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = LOCALE_SEP; - } - reslen++; - } - - for (i = 0; i < n; i++) { - subtag = ultag_getVariant(lt, i); - if (reslen < localeIDCapacity) { - *(localeID + reslen) = LOCALE_SEP; - } - reslen++; - /* write out the variant in upper case */ - p = subtag; - while (*p) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = uprv_toupper(*p); - } - reslen++; - p++; - } - } - } - - /* keywords */ - n = ultag_getExtensionsSize(lt); - subtag = ultag_getPrivateUse(lt); - if (n > 0 || uprv_strlen(subtag) > 0) { - if (reslen == 0 && n > 0) { - /* need a language */ - if (reslen < localeIDCapacity) { - uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); - } - reslen += LANG_UND_LEN; - } - len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); - reslen += len; - } - - ultag_close(lt); - return u_terminateChars(localeID, localeIDCapacity, reslen, status); -} diff --git a/deps/node/deps/icu-small/source/common/ulocimp.h b/deps/node/deps/icu-small/source/common/ulocimp.h deleted file mode 100644 index 6dd8e33e..00000000 --- a/deps/node/deps/icu-small/source/common/ulocimp.h +++ /dev/null @@ -1,147 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2004-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ - -#ifndef ULOCIMP_H -#define ULOCIMP_H - -#include "unicode/uloc.h" - -/** - * Create an iterator over the specified keywords list - * @param keywordList double-null terminated list. Will be copied. 
- * @param keywordListSize size in bytes of keywordList - * @param status err code - * @return enumeration (owned by caller) of the keyword list. - * @internal ICU 3.0 - */ -U_CAPI UEnumeration* U_EXPORT2 -uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status); - -/** - * Look up a resource bundle table item with fallback on the table level. - * This is accessible so it can be called by C++ code. - */ -U_CAPI const UChar * U_EXPORT2 -uloc_getTableStringWithFallback( - const char *path, - const char *locale, - const char *tableKey, - const char *subTableKey, - const char *itemKey, - int32_t *pLength, - UErrorCode *pErrorCode); - -/*returns TRUE if a is an ID separator FALSE otherwise*/ -#define _isIDSeparator(a) (a == '_' || a == '-') - -U_CFUNC const char* -uloc_getCurrentCountryID(const char* oldID); - -U_CFUNC const char* -uloc_getCurrentLanguageID(const char* oldID); - -U_CFUNC int32_t -ulocimp_getLanguage(const char *localeID, - char *language, int32_t languageCapacity, - const char **pEnd); - -U_CFUNC int32_t -ulocimp_getScript(const char *localeID, - char *script, int32_t scriptCapacity, - const char **pEnd); - -U_CFUNC int32_t -ulocimp_getCountry(const char *localeID, - char *country, int32_t countryCapacity, - const char **pEnd); - -/** - * Returns a locale ID for the specified BCP47 language tag string. - * If the specified language tag contains any ill-formed subtags, - * the first such subtag and all following subtags are ignored. - *

- * This implements the 'Language-Tag' production of BCP47, and so - * supports grandfathered (regular and irregular) as well as private - * use language tags. Private use tags are represented as 'x-whatever', - * and grandfathered tags are converted to their canonical replacements - * where they exist. Note that a few grandfathered tags have no modern - * replacement, these will be converted using the fallback described in - * the first paragraph, so some information might be lost. - * @param langtag the input BCP47 language tag. - * @param tagLen the length of langtag, or -1 to call uprv_strlen(). - * @param localeID the output buffer receiving a locale ID for the - * specified BCP47 language tag. - * @param localeIDCapacity the size of the locale ID output buffer. - * @param parsedLength if not NULL, successfully parsed length - * for the input language tag is set. - * @param err error information if receiving the locald ID - * failed. - * @return the length of the locale ID. - * @internal ICU 63 - */ -U_CAPI int32_t U_EXPORT2 -ulocimp_forLanguageTag(const char* langtag, - int32_t tagLen, - char* localeID, - int32_t localeIDCapacity, - int32_t* parsedLength, - UErrorCode* err); - -/** - * Get the region to use for supplemental data lookup. Uses - * (1) any region specified by locale tag "rg"; if none then - * (2) any unicode_region_tag in the locale ID; if none then - * (3) if inferRegion is TRUE, the region suggested by - * getLikelySubtags on the localeID. - * If no region is found, returns length 0. - * - * @param localeID - * The complete locale ID (with keywords) from which - * to get the region to use for supplemental data. - * @param inferRegion - * If TRUE, will try to infer region from localeID if - * no other region is found. - * @param region - * Buffer in which to put the region ID found; should - * have a capacity at least ULOC_COUNTRY_CAPACITY. - * @param regionCapacity - * The actual capacity of the region buffer. 
- * @param status - * Pointer to in/out UErrorCode value for latest status. - * @return - * The length of any region code found, or 0 if none. - * @internal ICU 57 - */ -U_CAPI int32_t U_EXPORT2 -ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, - char *region, int32_t regionCapacity, UErrorCode* status); - -U_CAPI const char * U_EXPORT2 -locale_getKeywordsStart(const char *localeID); - - -U_CFUNC UBool -ultag_isUnicodeLocaleKey(const char* s, int32_t len); - -U_CFUNC UBool -ultag_isUnicodeLocaleType(const char* s, int32_t len); - -U_CFUNC const char* -ulocimp_toBcpKey(const char* key); - -U_CFUNC const char* -ulocimp_toLegacyKey(const char* key); - -U_CFUNC const char* -ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); - -U_CFUNC const char* -ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); - -#endif diff --git a/deps/node/deps/icu-small/source/common/umapfile.cpp b/deps/node/deps/icu-small/source/common/umapfile.cpp deleted file mode 100644 index a32573bb..00000000 --- a/deps/node/deps/icu-small/source/common/umapfile.cpp +++ /dev/null @@ -1,528 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************/ - - -/*---------------------------------------------------------------------------- - * - * Memory mapped file wrappers for use by the ICU Data Implementation - * All of the platform-specific implementation for mapping data files - * is here. The rest of the ICU Data implementation uses only the - * wrapper functions. 
- * - *----------------------------------------------------------------------------*/ -/* Defines _XOPEN_SOURCE for access to POSIX functions. - * Must be before any other #includes. */ -#include "uposixdefs.h" - -#include "unicode/putil.h" -#include "unicode/ustring.h" -#include "udatamem.h" -#include "umapfile.h" - -/* memory-mapping base definitions ------------------------------------------ */ - -#if MAP_IMPLEMENTATION==MAP_WIN32 -#ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -#endif -# define VC_EXTRALEAN -# define NOUSER -# define NOSERVICE -# define NOIME -# define NOMCX -# include -# include "cmemory.h" - - typedef HANDLE MemoryMap; - -# define IS_MAP(map) ((map)!=NULL) -#elif MAP_IMPLEMENTATION==MAP_POSIX || MAP_IMPLEMENTATION==MAP_390DLL - typedef size_t MemoryMap; - -# define IS_MAP(map) ((map)!=0) - -# include -# include -# include -# include - -# ifndef MAP_FAILED -# define MAP_FAILED ((void*)-1) -# endif - -# if MAP_IMPLEMENTATION==MAP_390DLL - /* No memory mapping for 390 batch mode. Fake it using dll loading. */ -# include -# include "cstring.h" -# include "cmemory.h" -# include "unicode/udata.h" -# define LIB_PREFIX "lib" -# define LIB_SUFFIX ".dll" - /* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */ -# define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat" -# endif -#elif MAP_IMPLEMENTATION==MAP_STDIO -# include -# include "cmemory.h" - - typedef void *MemoryMap; - -# define IS_MAP(map) ((map)!=NULL) -#endif - -/*----------------------------------------------------------------------------* - * * - * Memory Mapped File support. 
Platform dependent implementation of * - * functions used by the rest of the implementation.* - * * - *----------------------------------------------------------------------------*/ -#if MAP_IMPLEMENTATION==MAP_NONE - U_CFUNC UBool - uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { - if (U_FAILURE(*status)) { - return FALSE; - } - UDataMemory_init(pData); /* Clear the output struct. */ - return FALSE; /* no file access */ - } - - U_CFUNC void uprv_unmapFile(UDataMemory *pData) { - /* nothing to do */ - } -#elif MAP_IMPLEMENTATION==MAP_WIN32 - U_CFUNC UBool - uprv_mapFile( - UDataMemory *pData, /* Fill in with info on the result doing the mapping. */ - /* Output only; any original contents are cleared. */ - const char *path, /* File path to be opened/mapped. */ - UErrorCode *status /* Error status, used to report out-of-memory errors. */ - ) - { - HANDLE map; - HANDLE file; - - if (U_FAILURE(*status)) { - return FALSE; - } - - UDataMemory_init(pData); /* Clear the output struct. */ - - /* open the input file */ -#if U_PLATFORM_HAS_WINUWP_API == 0 - file=CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, NULL); -#else - // Convert from UTF-8 string to UTF-16 string. - wchar_t utf16Path[MAX_PATH]; - int32_t pathUtf16Len = 0; - u_strFromUTF8(reinterpret_cast(utf16Path), static_cast(UPRV_LENGTHOF(utf16Path)), &pathUtf16Len, path, -1, status); - - if (U_FAILURE(*status)) { - return FALSE; - } - if (*status == U_STRING_NOT_TERMINATED_WARNING) { - // Report back an error instead of a warning. - *status = U_BUFFER_OVERFLOW_ERROR; - return FALSE; - } - - // TODO: Is it worth setting extended parameters to specify random access? - file = CreateFile2(utf16Path, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, NULL); -#endif - if (file == INVALID_HANDLE_VALUE) { - // If we failed to open the file due to an out-of-memory error, then we want - // to report that error back to the caller. 
- if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) { - *status = U_MEMORY_ALLOCATION_ERROR; - } - return FALSE; - } - - /* Declare and initialize a security descriptor. - This is required for multiuser systems on Windows 2000 SP4 and beyond */ - // TODO: UWP does not have this function and I do not think it is required? -#if U_PLATFORM_HAS_WINUWP_API == 0 - - SECURITY_ATTRIBUTES mappingAttributes; - SECURITY_ATTRIBUTES *mappingAttributesPtr = NULL; - SECURITY_DESCRIPTOR securityDesc; - - if (InitializeSecurityDescriptor(&securityDesc, SECURITY_DESCRIPTOR_REVISION)) { - /* give the security descriptor a Null Dacl done using the "TRUE, (PACL)NULL" here */ - if (SetSecurityDescriptorDacl(&securityDesc, TRUE, (PACL)NULL, FALSE)) { - /* Make the security attributes point to the security descriptor */ - uprv_memset(&mappingAttributes, 0, sizeof(mappingAttributes)); - mappingAttributes.nLength = sizeof(mappingAttributes); - mappingAttributes.lpSecurityDescriptor = &securityDesc; - mappingAttributes.bInheritHandle = FALSE; /* object uninheritable */ - mappingAttributesPtr = &mappingAttributes; - } - } - /* else creating security descriptors can fail when we are on Windows 98, - and mappingAttributesPtr == NULL for that case. */ - - /* create an unnamed Windows file-mapping object for the specified file */ - map=CreateFileMapping(file, mappingAttributesPtr, PAGE_READONLY, 0, 0, NULL); -#else - map = CreateFileMappingFromApp(file, NULL, PAGE_READONLY, 0, NULL); -#endif - CloseHandle(file); - if (map == NULL) { - // If we failed to create the mapping due to an out-of-memory error, then - // we want to report that error back to the caller. 
- if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) { - *status = U_MEMORY_ALLOCATION_ERROR; - } - return FALSE; - } - - /* map a view of the file into our address space */ - pData->pHeader=(const DataHeader *)MapViewOfFile(map, FILE_MAP_READ, 0, 0, 0); - if(pData->pHeader==NULL) { - CloseHandle(map); - return FALSE; - } - pData->map=map; - return TRUE; - } - - U_CFUNC void - uprv_unmapFile(UDataMemory *pData) { - if(pData!=NULL && pData->map!=NULL) { - UnmapViewOfFile(pData->pHeader); - CloseHandle(pData->map); - pData->pHeader=NULL; - pData->map=NULL; - } - } - - - -#elif MAP_IMPLEMENTATION==MAP_POSIX - U_CFUNC UBool - uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { - int fd; - int length; - struct stat mystat; - void *data; - - if (U_FAILURE(*status)) { - return FALSE; - } - - UDataMemory_init(pData); /* Clear the output struct. */ - - /* determine the length of the file */ - if(stat(path, &mystat)!=0 || mystat.st_size<=0) { - return FALSE; - } - length=mystat.st_size; - - /* open the file */ - fd=open(path, O_RDONLY); - if(fd==-1) { - return FALSE; - } - - /* get a view of the mapping */ -#if U_PLATFORM != U_PF_HPUX - data=mmap(0, length, PROT_READ, MAP_SHARED, fd, 0); -#else - data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); -#endif - close(fd); /* no longer needed */ - if(data==MAP_FAILED) { - // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR? 
- return FALSE; - } - - pData->map = (char *)data + length; - pData->pHeader=(const DataHeader *)data; - pData->mapAddr = data; -#if U_PLATFORM == U_PF_IPHONE - posix_madvise(data, length, POSIX_MADV_RANDOM); -#endif - return TRUE; - } - - U_CFUNC void - uprv_unmapFile(UDataMemory *pData) { - if(pData!=NULL && pData->map!=NULL) { - size_t dataLen = (char *)pData->map - (char *)pData->mapAddr; - if(munmap(pData->mapAddr, dataLen)==-1) { - } - pData->pHeader=NULL; - pData->map=0; - pData->mapAddr=NULL; - } - } - - - -#elif MAP_IMPLEMENTATION==MAP_STDIO - /* copy of the filestrm.c/T_FileStream_size() implementation */ - static int32_t - umap_fsize(FILE *f) { - int32_t savedPos = ftell(f); - int32_t size = 0; - - /*Changes by Bertrand A. D. doesn't affect the current position - goes to the end of the file before ftell*/ - fseek(f, 0, SEEK_END); - size = (int32_t)ftell(f); - fseek(f, savedPos, SEEK_SET); - return size; - } - - U_CFUNC UBool - uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { - FILE *file; - int32_t fileLength; - void *p; - - if (U_FAILURE(*status)) { - return FALSE; - } - - UDataMemory_init(pData); /* Clear the output struct. 
*/ - /* open the input file */ - file=fopen(path, "rb"); - if(file==NULL) { - return FALSE; - } - - /* get the file length */ - fileLength=umap_fsize(file); - if(ferror(file) || fileLength<=20) { - fclose(file); - return FALSE; - } - - /* allocate the memory to hold the file data */ - p=uprv_malloc(fileLength); - if(p==NULL) { - fclose(file); - *status = U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - - /* read the file */ - if(fileLength!=fread(p, 1, fileLength, file)) { - uprv_free(p); - fclose(file); - return FALSE; - } - - fclose(file); - pData->map=p; - pData->pHeader=(const DataHeader *)p; - pData->mapAddr=p; - return TRUE; - } - - U_CFUNC void - uprv_unmapFile(UDataMemory *pData) { - if(pData!=NULL && pData->map!=NULL) { - uprv_free(pData->map); - pData->map = NULL; - pData->mapAddr = NULL; - pData->pHeader = NULL; - } - } - - -#elif MAP_IMPLEMENTATION==MAP_390DLL - /* 390 specific Library Loading. - * This is the only platform left that dynamically loads an ICU Data Library. - * All other platforms use .data files when dynamic loading is required, but - * this turn out to be awkward to support in 390 batch mode. - * - * The idea here is to hide the fact that 390 is using dll loading from the - * rest of ICU, and make it look like there is file loading happening. - * - */ - - static char *strcpy_returnEnd(char *dest, const char *src) - { - while((*dest=*src)!=0) { - ++dest; - ++src; - } - return dest; - } - - /*------------------------------------------------------------------------------ - * - * computeDirPath given a user-supplied path of an item to be opened, - * compute and return - * - the full directory path to be used - * when opening the file. - * - Pointer to null at end of above returned path - * - * Parameters: - * path: input path. Buffer is not altered. - * pathBuffer: Output buffer. Any contents are overwritten. - * - * Returns: - * Pointer to null termination in returned pathBuffer. 
- * - * TODO: This works the way ICU historically has, but the - * whole data fallback search path is so complicated that - * probably almost no one will ever really understand it, - * the potential for confusion is large. (It's not just - * this one function, but the whole scheme.) - * - *------------------------------------------------------------------------------*/ - static char *uprv_computeDirPath(const char *path, char *pathBuffer) - { - char *finalSlash; /* Ptr to last dir separator in input path, or null if none. */ - int32_t pathLen; /* Length of the returned directory path */ - - finalSlash = 0; - if (path != 0) { - finalSlash = uprv_strrchr(path, U_FILE_SEP_CHAR); - } - - *pathBuffer = 0; - if (finalSlash == 0) { - /* No user-supplied path. - * Copy the ICU_DATA path to the path buffer and return that*/ - const char *icuDataDir; - icuDataDir=u_getDataDirectory(); - if(icuDataDir!=NULL && *icuDataDir!=0) { - return strcpy_returnEnd(pathBuffer, icuDataDir); - } else { - /* there is no icuDataDir either. Just return the empty pathBuffer. */ - return pathBuffer; - } - } - - /* User supplied path did contain a directory portion. - * Copy it to the output path buffer */ - pathLen = (int32_t)(finalSlash - path + 1); - uprv_memcpy(pathBuffer, path, pathLen); - *(pathBuffer+pathLen) = 0; - return pathBuffer+pathLen; - } - - -# define DATA_TYPE "dat" - - U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { - const char *inBasename; - char *basename; - char pathBuffer[1024]; - const DataHeader *pHeader; - dllhandle *handle; - void *val=0; - - if (U_FAILURE(*status)) { - return FALSE; - } - - inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR); - if(inBasename==NULL) { - inBasename = path; - } else { - inBasename++; - } - basename=uprv_computeDirPath(path, pathBuffer); - if(uprv_strcmp(inBasename, U_ICUDATA_NAME".dat") != 0) { - /* must mmap file... 
for build */ - int fd; - int length; - struct stat mystat; - void *data; - UDataMemory_init(pData); /* Clear the output struct. */ - - /* determine the length of the file */ - if(stat(path, &mystat)!=0 || mystat.st_size<=0) { - return FALSE; - } - length=mystat.st_size; - - /* open the file */ - fd=open(path, O_RDONLY); - if(fd==-1) { - return FALSE; - } - - /* get a view of the mapping */ - data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); /* no longer needed */ - if(data==MAP_FAILED) { - // Possibly check the errorno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR? - return FALSE; - } - pData->map = (char *)data + length; - pData->pHeader=(const DataHeader *)data; - pData->mapAddr = data; - return TRUE; - } - -# ifdef OS390BATCH - /* ### hack: we still need to get u_getDataDirectory() fixed - for OS/390 (batch mode - always return "//"? ) - and this here straightened out with LIB_PREFIX and LIB_SUFFIX (both empty?!) - This is probably due to the strange file system on OS/390. It's more like - a database with short entry names than a typical file system. */ - /* U_ICUDATA_NAME should always have the correct name */ - /* BUT FOR BATCH MODE IT IS AN EXCEPTION BECAUSE */ - /* THE FIRST THREE LETTERS ARE PREASSIGNED TO THE */ - /* PROJECT!!!!! */ - uprv_strcpy(pathBuffer, "IXMI" U_ICU_VERSION_SHORT "DA"); -# else - /* set up the library name */ - uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME U_ICU_VERSION_SHORT LIB_SUFFIX); -# endif - -# ifdef UDATA_DEBUG - fprintf(stderr, "dllload: %s ", pathBuffer); -# endif - - handle=dllload(pathBuffer); - -# ifdef UDATA_DEBUG - fprintf(stderr, " -> %08X\n", handle ); -# endif - - if(handle != NULL) { - /* we have a data DLL - what kind of lookup do we need here? */ - /* try to find the Table of Contents */ - UDataMemory_init(pData); /* Clear the output struct. */ - val=dllqueryvar((dllhandle*)handle, U_ICUDATA_ENTRY_NAME); - if(val == 0) { - /* failed... 
so keep looking */ - return FALSE; - } -# ifdef UDATA_DEBUG - fprintf(stderr, "dllqueryvar(%08X, %s) -> %08X\n", handle, U_ICUDATA_ENTRY_NAME, val); -# endif - - pData->pHeader=(const DataHeader *)val; - return TRUE; - } else { - return FALSE; /* no handle */ - } - } - - U_CFUNC void uprv_unmapFile(UDataMemory *pData) { - if(pData!=NULL && pData->map!=NULL) { - uprv_free(pData->map); - pData->map = NULL; - pData->mapAddr = NULL; - pData->pHeader = NULL; - } - } - -#else -# error MAP_IMPLEMENTATION is set incorrectly -#endif diff --git a/deps/node/deps/icu-small/source/common/umapfile.h b/deps/node/deps/icu-small/source/common/umapfile.h deleted file mode 100644 index 92bd567a..00000000 --- a/deps/node/deps/icu-small/source/common/umapfile.h +++ /dev/null @@ -1,57 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************/ - -/*---------------------------------------------------------------------------------- - * - * Memory mapped file wrappers for use by the ICU Data Implementation - * - * Porting note: The implementation of these functions is very platform specific. - * Not all platforms can do real memory mapping. Those that can't - * still must implement these functions, getting the data into memory using - * whatever means are available. - * - * These functions are part of the ICU internal implementation, and - * are not inteded to be used directly by applications. 
- * - *----------------------------------------------------------------------------------*/ - -#ifndef __UMAPFILE_H__ -#define __UMAPFILE_H__ - -#include "unicode/putil.h" -#include "unicode/udata.h" -#include "putilimp.h" - -U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path, UErrorCode *status); -U_CFUNC void uprv_unmapFile(UDataMemory *pData); - -/* MAP_NONE: no memory mapping, no file access at all */ -#define MAP_NONE 0 -#define MAP_WIN32 1 -#define MAP_POSIX 2 -#define MAP_STDIO 3 -#define MAP_390DLL 4 - -#if UCONFIG_NO_FILE_IO -# define MAP_IMPLEMENTATION MAP_NONE -#elif U_PLATFORM_USES_ONLY_WIN32_API -# define MAP_IMPLEMENTATION MAP_WIN32 -#elif U_HAVE_MMAP || U_PLATFORM == U_PF_OS390 -# if U_PLATFORM == U_PF_OS390 && defined (OS390_STUBDATA) - /* No memory mapping for 390 batch mode. Fake it using dll loading. */ -# define MAP_IMPLEMENTATION MAP_390DLL -# else -# define MAP_IMPLEMENTATION MAP_POSIX -# endif -#else /* unknown platform, no memory map implementation: use stdio.h and uprv_malloc() instead */ -# define MAP_IMPLEMENTATION MAP_STDIO -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/umath.cpp b/deps/node/deps/icu-small/source/common/umath.cpp deleted file mode 100644 index 3ab72ab4..00000000 --- a/deps/node/deps/icu-small/source/common/umath.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2006, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* This file contains platform independent math. -*/ - -#include "putilimp.h" - -U_CAPI int32_t U_EXPORT2 -uprv_max(int32_t x, int32_t y) -{ - return (x > y ? x : y); -} - -U_CAPI int32_t U_EXPORT2 -uprv_min(int32_t x, int32_t y) -{ - return (x > y ? 
y : x); -} diff --git a/deps/node/deps/icu-small/source/common/umutablecptrie.cpp b/deps/node/deps/icu-small/source/common/umutablecptrie.cpp deleted file mode 100644 index 40af4b6c..00000000 --- a/deps/node/deps/icu-small/source/common/umutablecptrie.cpp +++ /dev/null @@ -1,1678 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// umutablecptrie.cpp (inspired by utrie2_builder.cpp) -// created: 2017dec29 Markus W. Scherer - -// #define UCPTRIE_DEBUG -#ifdef UCPTRIE_DEBUG -# include -#endif - -#include "unicode/utypes.h" -#include "unicode/ucptrie.h" -#include "unicode/umutablecptrie.h" -#include "unicode/uobject.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "uassert.h" -#include "ucptrie_impl.h" - -U_NAMESPACE_BEGIN - -namespace { - -constexpr int32_t MAX_UNICODE = 0x10ffff; - -constexpr int32_t UNICODE_LIMIT = 0x110000; -constexpr int32_t BMP_LIMIT = 0x10000; -constexpr int32_t ASCII_LIMIT = 0x80; - -constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3; -constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3; -constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3; - -constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3)); - -// Flag values for data blocks. -constexpr uint8_t ALL_SAME = 0; -constexpr uint8_t MIXED = 1; -constexpr uint8_t SAME_AS = 2; - -/** Start with allocation of 16k data entries. */ -constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14); - -/** Grow about 8x each time. */ -constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17); - -/** - * Maximum length of the build-time data array. - * One entry per 0x110000 code points. - */ -constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT; - -// Flag values for index-3 blocks while compacting/building. 
-constexpr uint8_t I3_NULL = 0; -constexpr uint8_t I3_BMP = 1; -constexpr uint8_t I3_16 = 2; -constexpr uint8_t I3_18 = 3; - -constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8; - -class AllSameBlocks; - -class MutableCodePointTrie : public UMemory { -public: - MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode); - MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode); - MutableCodePointTrie(const MutableCodePointTrie &other) = delete; - ~MutableCodePointTrie(); - - MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete; - - static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode); - static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode); - - uint32_t get(UChar32 c) const; - int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context, - uint32_t *pValue) const; - - void set(UChar32 c, uint32_t value, UErrorCode &errorCode); - void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode); - - UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode); - -private: - void clear(); - - bool ensureHighStart(UChar32 c); - int32_t allocDataBlock(int32_t blockLength); - int32_t getDataBlock(int32_t i); - - void maskValues(uint32_t mask); - UChar32 findHighStart() const; - int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks); - int32_t compactData(int32_t fastILimit, uint32_t *newData, int32_t dataNullIndex); - int32_t compactIndex(int32_t fastILimit, UErrorCode &errorCode); - int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode); - - uint32_t *index = nullptr; - int32_t indexCapacity = 0; - int32_t index3NullOffset = -1; - uint32_t *data = nullptr; - int32_t dataCapacity = 0; - int32_t dataLength = 0; - int32_t dataNullOffset = -1; - - uint32_t origInitialValue; - uint32_t 
initialValue; - uint32_t errorValue; - UChar32 highStart; - uint32_t highValue; -#ifdef UCPTRIE_DEBUG -public: - const char *name; -#endif -private: - /** Temporary array while building the final data. */ - uint16_t *index16 = nullptr; - uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3]; -}; - -MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) : - origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue), - highStart(0), highValue(initialValue) -#ifdef UCPTRIE_DEBUG - , name("open") -#endif - { - if (U_FAILURE(errorCode)) { return; } - index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4); - data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4); - if (index == nullptr || data == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - indexCapacity = BMP_I_LIMIT; - dataCapacity = INITIAL_DATA_LENGTH; -} - -MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) : - index3NullOffset(other.index3NullOffset), - dataNullOffset(other.dataNullOffset), - origInitialValue(other.origInitialValue), initialValue(other.initialValue), - errorValue(other.errorValue), - highStart(other.highStart), highValue(other.highValue) -#ifdef UCPTRIE_DEBUG - , name("mutable clone") -#endif - { - if (U_FAILURE(errorCode)) { return; } - int32_t iCapacity = highStart <= BMP_LIMIT ? 
BMP_I_LIMIT : I_LIMIT; - index = (uint32_t *)uprv_malloc(iCapacity * 4); - data = (uint32_t *)uprv_malloc(other.dataCapacity * 4); - if (index == nullptr || data == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - indexCapacity = iCapacity; - dataCapacity = other.dataCapacity; - - int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; - uprv_memcpy(flags, other.flags, iLimit); - uprv_memcpy(index, other.index, iLimit * 4); - uprv_memcpy(data, other.data, (size_t)other.dataLength * 4); - dataLength = other.dataLength; - U_ASSERT(other.index16 == nullptr); -} - -MutableCodePointTrie::~MutableCodePointTrie() { - uprv_free(index); - uprv_free(data); - uprv_free(index16); -} - -MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) { - // Use the highValue as the initialValue to reduce the highStart. - uint32_t errorValue = ucpmap_get(map, -1); - uint32_t initialValue = ucpmap_get(map, 0x10ffff); - LocalPointer mutableTrie( - new MutableCodePointTrie(initialValue, errorValue, errorCode), - errorCode); - if (U_FAILURE(errorCode)) { - return nullptr; - } - UChar32 start = 0, end; - uint32_t value; - while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0, - nullptr, nullptr, &value)) >= 0) { - if (value != initialValue) { - if (start == end) { - mutableTrie->set(start, value, errorCode); - } else { - mutableTrie->setRange(start, end, value, errorCode); - } - } - start = end + 1; - } - if (U_SUCCESS(errorCode)) { - return mutableTrie.orphan(); - } else { - return nullptr; - } -} - -MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) { - // Use the highValue as the initialValue to reduce the highStart. 
- uint32_t errorValue; - uint32_t initialValue; - switch (trie->valueWidth) { - case UCPTRIE_VALUE_BITS_16: - errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; - initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; - break; - case UCPTRIE_VALUE_BITS_32: - errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; - initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; - break; - case UCPTRIE_VALUE_BITS_8: - errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; - initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; - break; - default: - // Unreachable if the trie is properly initialized. - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - LocalPointer mutableTrie( - new MutableCodePointTrie(initialValue, errorValue, errorCode), - errorCode); - if (U_FAILURE(errorCode)) { - return nullptr; - } - UChar32 start = 0, end; - uint32_t value; - while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, - nullptr, nullptr, &value)) >= 0) { - if (value != initialValue) { - if (start == end) { - mutableTrie->set(start, value, errorCode); - } else { - mutableTrie->setRange(start, end, value, errorCode); - } - } - start = end + 1; - } - if (U_SUCCESS(errorCode)) { - return mutableTrie.orphan(); - } else { - return nullptr; - } -} - -void MutableCodePointTrie::clear() { - index3NullOffset = dataNullOffset = -1; - dataLength = 0; - highValue = initialValue = origInitialValue; - highStart = 0; - uprv_free(index16); - index16 = nullptr; -} - -uint32_t MutableCodePointTrie::get(UChar32 c) const { - if ((uint32_t)c > MAX_UNICODE) { - return errorValue; - } - if (c >= highStart) { - return highValue; - } - int32_t i = c >> UCPTRIE_SHIFT_3; - if (flags[i] == ALL_SAME) { - return index[i]; - } else { - return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)]; - } -} - 
-inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue, - UCPMapValueFilter *filter, const void *context) { - if (value == initialValue) { - value = nullValue; - } else if (filter != nullptr) { - value = filter(context, value); - } - return value; -} - -UChar32 MutableCodePointTrie::getRange( - UChar32 start, UCPMapValueFilter *filter, const void *context, - uint32_t *pValue) const { - if ((uint32_t)start > MAX_UNICODE) { - return U_SENTINEL; - } - if (start >= highStart) { - if (pValue != nullptr) { - uint32_t value = highValue; - if (filter != nullptr) { value = filter(context, value); } - *pValue = value; - } - return MAX_UNICODE; - } - uint32_t nullValue = initialValue; - if (filter != nullptr) { nullValue = filter(context, nullValue); } - UChar32 c = start; - uint32_t value; - bool haveValue = false; - int32_t i = c >> UCPTRIE_SHIFT_3; - do { - if (flags[i] == ALL_SAME) { - uint32_t value2 = maybeFilterValue(index[i], initialValue, nullValue, - filter, context); - if (haveValue) { - if (value2 != value) { - return c - 1; - } - } else { - value = value2; - if (pValue != nullptr) { *pValue = value; } - haveValue = true; - } - c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK; - } else /* MIXED */ { - int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK); - uint32_t value2 = maybeFilterValue(data[di], initialValue, nullValue, - filter, context); - if (haveValue) { - if (value2 != value) { - return c - 1; - } - } else { - value = value2; - if (pValue != nullptr) { *pValue = value; } - haveValue = true; - } - while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) { - if (maybeFilterValue(data[++di], initialValue, nullValue, - filter, context) != value) { - return c - 1; - } - } - } - ++i; - } while (c < highStart); - U_ASSERT(haveValue); - if (maybeFilterValue(highValue, initialValue, nullValue, - filter, context) != value) { - return c - 1; - } else { - return MAX_UNICODE; - } -} - -void -writeBlock(uint32_t *block, 
uint32_t value) { - uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH; - while (block < limit) { - *block++ = value; - } -} - -bool MutableCodePointTrie::ensureHighStart(UChar32 c) { - if (c >= highStart) { - // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction. - c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); - int32_t i = highStart >> UCPTRIE_SHIFT_3; - int32_t iLimit = c >> UCPTRIE_SHIFT_3; - if (iLimit > indexCapacity) { - uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4); - if (newIndex == nullptr) { return false; } - uprv_memcpy(newIndex, index, i * 4); - uprv_free(index); - index = newIndex; - indexCapacity = I_LIMIT; - } - do { - flags[i] = ALL_SAME; - index[i] = initialValue; - } while(++i < iLimit); - highStart = c; - } - return true; -} - -int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) { - int32_t newBlock = dataLength; - int32_t newTop = newBlock + blockLength; - if (newTop > dataCapacity) { - int32_t capacity; - if (dataCapacity < MEDIUM_DATA_LENGTH) { - capacity = MEDIUM_DATA_LENGTH; - } else if (dataCapacity < MAX_DATA_LENGTH) { - capacity = MAX_DATA_LENGTH; - } else { - // Should never occur. - // Either MAX_DATA_LENGTH is incorrect, - // or the code writes more values than should be possible. - return -1; - } - uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4); - if (newData == nullptr) { - return -1; - } - uprv_memcpy(newData, data, (size_t)dataLength * 4); - uprv_free(data); - data = newData; - dataCapacity = capacity; - } - dataLength = newTop; - return newBlock; -} - -/** - * No error checking for illegal arguments. 
- * - * @return -1 if no new data block available (out of memory in data array) - * @internal - */ -int32_t MutableCodePointTrie::getDataBlock(int32_t i) { - if (flags[i] == MIXED) { - return index[i]; - } - if (i < BMP_I_LIMIT) { - int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH); - if (newBlock < 0) { return newBlock; } - int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1); - int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; - do { - U_ASSERT(flags[iStart] == ALL_SAME); - writeBlock(data + newBlock, index[iStart]); - flags[iStart] = MIXED; - index[iStart++] = newBlock; - newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; - } while (iStart < iLimit); - return index[i]; - } else { - int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH); - if (newBlock < 0) { return newBlock; } - writeBlock(data + newBlock, index[i]); - flags[i] = MIXED; - index[i] = newBlock; - return newBlock; - } -} - -void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { - return; - } - if ((uint32_t)c > MAX_UNICODE) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - int32_t block; - if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - - data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value; -} - -void -fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) { - uint32_t *pLimit = block + limit; - block += start; - while (block < pLimit) { - *block++ = value; - } -} - -void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { - return; - } - if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if (!ensureHighStart(end)) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - - UChar32 limit = end + 1; - if (start & 
UCPTRIE_SMALL_DATA_MASK) { - // Set partial block at [start..following block boundary[. - int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3); - if (block < 0) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - - UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK; - if (nextStart <= limit) { - fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, - value); - start = nextStart; - } else { - fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK, - value); - return; - } - } - - // Number of positions in the last, partial block. - int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK; - - // Round down limit to a block boundary. - limit &= ~UCPTRIE_SMALL_DATA_MASK; - - // Iterate over all-value blocks. - while (start < limit) { - int32_t i = start >> UCPTRIE_SHIFT_3; - if (flags[i] == ALL_SAME) { - index[i] = value; - } else /* MIXED */ { - fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value); - } - start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; - } - - if (rest > 0) { - // Set partial block at [last block boundary..limit[. 
- int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3); - if (block < 0) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return; - } - - fillBlock(data + block, 0, rest, value); - } -} - -/* compaction --------------------------------------------------------------- */ - -void MutableCodePointTrie::maskValues(uint32_t mask) { - initialValue &= mask; - errorValue &= mask; - highValue &= mask; - int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; - for (int32_t i = 0; i < iLimit; ++i) { - if (flags[i] == ALL_SAME) { - index[i] &= mask; - } - } - for (int32_t i = 0; i < dataLength; ++i) { - data[i] &= mask; - } -} - -inline bool -equalBlocks(const uint32_t *s, const uint32_t *t, int32_t length) { - while (length > 0 && *s == *t) { - ++s; - ++t; - --length; - } - return length == 0; -} - -inline bool -equalBlocks(const uint16_t *s, const uint32_t *t, int32_t length) { - while (length > 0 && *s == *t) { - ++s; - ++t; - --length; - } - return length == 0; -} - -inline bool -equalBlocks(const uint16_t *s, const uint16_t *t, int32_t length) { - while (length > 0 && *s == *t) { - ++s; - ++t; - --length; - } - return length == 0; -} - -bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) { - const uint32_t *pLimit = p + length; - while (p < pLimit && *p == value) { ++p; } - return p == pLimit; -} - -/** Search for an identical block. */ -int32_t findSameBlock(const uint32_t *p, int32_t pStart, int32_t length, - const uint32_t *q, int32_t qStart, int32_t blockLength) { - // Ensure that we do not even partially get past length. - length -= blockLength; - - q += qStart; - while (pStart <= length) { - if (equalBlocks(p + pStart, q, blockLength)) { - return pStart; - } - ++pStart; - } - return -1; -} - -int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length, - const uint32_t *q, int32_t qStart, int32_t blockLength) { - // Ensure that we do not even partially get past length. 
- length -= blockLength; - - q += qStart; - while (pStart <= length) { - if (equalBlocks(p + pStart, q, blockLength)) { - return pStart; - } - ++pStart; - } - return -1; -} - -int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length, - const uint16_t *q, int32_t qStart, int32_t blockLength) { - // Ensure that we do not even partially get past length. - length -= blockLength; - - q += qStart; - while (pStart <= length) { - if (equalBlocks(p + pStart, q, blockLength)) { - return pStart; - } - ++pStart; - } - return -1; -} - -int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit, - uint32_t value, int32_t blockLength) { - // Ensure that we do not even partially get past limit. - limit -= blockLength; - - for (int32_t block = start; block <= limit; ++block) { - if (p[block] == value) { - for (int32_t i = 1;; ++i) { - if (i == blockLength) { - return block; - } - if (p[block + i] != value) { - block += i; - break; - } - } - } - } - return -1; -} - -/** - * Look for maximum overlap of the beginning of the other block - * with the previous, adjacent block. 
- */ -int32_t getOverlap(const uint32_t *p, int32_t length, - const uint32_t *q, int32_t qStart, int32_t blockLength) { - int32_t overlap = blockLength - 1; - U_ASSERT(overlap <= length); - q += qStart; - while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) { - --overlap; - } - return overlap; -} - -int32_t getOverlap(const uint16_t *p, int32_t length, - const uint32_t *q, int32_t qStart, int32_t blockLength) { - int32_t overlap = blockLength - 1; - U_ASSERT(overlap <= length); - q += qStart; - while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) { - --overlap; - } - return overlap; -} - -int32_t getOverlap(const uint16_t *p, int32_t length, - const uint16_t *q, int32_t qStart, int32_t blockLength) { - int32_t overlap = blockLength - 1; - U_ASSERT(overlap <= length); - q += qStart; - while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) { - --overlap; - } - return overlap; -} - -int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value, - int32_t blockLength) { - int32_t min = length - (blockLength - 1); - int32_t i = length; - while (min < i && p[i - 1] == value) { --i; } - return length - i; -} - -bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) { - for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { - if (index[i] == dataOffset) { - return true; - } - } - return false; -} - -/** - * Finds the start of the last range in the trie by enumerating backward. - * Indexes for code points higher than this will be omitted. 
- */ -UChar32 MutableCodePointTrie::findHighStart() const { - int32_t i = highStart >> UCPTRIE_SHIFT_3; - while (i > 0) { - bool match; - if (flags[--i] == ALL_SAME) { - match = index[i] == highValue; - } else /* MIXED */ { - const uint32_t *p = data + index[i]; - for (int32_t j = 0;; ++j) { - if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) { - match = true; - break; - } - if (p[j] != highValue) { - match = false; - break; - } - } - } - if (!match) { - return (i + 1) << UCPTRIE_SHIFT_3; - } - } - return 0; -} - -class AllSameBlocks { -public: - static constexpr int32_t NEW_UNIQUE = -1; - static constexpr int32_t OVERFLOW = -2; - - AllSameBlocks() : length(0), mostRecent(-1) {} - - int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) { - if (mostRecent >= 0 && values[mostRecent] == value) { - refCounts[mostRecent] += count; - return indexes[mostRecent]; - } - for (int32_t i = 0; i < length; ++i) { - if (values[i] == value) { - mostRecent = i; - refCounts[i] += count; - return indexes[i]; - } - } - if (length == CAPACITY) { - return OVERFLOW; - } - mostRecent = length; - indexes[length] = index; - values[length] = value; - refCounts[length++] = count; - return NEW_UNIQUE; - } - - /** Replaces the block which has the lowest reference count. 
*/ - void add(int32_t index, int32_t count, uint32_t value) { - U_ASSERT(length == CAPACITY); - int32_t least = -1; - int32_t leastCount = I_LIMIT; - for (int32_t i = 0; i < length; ++i) { - U_ASSERT(values[i] != value); - if (refCounts[i] < leastCount) { - least = i; - leastCount = refCounts[i]; - } - } - U_ASSERT(least >= 0); - mostRecent = least; - indexes[least] = index; - values[least] = value; - refCounts[least] = count; - } - - int32_t findMostUsed() const { - if (length == 0) { return -1; } - int32_t max = -1; - int32_t maxCount = 0; - for (int32_t i = 0; i < length; ++i) { - if (refCounts[i] > maxCount) { - max = i; - maxCount = refCounts[i]; - } - } - return indexes[max]; - } - -private: - static constexpr int32_t CAPACITY = 32; - - int32_t length; - int32_t mostRecent; - - int32_t indexes[CAPACITY]; - uint32_t values[CAPACITY]; - int32_t refCounts[CAPACITY]; -}; - -int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) { -#ifdef UCPTRIE_DEBUG - bool overflow = false; -#endif - - // ASCII data will be stored as a linear table, even if the following code - // does not yet count it that way. - int32_t newDataCapacity = ASCII_LIMIT; - // Add room for a small data null block in case it would match the start of - // a fast data block where dataNullOffset must not be set in that case. - newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; - // Add room for special values (errorValue, highValue) and padding. - newDataCapacity += 4; - int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; - int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; - int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; - for (int32_t i = 0; i < iLimit; i += inc) { - if (i == fastILimit) { - blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; - inc = 1; - } - uint32_t value = index[i]; - if (flags[i] == MIXED) { - // Really mixed? 
- const uint32_t *p = data + value; - value = *p; - if (allValuesSameAs(p + 1, blockLength - 1, value)) { - flags[i] = ALL_SAME; - index[i] = value; - // Fall through to ALL_SAME handling. - } else { - newDataCapacity += blockLength; - continue; - } - } else { - U_ASSERT(flags[i] == ALL_SAME); - if (inc > 1) { - // Do all of the fast-range data block's ALL_SAME parts have the same value? - bool allSame = true; - int32_t next_i = i + inc; - for (int32_t j = i + 1; j < next_i; ++j) { - U_ASSERT(flags[j] == ALL_SAME); - if (index[j] != value) { - allSame = false; - break; - } - } - if (!allSame) { - // Turn it into a MIXED block. - if (getDataBlock(i) < 0) { - return -1; - } - newDataCapacity += blockLength; - continue; - } - } - } - // Is there another ALL_SAME block with the same value? - int32_t other = allSameBlocks.findOrAdd(i, inc, value); - if (other == AllSameBlocks::OVERFLOW) { - // The fixed-size array overflowed. Slow check for a duplicate block. -#ifdef UCPTRIE_DEBUG - if (!overflow) { - puts("UCPTrie AllSameBlocks overflow"); - overflow = true; - } -#endif - int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; - for (int32_t j = 0;; j += jInc) { - if (j == i) { - allSameBlocks.add(i, inc, value); - break; - } - if (j == fastILimit) { - jInc = 1; - } - if (flags[j] == ALL_SAME && index[j] == value) { - allSameBlocks.add(j, jInc + inc, value); - other = j; - break; - // We could keep counting blocks with the same value - // before we add the first one, which may improve compaction in rare cases, - // but it would make it slower. - } - } - } - if (other >= 0) { - flags[i] = SAME_AS; - index[i] = other; - } else { - // New unique same-value block. 
- newDataCapacity += blockLength; - } - } - return newDataCapacity; -} - -#ifdef UCPTRIE_DEBUG -# define DEBUG_DO(expr) expr -#else -# define DEBUG_DO(expr) -#endif - -#ifdef UCPTRIE_DEBUG -// Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF -int32_t appendValue(char s[], int32_t length, uint32_t value) { - value ^= value >> 16; - value ^= value >> 8; - s[length] = 0xE2; - s[length + 1] = (char)(0xA0 + ((value >> 6) & 3)); - s[length + 2] = (char)(0x80 + (value & 0x3F)); - return length + 3; -} - -void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value, - UChar32 start, int32_t overlap, uint32_t initialValue) { - char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3]; - int32_t length = 0; - int32_t i; - for (i = 0; i < overlap; ++i) { - length = appendValue(s, length, 0); // Braille blank - } - s[length++] = '|'; - for (; i < blockLength; ++i) { - if (block != nullptr) { - value = block[i]; - } - if (value == initialValue) { - value = 0x40; // Braille lower left dot - } - length = appendValue(s, length, value); - } - s[length] = 0; - start += overlap; - if (start <= 0xffff) { - printf(" %04lX %s|\n", (long)start, s); - } else if (start <= 0xfffff) { - printf(" %5lX %s|\n", (long)start, s); - } else { - printf(" %6lX %s|\n", (long)start, s); - } -} -#endif - -/** - * Compacts a build-time trie. - * - * The compaction - * - removes blocks that are identical with earlier ones - * - overlaps each new non-duplicate block as much as possible with the previously-written one - * - works with fast-range data blocks whose length is a multiple of that of - * higher-code-point data blocks - * - * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks. 
- */ -int32_t MutableCodePointTrie::compactData(int32_t fastILimit, - uint32_t *newData, int32_t dataNullIndex) { -#ifdef UCPTRIE_DEBUG - int32_t countSame=0, sumOverlaps=0; - bool printData = dataLength == 29088 /* line.brk */ || - // dataLength == 30048 /* CanonIterData */ || - dataLength == 50400 /* zh.txt~stroke */; -#endif - - // The linear ASCII data has been copied into newData already. - int32_t newDataLength = 0; - for (int32_t i = 0; newDataLength < ASCII_LIMIT; - newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { - index[i] = newDataLength; -#ifdef UCPTRIE_DEBUG - if (printData) { - printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue); - } -#endif - } - - int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; - int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; - int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; - int32_t fastLength = 0; - for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) { - if (i == fastILimit) { - blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; - inc = 1; - fastLength = newDataLength; - } - if (flags[i] == ALL_SAME) { - uint32_t value = index[i]; - int32_t n; - // Find an earlier part of the data array of length blockLength - // that is filled with this value. - // If we find a match, and the current block is the data null block, - // and it is not a fast block but matches the start of a fast block, - // then we need to continue looking. - // This is because this small block is shorter than the fast block, - // and not all of the rest of the fast block is filled with this value. - // Otherwise trie.getRange() would detect that the fast block starts at - // dataNullOffset and assume incorrectly that it is filled with the null value. 
- for (int32_t start = 0; - (n = findAllSameBlock(newData, start, newDataLength, - value, blockLength)) >= 0 && - i == dataNullIndex && i >= fastILimit && n < fastLength && - isStartOfSomeFastBlock(n, index, fastILimit); - start = n + 1) {} - if (n >= 0) { - DEBUG_DO(++countSame); - index[i] = n; - } else { - n = getAllSameOverlap(newData, newDataLength, value, blockLength); - DEBUG_DO(sumOverlaps += n); -#ifdef UCPTRIE_DEBUG - if (printData) { - printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue); - } -#endif - index[i] = newDataLength - n; - while (n < blockLength) { - newData[newDataLength++] = value; - ++n; - } - } - } else if (flags[i] == MIXED) { - const uint32_t *block = data + index[i]; - int32_t n = findSameBlock(newData, 0, newDataLength, block, 0, blockLength); - if (n >= 0) { - DEBUG_DO(++countSame); - index[i] = n; - } else { - n = getOverlap(newData, newDataLength, block, 0, blockLength); - DEBUG_DO(sumOverlaps += n); -#ifdef UCPTRIE_DEBUG - if (printData) { - printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue); - } -#endif - index[i] = newDataLength - n; - while (n < blockLength) { - newData[newDataLength++] = block[n++]; - } - } - } else /* SAME_AS */ { - uint32_t j = index[i]; - index[i] = index[j]; - } - } - -#ifdef UCPTRIE_DEBUG - /* we saved some space */ - printf("compacting UCPTrie: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n", - (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps); -#endif - return newDataLength; -} - -int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, UErrorCode &errorCode) { - int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3); - if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) { - // Only the linear fast index, no multi-stage index tables. - index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; - return fastIndexLength; - } - - // Condense the fast index table. 
- // Also, does it contain an index-3 block with all dataNullOffset? - uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH]; // fastIndexLength - int32_t i3FirstNull = -1; - for (int32_t i = 0, j = 0; i < fastILimit; ++j) { - uint32_t i3 = index[i]; - fastIndex[j] = (uint16_t)i3; - if (i3 == (uint32_t)dataNullOffset) { - if (i3FirstNull < 0) { - i3FirstNull = j; - } else if (index3NullOffset < 0 && - (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) { - index3NullOffset = i3FirstNull; - } - } else { - i3FirstNull = -1; - } - // Set the index entries that compactData() skipped. - // Needed when the multi-stage index covers the fast index range as well. - int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; - while (++i < iNext) { - i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; - index[i] = i3; - } - } - - // Examine index-3 blocks. For each determine one of: - // - same as the index-3 null block - // - same as a fast-index block - // - 16-bit indexes - // - 18-bit indexes - // We store this in the first flags entry for the index-3 block. - // - // Also determine an upper limit for the index-3 table length. - int32_t index3Capacity = 0; - i3FirstNull = index3NullOffset; - // If the fast index covers the whole BMP, then - // the multi-stage index is only for supplementary code points. - // Otherwise, the multi-stage index covers all of Unicode. - int32_t iStart = fastILimit < BMP_I_LIMIT ? 
0 : BMP_I_LIMIT; - int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; - for (int32_t i = iStart; i < iLimit;) { - int32_t j = i; - int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH; - uint32_t oredI3 = 0; - bool isNull = true; - do { - uint32_t i3 = index[j]; - oredI3 |= i3; - if (i3 != (uint32_t)dataNullOffset) { - isNull = false; - } - } while (++j < jLimit); - if (isNull) { - flags[i] = I3_NULL; - if (i3FirstNull < 0) { - if (oredI3 <= 0xffff) { - index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH; - } else { - index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; - } - i3FirstNull = 0; - } - } else { - if (oredI3 <= 0xffff) { - int32_t n = findSameBlock(fastIndex, 0, fastIndexLength, - index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH); - if (n >= 0) { - flags[i] = I3_BMP; - index[i] = n; - } else { - flags[i] = I3_16; - index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH; - } - } else { - flags[i] = I3_18; - index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; - } - } - i = j; - } - - int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3; - - // Length of the index-1 table, rounded up. - int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2; - - // Index table: Fast index, index-1, index-3, index-2. - // +1 for possible index table padding. - int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1; - index16 = (uint16_t *)uprv_malloc(index16Capacity * 2); - if (index16 == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - uprv_memcpy(index16, fastIndex, fastIndexLength * 2); - - // Compact the index-3 table and write an uncompacted version of the index-2 table. 
- uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity - int32_t i2Length = 0; - i3FirstNull = index3NullOffset; - int32_t index3Start = fastIndexLength + index1Length; - int32_t indexLength = index3Start; - for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) { - int32_t i3; - uint8_t f = flags[i]; - if (f == I3_NULL && i3FirstNull < 0) { - // First index-3 null block. Write & overlap it like a normal block, then remember it. - f = dataNullOffset <= 0xffff ? I3_16 : I3_18; - i3FirstNull = 0; - } - if (f == I3_NULL) { - i3 = index3NullOffset; - } else if (f == I3_BMP) { - i3 = index[i]; - } else if (f == I3_16) { - int32_t n = findSameBlock(index16, index3Start, indexLength, - index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH); - if (n >= 0) { - i3 = n; - } else { - if (indexLength == index3Start) { - // No overlap at the boundary between the index-1 and index-3 tables. - n = 0; - } else { - n = getOverlap(index16, indexLength, - index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH); - } - i3 = indexLength - n; - while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) { - index16[indexLength++] = index[i + n++]; - } - } - } else { - U_ASSERT(f == I3_18); - // Encode an index-3 block that contains one or more data indexes exceeding 16 bits. 
- int32_t j = i; - int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH; - int32_t k = indexLength; - do { - ++k; - uint32_t v = index[j++]; - uint32_t upperBits = (v & 0x30000) >> 2; - index16[k++] = v; - v = index[j++]; - upperBits |= (v & 0x30000) >> 4; - index16[k++] = v; - v = index[j++]; - upperBits |= (v & 0x30000) >> 6; - index16[k++] = v; - v = index[j++]; - upperBits |= (v & 0x30000) >> 8; - index16[k++] = v; - v = index[j++]; - upperBits |= (v & 0x30000) >> 10; - index16[k++] = v; - v = index[j++]; - upperBits |= (v & 0x30000) >> 12; - index16[k++] = v; - v = index[j++]; - upperBits |= (v & 0x30000) >> 14; - index16[k++] = v; - v = index[j++]; - upperBits |= (v & 0x30000) >> 16; - index16[k++] = v; - index16[k - 9] = upperBits; - } while (j < jLimit); - int32_t n = findSameBlock(index16, index3Start, indexLength, - index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH); - if (n >= 0) { - i3 = n | 0x8000; - } else { - if (indexLength == index3Start) { - // No overlap at the boundary between the index-1 and index-3 tables. - n = 0; - } else { - n = getOverlap(index16, indexLength, - index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH); - } - i3 = (indexLength - n) | 0x8000; - if (n > 0) { - int32_t start = indexLength; - while (n < INDEX_3_18BIT_BLOCK_LENGTH) { - index16[indexLength++] = index16[start + n++]; - } - } else { - indexLength += INDEX_3_18BIT_BLOCK_LENGTH; - } - } - } - if (index3NullOffset < 0 && i3FirstNull >= 0) { - index3NullOffset = i3; - } - // Set the index-2 table entry. - index2[i2Length++] = i3; - } - U_ASSERT(i2Length == index2Capacity); - U_ASSERT(indexLength <= index3Start + index3Capacity); - - if (index3NullOffset < 0) { - index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; - } - if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) { - // The index-3 offsets exceed 15 bits, or - // the last one cannot be distinguished from the no-null-block value. 
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - // Compact the index-2 table and write the index-1 table. - int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH; - int32_t i1 = fastIndexLength; - for (int32_t i = 0; i < i2Length; i += blockLength) { - if ((i2Length - i) < blockLength) { - // highStart is inside the last index-2 block. Shorten it. - blockLength = i2Length - i; - } - int32_t i2; - int32_t n = findSameBlock(index16, index3Start, indexLength, - index2, i, blockLength); - if (n >= 0) { - i2 = n; - } else { - if (indexLength == index3Start) { - // No overlap at the boundary between the index-1 and index-3/2 tables. - n = 0; - } else { - n = getOverlap(index16, indexLength, index2, i, blockLength); - } - i2 = indexLength - n; - while (n < blockLength) { - index16[indexLength++] = index2[i + n++]; - } - } - // Set the index-1 table entry. - index16[i1++] = i2; - } - U_ASSERT(i1 == index3Start); - U_ASSERT(indexLength <= index16Capacity); - -#ifdef UCPTRIE_DEBUG - /* we saved some space */ - printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n", - (long)iLimit, (long)indexLength); -#endif - - return indexLength; -} - -int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) { - // Find the real highStart and round it up. - U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0); - highValue = get(MAX_UNICODE); - int32_t realHighStart = findHighStart(); - realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) & - ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); - if (realHighStart == UNICODE_LIMIT) { - highValue = initialValue; - } - -#ifdef UCPTRIE_DEBUG - printf("UCPTrie: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n", - (long)realHighStart, (long)highValue, (long)initialValue); -#endif - - // We always store indexes and data values for the fast range. - // Pin highStart to the top of that range while building. 
- UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3; - if (realHighStart < fastLimit) { - for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) { - flags[i] = ALL_SAME; - index[i] = highValue; - } - highStart = fastLimit; - } else { - highStart = realHighStart; - } - - uint32_t asciiData[ASCII_LIMIT]; - for (int32_t i = 0; i < ASCII_LIMIT; ++i) { - asciiData[i] = get(i); - } - - // First we look for which data blocks have the same value repeated over the whole block, - // deduplicate such blocks, find a good null data block (for faster enumeration), - // and get an upper bound for the necessary data array length. - AllSameBlocks allSameBlocks; - int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks); - if (newDataCapacity < 0) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4); - if (newData == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - uprv_memcpy(newData, asciiData, sizeof(asciiData)); - - int32_t dataNullIndex = allSameBlocks.findMostUsed(); - int32_t newDataLength = compactData(fastILimit, newData, dataNullIndex); - U_ASSERT(newDataLength <= newDataCapacity); - uprv_free(data); - data = newData; - dataCapacity = newDataCapacity; - dataLength = newDataLength; - if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) { - // The offset of the last data block is too high to be stored in the index table. 
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - if (dataNullIndex >= 0) { - dataNullOffset = index[dataNullIndex]; -#ifdef UCPTRIE_DEBUG - if (data[dataNullOffset] != initialValue) { - printf("UCPTrie initialValue %lx -> more common nullValue %lx\n", - (long)initialValue, (long)data[dataNullOffset]); - } -#endif - initialValue = data[dataNullOffset]; - } else { - dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET; - } - - int32_t indexLength = compactIndex(fastILimit, errorCode); - highStart = realHighStart; - return indexLength; -} - -UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { - return nullptr; - } - if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type || - valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - - // The mutable trie always stores 32-bit values. - // When we build a UCPTrie for a smaller value width, we first mask off unused bits - // before compacting the data. - switch (valueWidth) { - case UCPTRIE_VALUE_BITS_32: - break; - case UCPTRIE_VALUE_BITS_16: - maskValues(0xffff); - break; - case UCPTRIE_VALUE_BITS_8: - maskValues(0xff); - break; - default: - break; - } - - UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT; - int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode); - if (U_FAILURE(errorCode)) { - clear(); - return nullptr; - } - - // Ensure data table alignment: The index length must be even for uint32_t data. - if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) { - index16[indexLength++] = 0xffee; // arbitrary value - } - - // Make the total trie structure length a multiple of 4 bytes by padding the data table, - // and store special values as the last two data values. 
- int32_t length = indexLength * 2; - if (valueWidth == UCPTRIE_VALUE_BITS_16) { - if (((indexLength ^ dataLength) & 1) != 0) { - // padding - data[dataLength++] = errorValue; - } - if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { - data[dataLength++] = highValue; - data[dataLength++] = errorValue; - } - length += dataLength * 2; - } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { - // 32-bit data words never need padding to a multiple of 4 bytes. - if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { - if (data[dataLength - 1] != highValue) { - data[dataLength++] = highValue; - } - data[dataLength++] = errorValue; - } - length += dataLength * 4; - } else { - int32_t and3 = (length + dataLength) & 3; - if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) { - // all set - } else if(and3 == 3 && data[dataLength - 1] == highValue) { - data[dataLength++] = errorValue; - } else { - while (and3 != 2) { - data[dataLength++] = highValue; - and3 = (and3 + 1) & 3; - } - data[dataLength++] = highValue; - data[dataLength++] = errorValue; - } - length += dataLength; - } - - // Calculate the total length of the UCPTrie as a single memory block. - length += sizeof(UCPTrie); - U_ASSERT((length & 3) == 0); - - uint8_t *bytes = (uint8_t *)uprv_malloc(length); - if (bytes == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - clear(); - return nullptr; - } - UCPTrie *trie = reinterpret_cast(bytes); - uprv_memset(trie, 0, sizeof(UCPTrie)); - trie->indexLength = indexLength; - trie->dataLength = dataLength; - - trie->highStart = highStart; - // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes. - // Runtime code needs to then test for the real highStart as well. 
- trie->shifted12HighStart = (highStart + 0xfff) >> 12; - trie->type = type; - trie->valueWidth = valueWidth; - - trie->index3NullOffset = index3NullOffset; - trie->dataNullOffset = dataNullOffset; - trie->nullValue = initialValue; - - bytes += sizeof(UCPTrie); - - // Fill the index and data arrays. - uint16_t *dest16 = (uint16_t *)bytes; - trie->index = dest16; - - if (highStart <= fastLimit) { - // Condense only the fast index from the mutable-trie index. - for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) { - *dest16++ = (uint16_t)index[i]; // dest16[j] - } - } else { - uprv_memcpy(dest16, index16, indexLength * 2); - dest16 += indexLength; - } - bytes += indexLength * 2; - - // Write the data array. - const uint32_t *p = data; - switch (valueWidth) { - case UCPTRIE_VALUE_BITS_16: - // Write 16-bit data values. - trie->data.ptr16 = dest16; - for (int32_t i = dataLength; i > 0; --i) { - *dest16++ = (uint16_t)*p++; - } - break; - case UCPTRIE_VALUE_BITS_32: - // Write 32-bit data values. - trie->data.ptr32 = (uint32_t *)bytes; - uprv_memcpy(bytes, p, (size_t)dataLength * 4); - break; - case UCPTRIE_VALUE_BITS_8: - // Write 8-bit data values. - trie->data.ptr8 = bytes; - for (int32_t i = dataLength; i > 0; --i) { - *bytes++ = (uint8_t)*p++; - } - break; - default: - // Will not occur, valueWidth checked at the beginning. 
- break; - } - -#ifdef UCPTRIE_DEBUG - trie->name = name; - - ucptrie_printLengths(trie, ""); -#endif - - clear(); - return trie; -} - -} // namespace - -U_NAMESPACE_END - -U_NAMESPACE_USE - -U_CAPI UMutableCPTrie * U_EXPORT2 -umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return nullptr; - } - LocalPointer trie( - new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode); - if (U_FAILURE(*pErrorCode)) { - return nullptr; - } - return reinterpret_cast(trie.orphan()); -} - -U_CAPI UMutableCPTrie * U_EXPORT2 -umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return nullptr; - } - if (other == nullptr) { - return nullptr; - } - LocalPointer clone( - new MutableCodePointTrie(*reinterpret_cast(other), *pErrorCode), *pErrorCode); - if (U_FAILURE(*pErrorCode)) { - return nullptr; - } - return reinterpret_cast(clone.orphan()); -} - -U_CAPI void U_EXPORT2 -umutablecptrie_close(UMutableCPTrie *trie) { - delete reinterpret_cast(trie); -} - -U_CAPI UMutableCPTrie * U_EXPORT2 -umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return nullptr; - } - if (map == nullptr) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - return reinterpret_cast(MutableCodePointTrie::fromUCPMap(map, *pErrorCode)); -} - -U_CAPI UMutableCPTrie * U_EXPORT2 -umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return nullptr; - } - if (trie == nullptr) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - return reinterpret_cast(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode)); -} - -U_CAPI uint32_t U_EXPORT2 -umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) { - return reinterpret_cast(trie)->get(c); -} - -namespace { - -UChar32 getRange(const void *trie, UChar32 start, - UCPMapValueFilter 
*filter, const void *context, uint32_t *pValue) { - return reinterpret_cast(trie)-> - getRange(start, filter, context, pValue); -} - -} // namespace - -U_CAPI UChar32 U_EXPORT2 -umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start, - UCPMapRangeOption option, uint32_t surrogateValue, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { - return ucptrie_internalGetRange(getRange, trie, start, - option, surrogateValue, - filter, context, pValue); -} - -U_CAPI void U_EXPORT2 -umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return; - } - reinterpret_cast(trie)->set(c, value, *pErrorCode); -} - -U_CAPI void U_EXPORT2 -umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end, - uint32_t value, UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return; - } - reinterpret_cast(trie)->setRange(start, end, value, *pErrorCode); -} - -/* Compact and internally serialize the trie. */ -U_CAPI UCPTrie * U_EXPORT2 -umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return nullptr; - } - return reinterpret_cast(trie)->build(type, valueWidth, *pErrorCode); -} - -#ifdef UCPTRIE_DEBUG -U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) { - reinterpret_cast(trie)->name = name; -} -#endif diff --git a/deps/node/deps/icu-small/source/common/umutex.cpp b/deps/node/deps/icu-small/source/common/umutex.cpp deleted file mode 100644 index cbbd66cb..00000000 --- a/deps/node/deps/icu-small/source/common/umutex.cpp +++ /dev/null @@ -1,378 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. 
All Rights Reserved. -* -****************************************************************************** -* -* File umutex.cpp -* -* Modification History: -* -* Date Name Description -* 04/02/97 aliu Creation. -* 04/07/99 srl updated -* 05/13/99 stephen Changed to umutex (from cmutex). -* 11/22/99 aliu Make non-global mutex autoinitialize [j151] -****************************************************************************** -*/ - -#include "umutex.h" - -#include "unicode/utypes.h" -#include "uassert.h" -#include "cmemory.h" - - -// The ICU global mutex. Used when ICU implementation code passes NULL for the mutex pointer. -static UMutex globalMutex = U_MUTEX_INITIALIZER; - -/* - * ICU Mutex wrappers. Wrap operating system mutexes, giving the rest of ICU a - * platform independent set of mutex operations. For internal ICU use only. - */ - -#if defined(U_USER_MUTEX_CPP) -// Build time user mutex hook: #include "U_USER_MUTEX_CPP" -#include U_MUTEX_XSTR(U_USER_MUTEX_CPP) - -#elif U_PLATFORM_USES_ONLY_WIN32_API - -#if defined U_NO_PLATFORM_ATOMICS -#error ICU on Win32 requires support for low level atomic operations. -// Visual Studio, gcc, clang are OK. Shouldn't get here. -#endif - - -// This function is called when a test of a UInitOnce::fState reveals that -// initialization has not completed, that we either need to call the -// function on this thread, or wait for some other thread to complete. -// -// The actual call to the init function is made inline by template code -// that knows the C++ types involved. This function returns TRUE if -// the caller needs to call the Init function. -// - -U_NAMESPACE_BEGIN - -U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &uio) { - for (;;) { - int32_t previousState = InterlockedCompareExchange( - (LONG volatile *) // this is the type given in the API doc for this function. 
- &uio.fState, // Destination - 1, // Exchange Value - 0); // Compare value - - if (previousState == 0) { - return true; // Caller will next call the init function. - // Current state == 1. - } else if (previousState == 2) { - // Another thread already completed the initialization. - // We can simply return FALSE, indicating no - // further action is needed by the caller. - return FALSE; - } else { - // Another thread is currently running the initialization. - // Wait until it completes. - do { - Sleep(1); - previousState = umtx_loadAcquire(uio.fState); - } while (previousState == 1); - } - } -} - -// This function is called by the thread that ran an initialization function, -// just after completing the function. - -U_COMMON_API void U_EXPORT2 umtx_initImplPostInit(UInitOnce &uio) { - umtx_storeRelease(uio.fState, 2); -} - -U_NAMESPACE_END - -static void winMutexInit(CRITICAL_SECTION *cs) { - InitializeCriticalSection(cs); - return; -} - -U_CAPI void U_EXPORT2 -umtx_lock(UMutex *mutex) { - if (mutex == NULL) { - mutex = &globalMutex; - } - CRITICAL_SECTION *cs = &mutex->fCS; - umtx_initOnce(mutex->fInitOnce, winMutexInit, cs); - EnterCriticalSection(cs); -} - -U_CAPI void U_EXPORT2 -umtx_unlock(UMutex* mutex) -{ - if (mutex == NULL) { - mutex = &globalMutex; - } - LeaveCriticalSection(&mutex->fCS); -} - - -U_CAPI void U_EXPORT2 -umtx_condBroadcast(UConditionVar *condition) { - // We require that the associated mutex be held by the caller, - // so access to fWaitCount is protected and safe. No other thread can - // call condWait() while we are here. - if (condition->fWaitCount == 0) { - return; - } - ResetEvent(condition->fExitGate); - SetEvent(condition->fEntryGate); -} - -U_CAPI void U_EXPORT2 -umtx_condSignal(UConditionVar * /* condition */) { - // Function not implemented. There is no immediate requirement from ICU to have it. 
- // Once ICU drops support for Windows XP and Server 2003, ICU Condition Variables will be - // changed to be thin wrappers on native Windows CONDITION_VARIABLEs, and this function - // becomes trivial to provide. - U_ASSERT(FALSE); -} - -U_CAPI void U_EXPORT2 -umtx_condWait(UConditionVar *condition, UMutex *mutex) { - if (condition->fEntryGate == NULL) { - // Note: because the associated mutex must be locked when calling - // wait, we know that there can not be multiple threads - // running here with the same condition variable. - // Meaning that lazy initialization is safe. - U_ASSERT(condition->fExitGate == NULL); - condition->fEntryGate = CreateEvent(NULL, // Security Attributes - TRUE, // Manual Reset - FALSE, // Initially reset - NULL); // Name. - U_ASSERT(condition->fEntryGate != NULL); - condition->fExitGate = CreateEvent(NULL, TRUE, TRUE, NULL); - U_ASSERT(condition->fExitGate != NULL); - } - - condition->fWaitCount++; - umtx_unlock(mutex); - WaitForSingleObject(condition->fEntryGate, INFINITE); - umtx_lock(mutex); - condition->fWaitCount--; - if (condition->fWaitCount == 0) { - // All threads that were waiting at the entry gate have woken up - // and moved through. Shut the entry gate and open the exit gate. - ResetEvent(condition->fEntryGate); - SetEvent(condition->fExitGate); - } else { - umtx_unlock(mutex); - WaitForSingleObject(condition->fExitGate, INFINITE); - umtx_lock(mutex); - } -} - - -#elif U_PLATFORM_IMPLEMENTS_POSIX - -//------------------------------------------------------------------------------------------- -// -// POSIX specific definitions -// -//------------------------------------------------------------------------------------------- - -# include - -// Each UMutex consists of a pthread_mutex_t. -// All are statically initialized and ready for use. -// There is no runtime mutex initialization code needed. 
- -U_CAPI void U_EXPORT2 -umtx_lock(UMutex *mutex) { - if (mutex == NULL) { - mutex = &globalMutex; - } - int sysErr = pthread_mutex_lock(&mutex->fMutex); - (void)sysErr; // Suppress unused variable warnings. - U_ASSERT(sysErr == 0); -} - - -U_CAPI void U_EXPORT2 -umtx_unlock(UMutex* mutex) -{ - if (mutex == NULL) { - mutex = &globalMutex; - } - int sysErr = pthread_mutex_unlock(&mutex->fMutex); - (void)sysErr; // Suppress unused variable warnings. - U_ASSERT(sysErr == 0); -} - - -U_CAPI void U_EXPORT2 -umtx_condWait(UConditionVar *cond, UMutex *mutex) { - if (mutex == NULL) { - mutex = &globalMutex; - } - int sysErr = pthread_cond_wait(&cond->fCondition, &mutex->fMutex); - (void)sysErr; - U_ASSERT(sysErr == 0); -} - -U_CAPI void U_EXPORT2 -umtx_condBroadcast(UConditionVar *cond) { - int sysErr = pthread_cond_broadcast(&cond->fCondition); - (void)sysErr; - U_ASSERT(sysErr == 0); -} - -U_CAPI void U_EXPORT2 -umtx_condSignal(UConditionVar *cond) { - int sysErr = pthread_cond_signal(&cond->fCondition); - (void)sysErr; - U_ASSERT(sysErr == 0); -} - - - -U_NAMESPACE_BEGIN - -static pthread_mutex_t initMutex = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t initCondition = PTHREAD_COND_INITIALIZER; - - -// This function is called when a test of a UInitOnce::fState reveals that -// initialization has not completed, that we either need to call the -// function on this thread, or wait for some other thread to complete. -// -// The actual call to the init function is made inline by template code -// that knows the C++ types involved. This function returns TRUE if -// the caller needs to call the Init function. -// -U_COMMON_API UBool U_EXPORT2 -umtx_initImplPreInit(UInitOnce &uio) { - pthread_mutex_lock(&initMutex); - int32_t state = uio.fState; - if (state == 0) { - umtx_storeRelease(uio.fState, 1); - pthread_mutex_unlock(&initMutex); - return TRUE; // Caller will next call the init function. 
- } else { - while (uio.fState == 1) { - // Another thread is currently running the initialization. - // Wait until it completes. - pthread_cond_wait(&initCondition, &initMutex); - } - pthread_mutex_unlock(&initMutex); - U_ASSERT(uio.fState == 2); - return FALSE; - } -} - - - -// This function is called by the thread that ran an initialization function, -// just after completing the function. -// Some threads may be waiting on the condition, requiring the broadcast wakeup. -// Some threads may be racing to test the fState variable outside of the mutex, -// requiring the use of store/release when changing its value. - -U_COMMON_API void U_EXPORT2 -umtx_initImplPostInit(UInitOnce &uio) { - pthread_mutex_lock(&initMutex); - umtx_storeRelease(uio.fState, 2); - pthread_cond_broadcast(&initCondition); - pthread_mutex_unlock(&initMutex); -} - -U_NAMESPACE_END - -// End of POSIX specific umutex implementation. - -#else // Platform #define chain. - -#error Unknown Platform - -#endif // Platform #define chain. - - -//------------------------------------------------------------------------------- -// -// Atomic Operations, out-of-line versions. -// These are conditional, only defined if better versions -// were not available for the platform. -// -// These versions are platform neutral. 
-// -//-------------------------------------------------------------------------------- - -#if defined U_NO_PLATFORM_ATOMICS -static UMutex gIncDecMutex = U_MUTEX_INITIALIZER; - -U_NAMESPACE_BEGIN - -U_COMMON_API int32_t U_EXPORT2 -umtx_atomic_inc(u_atomic_int32_t *p) { - int32_t retVal; - umtx_lock(&gIncDecMutex); - retVal = ++(*p); - umtx_unlock(&gIncDecMutex); - return retVal; -} - - -U_COMMON_API int32_t U_EXPORT2 -umtx_atomic_dec(u_atomic_int32_t *p) { - int32_t retVal; - umtx_lock(&gIncDecMutex); - retVal = --(*p); - umtx_unlock(&gIncDecMutex); - return retVal; -} - -U_COMMON_API int32_t U_EXPORT2 -umtx_loadAcquire(u_atomic_int32_t &var) { - umtx_lock(&gIncDecMutex); - int32_t val = var; - umtx_unlock(&gIncDecMutex); - return val; -} - -U_COMMON_API void U_EXPORT2 -umtx_storeRelease(u_atomic_int32_t &var, int32_t val) { - umtx_lock(&gIncDecMutex); - var = val; - umtx_unlock(&gIncDecMutex); -} - -U_NAMESPACE_END -#endif - -//-------------------------------------------------------------------------- -// -// Deprecated functions for setting user mutexes. -// -//-------------------------------------------------------------------------- - -U_DEPRECATED void U_EXPORT2 -u_setMutexFunctions(const void * /*context */, UMtxInitFn *, UMtxFn *, - UMtxFn *, UMtxFn *, UErrorCode *status) { - if (U_SUCCESS(*status)) { - *status = U_UNSUPPORTED_ERROR; - } - return; -} - - - -U_DEPRECATED void U_EXPORT2 -u_setAtomicIncDecFunctions(const void * /*context */, UMtxAtomicFn *, UMtxAtomicFn *, - UErrorCode *status) { - if (U_SUCCESS(*status)) { - *status = U_UNSUPPORTED_ERROR; - } - return; -} diff --git a/deps/node/deps/icu-small/source/common/umutex.h b/deps/node/deps/icu-small/source/common/umutex.h deleted file mode 100644 index 37e49871..00000000 --- a/deps/node/deps/icu-small/source/common/umutex.h +++ /dev/null @@ -1,458 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1997-2015, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File UMUTEX.H -* -* Modification History: -* -* Date Name Description -* 04/02/97 aliu Creation. -* 04/07/99 srl rewrite - C interface, multiple mutices -* 05/13/99 stephen Changed to umutex (from cmutex) -****************************************************************************** -*/ - -#ifndef UMUTEX_H -#define UMUTEX_H - -#include "unicode/utypes.h" -#include "unicode/uclean.h" -#include "putilimp.h" - - - -// Forward Declarations. UMutex is not in the ICU namespace (yet) because -// there are some remaining references from plain C. -struct UMutex; -struct UConditionVar; - -U_NAMESPACE_BEGIN -struct UInitOnce; -U_NAMESPACE_END - -// Stringify macros, to allow #include of user supplied atomic & mutex files. -#define U_MUTEX_STR(s) #s -#define U_MUTEX_XSTR(s) U_MUTEX_STR(s) - -/**************************************************************************** - * - * Low Level Atomic Operations. - * Compiler dependent. Not operating system dependent. - * - ****************************************************************************/ -#if defined (U_USER_ATOMICS_H) -#include U_MUTEX_XSTR(U_USER_ATOMICS_H) - -#elif U_HAVE_STD_ATOMICS - -// C++11 atomics are available. - -#include - -U_NAMESPACE_BEGIN - -// Export an explicit template instantiation of std::atomic. -// When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class. -// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples. 
-#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN -template struct U_COMMON_API std::atomic; -#endif - -typedef std::atomic u_atomic_int32_t; -#define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val) - -inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) { - return var.load(std::memory_order_acquire); -} - -inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) { - var.store(val, std::memory_order_release); -} - -inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) { - return var->fetch_add(1) + 1; -} - -inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) { - return var->fetch_sub(1) - 1; -} -U_NAMESPACE_END - -#elif U_PLATFORM_HAS_WIN32_API - -// MSVC compiler. Reads and writes of volatile variables have -// acquire and release memory semantics, respectively. -// This is a Microsoft extension, not standard C++ behavior. -// -// Update: can't use this because of MinGW, built with gcc. -// Original plan was to use gcc atomics for MinGW, but they -// aren't supported, so we fold MinGW into this path. 
- -#ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -#endif -# define VC_EXTRALEAN -# define NOUSER -# define NOSERVICE -# define NOIME -# define NOMCX -# ifndef NOMINMAX -# define NOMINMAX -# endif -# include - -U_NAMESPACE_BEGIN -typedef volatile LONG u_atomic_int32_t; -#define ATOMIC_INT32_T_INITIALIZER(val) val - -inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) { - return InterlockedCompareExchange(&var, 0, 0); -} - -inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) { - InterlockedExchange(&var, val); -} - - -inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) { - return InterlockedIncrement(var); -} - -inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) { - return InterlockedDecrement(var); -} -U_NAMESPACE_END - - -#elif U_HAVE_CLANG_ATOMICS -/* - * Clang __c11 atomic built-ins - */ - -U_NAMESPACE_BEGIN -typedef _Atomic(int32_t) u_atomic_int32_t; -#define ATOMIC_INT32_T_INITIALIZER(val) val - -inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) { - return __c11_atomic_load(&var, __ATOMIC_ACQUIRE); -} - -inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) { - return __c11_atomic_store(&var, val, __ATOMIC_RELEASE); -} - -inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) { - return __c11_atomic_fetch_add(var, 1, __ATOMIC_SEQ_CST) + 1; -} - -inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) { - return __c11_atomic_fetch_sub(var, 1, __ATOMIC_SEQ_CST) - 1; -} -U_NAMESPACE_END - - -#elif U_HAVE_GCC_ATOMICS -/* - * gcc atomic ops. These are available on several other compilers as well. 
- */ - -U_NAMESPACE_BEGIN -typedef int32_t u_atomic_int32_t; -#define ATOMIC_INT32_T_INITIALIZER(val) val - -inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) { - int32_t val = var; - __sync_synchronize(); - return val; -} - -inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) { - __sync_synchronize(); - var = val; -} - -inline int32_t umtx_atomic_inc(u_atomic_int32_t *p) { - return __sync_add_and_fetch(p, 1); -} - -inline int32_t umtx_atomic_dec(u_atomic_int32_t *p) { - return __sync_sub_and_fetch(p, 1); -} -U_NAMESPACE_END - -#else - -/* - * Unknown Platform. Use out-of-line functions, which in turn use mutexes. - * Slow but correct. - */ - -#define U_NO_PLATFORM_ATOMICS - -U_NAMESPACE_BEGIN -typedef int32_t u_atomic_int32_t; -#define ATOMIC_INT32_T_INITIALIZER(val) val - -U_COMMON_API int32_t U_EXPORT2 -umtx_loadAcquire(u_atomic_int32_t &var); - -U_COMMON_API void U_EXPORT2 -umtx_storeRelease(u_atomic_int32_t &var, int32_t val); - -U_COMMON_API int32_t U_EXPORT2 -umtx_atomic_inc(u_atomic_int32_t *p); - -U_COMMON_API int32_t U_EXPORT2 -umtx_atomic_dec(u_atomic_int32_t *p); - -U_NAMESPACE_END - -#endif /* Low Level Atomic Ops Platform Chain */ - - - -/************************************************************************************************* - * - * UInitOnce Definitions. - * These are platform neutral. - * - *************************************************************************************************/ - -U_NAMESPACE_BEGIN - -struct UInitOnce { - u_atomic_int32_t fState; - UErrorCode fErrCode; - void reset() {fState = 0;}; - UBool isReset() {return umtx_loadAcquire(fState) == 0;}; -// Note: isReset() is used by service registration code. -// Thread safety of this usage needs review. 
-}; - -#define U_INITONCE_INITIALIZER {ATOMIC_INT32_T_INITIALIZER(0), U_ZERO_ERROR} - - -U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &); -U_COMMON_API void U_EXPORT2 umtx_initImplPostInit(UInitOnce &); - -template void umtx_initOnce(UInitOnce &uio, T *obj, void (U_CALLCONV T::*fp)()) { - if (umtx_loadAcquire(uio.fState) == 2) { - return; - } - if (umtx_initImplPreInit(uio)) { - (obj->*fp)(); - umtx_initImplPostInit(uio); - } -} - - -// umtx_initOnce variant for plain functions, or static class functions. -// No context parameter. -inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)()) { - if (umtx_loadAcquire(uio.fState) == 2) { - return; - } - if (umtx_initImplPreInit(uio)) { - (*fp)(); - umtx_initImplPostInit(uio); - } -} - -// umtx_initOnce variant for plain functions, or static class functions. -// With ErrorCode, No context parameter. -inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(UErrorCode &), UErrorCode &errCode) { - if (U_FAILURE(errCode)) { - return; - } - if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) { - // We run the initialization. - (*fp)(errCode); - uio.fErrCode = errCode; - umtx_initImplPostInit(uio); - } else { - // Someone else already ran the initialization. - if (U_FAILURE(uio.fErrCode)) { - errCode = uio.fErrCode; - } - } -} - -// umtx_initOnce variant for plain functions, or static class functions, -// with a context parameter. -template void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T), T context) { - if (umtx_loadAcquire(uio.fState) == 2) { - return; - } - if (umtx_initImplPreInit(uio)) { - (*fp)(context); - umtx_initImplPostInit(uio); - } -} - -// umtx_initOnce variant for plain functions, or static class functions, -// with a context parameter and an error code. 
-template void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T, UErrorCode &), T context, UErrorCode &errCode) { - if (U_FAILURE(errCode)) { - return; - } - if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) { - // We run the initialization. - (*fp)(context, errCode); - uio.fErrCode = errCode; - umtx_initImplPostInit(uio); - } else { - // Someone else already ran the initialization. - if (U_FAILURE(uio.fErrCode)) { - errCode = uio.fErrCode; - } - } -} - -U_NAMESPACE_END - - - -/************************************************************************************************* - * - * Mutex Definitions. Platform Dependent, #if platform chain follows. - * TODO: Add a C++11 version. - * Need to convert all mutex using files to C++ first. - * - *************************************************************************************************/ - -#if defined(U_USER_MUTEX_H) -// #include "U_USER_MUTEX_H" -#include U_MUTEX_XSTR(U_USER_MUTEX_H) - -#elif U_PLATFORM_USES_ONLY_WIN32_API - -/* For CRITICAL_SECTION */ - -/* - * Note: there is an earlier include of windows.h in this file, but it is in - * different conditionals. - * This one is needed if we are using C++11 for atomic ops, but - * win32 APIs for Critical Sections. - */ - -#ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -#endif -# define VC_EXTRALEAN -# define NOUSER -# define NOSERVICE -# define NOIME -# define NOMCX -# ifndef NOMINMAX -# define NOMINMAX -# endif -# include - - -typedef struct UMutex { - icu::UInitOnce fInitOnce; - CRITICAL_SECTION fCS; -} UMutex; - -/* Initializer for a static UMUTEX. Deliberately contains no value for the - * CRITICAL_SECTION. 
- */ -#define U_MUTEX_INITIALIZER {U_INITONCE_INITIALIZER} - -struct UConditionVar { - HANDLE fEntryGate; - HANDLE fExitGate; - int32_t fWaitCount; -}; - -#define U_CONDITION_INITIALIZER {NULL, NULL, 0} - - - -#elif U_PLATFORM_IMPLEMENTS_POSIX - -/* - * POSIX platform - */ - -#include - -struct UMutex { - pthread_mutex_t fMutex; -}; -typedef struct UMutex UMutex; -#define U_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER} - -struct UConditionVar { - pthread_cond_t fCondition; -}; -#define U_CONDITION_INITIALIZER {PTHREAD_COND_INITIALIZER} - -#else - -/* - * Unknown platform type. - * This is an error condition. ICU requires mutexes. - */ - -#error Unknown Platform. - -#endif - - - -/************************************************************************************** - * - * Mutex Implementation function declarations. - * Declarations are platform neutral. - * Implementations, in umutex.cpp, are platform specific. - * - ************************************************************************************/ - -/* Lock a mutex. - * @param mutex The given mutex to be locked. Pass NULL to specify - * the global ICU mutex. Recursive locks are an error - * and may cause a deadlock on some platforms. - */ -U_INTERNAL void U_EXPORT2 umtx_lock(UMutex* mutex); - -/* Unlock a mutex. - * @param mutex The given mutex to be unlocked. Pass NULL to specify - * the global ICU mutex. - */ -U_INTERNAL void U_EXPORT2 umtx_unlock (UMutex* mutex); - -/* - * Wait on a condition variable. - * The calling thread will unlock the mutex and wait on the condition variable. - * The mutex must be locked by the calling thread when invoking this function. - * - * @param cond the condition variable to wait on. - * @param mutex the associated mutex. - */ - -U_INTERNAL void U_EXPORT2 umtx_condWait(UConditionVar *cond, UMutex *mutex); - - -/* - * Broadcast wakeup of all threads waiting on a Condition. 
- * The associated mutex must be locked by the calling thread when calling - * this function; this is a temporary ICU restriction. - * - * @param cond the condition variable. - */ -U_INTERNAL void U_EXPORT2 umtx_condBroadcast(UConditionVar *cond); - -/* - * Signal a condition variable, waking up one waiting thread. - * CAUTION: Do not use. Place holder only. Not implemented for Windows. - */ -U_INTERNAL void U_EXPORT2 umtx_condSignal(UConditionVar *cond); - -#endif /* UMUTEX_H */ -/*eof*/ diff --git a/deps/node/deps/icu-small/source/common/unames.cpp b/deps/node/deps/icu-small/source/common/unames.cpp deleted file mode 100644 index 5f752b0d..00000000 --- a/deps/node/deps/icu-small/source/common/unames.cpp +++ /dev/null @@ -1,2101 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: unames.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999oct04 -* created by: Markus W. 
Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "unicode/uchar.h" -#include "unicode/udata.h" -#include "unicode/utf.h" -#include "unicode/utf16.h" -#include "uassert.h" -#include "ustr_imp.h" -#include "umutex.h" -#include "cmemory.h" -#include "cstring.h" -#include "ucln_cmn.h" -#include "udataswp.h" -#include "uprops.h" - -U_NAMESPACE_BEGIN - -/* prototypes ------------------------------------------------------------- */ - -static const char DATA_NAME[] = "unames"; -static const char DATA_TYPE[] = "icu"; - -#define GROUP_SHIFT 5 -#define LINES_PER_GROUP (1L<groupsOffset) - -typedef struct { - const char *otherName; - UChar32 code; -} FindName; - -#define DO_FIND_NAME NULL - -static UDataMemory *uCharNamesData=NULL; -static UCharNames *uCharNames=NULL; -static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER; - -/* - * Maximum length of character names (regular & 1.0). - */ -static int32_t gMaxNameLength=0; - -/* - * Set of chars used in character names (regular & 1.0). - * Chars are platform-dependent (can be EBCDIC). 
- */ -static uint32_t gNameSet[8]={ 0 }; - -#define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT -#define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1 -#define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2 - -#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3) - -static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = { - "unassigned", - "uppercase letter", - "lowercase letter", - "titlecase letter", - "modifier letter", - "other letter", - "non spacing mark", - "enclosing mark", - "combining spacing mark", - "decimal digit number", - "letter number", - "other number", - "space separator", - "line separator", - "paragraph separator", - "control", - "format", - "private use area", - "surrogate", - "dash punctuation", - "start punctuation", - "end punctuation", - "connector punctuation", - "other punctuation", - "math symbol", - "currency symbol", - "modifier symbol", - "other symbol", - "initial punctuation", - "final punctuation", - "noncharacter", - "lead surrogate", - "trail surrogate" -}; - -/* implementation ----------------------------------------------------------- */ - -static UBool U_CALLCONV unames_cleanup(void) -{ - if(uCharNamesData) { - udata_close(uCharNamesData); - uCharNamesData = NULL; - } - if(uCharNames) { - uCharNames = NULL; - } - gCharNamesInitOnce.reset(); - gMaxNameLength=0; - return TRUE; -} - -static UBool U_CALLCONV -isAcceptable(void * /*context*/, - const char * /*type*/, const char * /*name*/, - const UDataInfo *pInfo) { - return (UBool)( - pInfo->size>=20 && - pInfo->isBigEndian==U_IS_BIG_ENDIAN && - pInfo->charsetFamily==U_CHARSET_FAMILY && - pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */ - pInfo->dataFormat[1]==0x6e && - pInfo->dataFormat[2]==0x61 && - pInfo->dataFormat[3]==0x6d && - pInfo->formatVersion[0]==1); -} - -static void U_CALLCONV -loadCharNames(UErrorCode &status) { - U_ASSERT(uCharNamesData == NULL); - U_ASSERT(uCharNames == NULL); - - uCharNamesData = udata_openChoice(NULL, 
DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status); - if(U_FAILURE(status)) { - uCharNamesData = NULL; - } else { - uCharNames = (UCharNames *)udata_getMemory(uCharNamesData); - } - ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup); -} - - -static UBool -isDataLoaded(UErrorCode *pErrorCode) { - umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode); - return U_SUCCESS(*pErrorCode); -} - -#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \ - if((bufferLength)>0) { \ - *(buffer)++=c; \ - --(bufferLength); \ - } \ - ++(bufferPos); \ -} - -#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT - -/* - * Important: expandName() and compareName() are almost the same - - * apply fixes to both. - * - * UnicodeData.txt uses ';' as a field separator, so no - * field can contain ';' as part of its contents. - * In unames.dat, it is marked as token[';']==-1 only if the - * semicolon is used in the data file - which is iff we - * have Unicode 1.0 names or ISO comments or aliases. - * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases - * although we know that it will never be part of a name. - */ -static uint16_t -expandName(UCharNames *names, - const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, - char *buffer, uint16_t bufferLength) { - uint16_t *tokens=(uint16_t *)names+8; - uint16_t token, tokenCount=*tokens++, bufferPos=0; - uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset; - uint8_t c; - - if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { - /* - * skip the modern name if it is not requested _and_ - * if the semicolon byte value is a character, not a token number - */ - if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { - int fieldIndex= nameChoice==U_ISO_COMMENT ? 
2 : nameChoice; - do { - while(nameLength>0) { - --nameLength; - if(*name++==';') { - break; - } - } - } while(--fieldIndex>0); - } else { - /* - * the semicolon byte value is a token number, therefore - * only modern names are stored in unames.dat and there is no - * such requested alternate name here - */ - nameLength=0; - } - } - - /* write each letter directly, and write a token word per token */ - while(nameLength>0) { - --nameLength; - c=*name++; - - if(c>=tokenCount) { - if(c!=';') { - /* implicit letter */ - WRITE_CHAR(buffer, bufferLength, bufferPos, c); - } else { - /* finished */ - break; - } - } else { - token=tokens[c]; - if(token==(uint16_t)(-2)) { - /* this is a lead byte for a double-byte token */ - token=tokens[c<<8|*name++]; - --nameLength; - } - if(token==(uint16_t)(-1)) { - if(c!=';') { - /* explicit letter */ - WRITE_CHAR(buffer, bufferLength, bufferPos, c); - } else { - /* stop, but skip the semicolon if we are seeking - extended names and there was no 2.0 name but there - is a 1.0 name. */ - if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) { - if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { - continue; - } - } - /* finished */ - break; - } - } else { - /* write token word */ - uint8_t *tokenString=tokenStrings+token; - while((c=*tokenString++)!=0) { - WRITE_CHAR(buffer, bufferLength, bufferPos, c); - } - } - } - } - - /* zero-terminate */ - if(bufferLength>0) { - *buffer=0; - } - - return bufferPos; -} - -/* - * compareName() is almost the same as expandName() except that it compares - * the currently expanded name to an input name. - * It returns the match/no match result as soon as possible. 
- */ -static UBool -compareName(UCharNames *names, - const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, - const char *otherName) { - uint16_t *tokens=(uint16_t *)names+8; - uint16_t token, tokenCount=*tokens++; - uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset; - uint8_t c; - const char *origOtherName = otherName; - - if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { - /* - * skip the modern name if it is not requested _and_ - * if the semicolon byte value is a character, not a token number - */ - if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { - int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice; - do { - while(nameLength>0) { - --nameLength; - if(*name++==';') { - break; - } - } - } while(--fieldIndex>0); - } else { - /* - * the semicolon byte value is a token number, therefore - * only modern names are stored in unames.dat and there is no - * such requested alternate name here - */ - nameLength=0; - } - } - - /* compare each letter directly, and compare a token word per token */ - while(nameLength>0) { - --nameLength; - c=*name++; - - if(c>=tokenCount) { - if(c!=';') { - /* implicit letter */ - if((char)c!=*otherName++) { - return FALSE; - } - } else { - /* finished */ - break; - } - } else { - token=tokens[c]; - if(token==(uint16_t)(-2)) { - /* this is a lead byte for a double-byte token */ - token=tokens[c<<8|*name++]; - --nameLength; - } - if(token==(uint16_t)(-1)) { - if(c!=';') { - /* explicit letter */ - if((char)c!=*otherName++) { - return FALSE; - } - } else { - /* stop, but skip the semicolon if we are seeking - extended names and there was no 2.0 name but there - is a 1.0 name. 
*/ - if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) { - if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { - continue; - } - } - /* finished */ - break; - } - } else { - /* write token word */ - uint8_t *tokenString=tokenStrings+token; - while((c=*tokenString++)!=0) { - if((char)c!=*otherName++) { - return FALSE; - } - } - } - } - } - - /* complete match? */ - return (UBool)(*otherName==0); -} - -static uint8_t getCharCat(UChar32 cp) { - uint8_t cat; - - if (U_IS_UNICODE_NONCHAR(cp)) { - return U_NONCHARACTER_CODE_POINT; - } - - if ((cat = u_charType(cp)) == U_SURROGATE) { - cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE; - } - - return cat; -} - -static const char *getCharCatName(UChar32 cp) { - uint8_t cat = getCharCat(cp); - - /* Return unknown if the table of names above is not up to - date. */ - - if (cat >= UPRV_LENGTHOF(charCatNames)) { - return "unknown"; - } else { - return charCatNames[cat]; - } -} - -static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) { - const char *catname = getCharCatName(code); - uint16_t length = 0; - - UChar32 cp; - int ndigits, i; - - WRITE_CHAR(buffer, bufferLength, length, '<'); - while (catname[length - 1]) { - WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]); - } - WRITE_CHAR(buffer, bufferLength, length, '-'); - for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4) - ; - if (ndigits < 4) - ndigits = 4; - for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) { - uint8_t v = (uint8_t)(cp & 0xf); - buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10); - } - buffer += ndigits; - length += static_cast(ndigits); - WRITE_CHAR(buffer, bufferLength, length, '>'); - - return length; -} - -/* - * getGroup() does a binary search for the group that contains the - * Unicode code point "code". - * The return value is always a valid Group* that may contain "code" - * or else is the highest group before "code". 
- * If the lowest group is after "code", then that one is returned. - */ -static const uint16_t * -getGroup(UCharNames *names, uint32_t code) { - const uint16_t *groups=GET_GROUPS(names); - uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT), - start=0, - limit=*groups++, - number; - - /* binary search for the group of names that contains the one for code */ - while(start=0xc, then it forms a length value with the following nibble. - * Calculation see below. - * The offsets and lengths arrays must be at least 33 (one more) long because - * there is no check here at the end if the last nibble is still used. - */ -static const uint8_t * -expandGroupLengths(const uint8_t *s, - uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) { - /* read the lengths of the 32 strings in this group and get each string's offset */ - uint16_t i=0, offset=0, length=0; - uint8_t lengthByte; - - /* all 32 lengths must be read to get the offset of the first group string */ - while(i=12) { - /* double-nibble length spread across two bytes */ - length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12); - lengthByte&=0xf; - } else if((lengthByte /* &0xf0 */)>=0xc0) { - /* double-nibble length spread across this one byte */ - length=(uint16_t)((lengthByte&0x3f)+12); - } else { - /* single-nibble length in MSBs */ - length=(uint16_t)(lengthByte>>4); - lengthByte&=0xf; - } - - *offsets++=offset; - *lengths++=length; - - offset+=length; - ++i; - - /* read odd nibble - LSBs of lengthByte */ - if((lengthByte&0xf0)==0) { - /* this nibble was not consumed for a double-nibble length above */ - length=lengthByte; - if(length<12) { - /* single-nibble length in LSBs */ - *offsets++=offset; - *lengths++=length; - - offset+=length; - ++i; - } - } else { - length=0; /* prevent double-nibble detection in the next iteration */ - } - } - - /* now, s is at the first group string */ - return s; -} - -static uint16_t -expandGroupName(UCharNames *names, const uint16_t *group, - uint16_t lineNumber, 
UCharNameChoice nameChoice, - char *buffer, uint16_t bufferLength) { - uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; - const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group); - s=expandGroupLengths(s, offsets, lengths); - return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice, - buffer, bufferLength); -} - -static uint16_t -getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice, - char *buffer, uint16_t bufferLength) { - const uint16_t *group=getGroup(names, code); - if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) { - return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice, - buffer, bufferLength); - } else { - /* group not found */ - /* zero-terminate */ - if(bufferLength>0) { - *buffer=0; - } - return 0; - } -} - -/* - * enumGroupNames() enumerates all the names in a 32-group - * and either calls the enumerator function or finds a given input name. - */ -static UBool -enumGroupNames(UCharNames *names, const uint16_t *group, - UChar32 start, UChar32 end, - UEnumCharNamesFn *fn, void *context, - UCharNameChoice nameChoice) { - uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; - const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group); - - s=expandGroupLengths(s, offsets, lengths); - if(fn!=DO_FIND_NAME) { - char buffer[200]; - uint16_t length; - - while(start<=end) { - length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer)); - if (!length && nameChoice == U_EXTENDED_CHAR_NAME) { - buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0; - } - /* here, we assume that the buffer is large enough */ - if(length>0) { - if(!fn(context, start, nameChoice, buffer, length)) { - return FALSE; - } - } - ++start; - } - } else { - const char *otherName=((FindName *)context)->otherName; - while(start<=end) { - if(compareName(names, s+offsets[start&GROUP_MASK], 
lengths[start&GROUP_MASK], nameChoice, otherName)) { - ((FindName *)context)->code=start; - return FALSE; - } - ++start; - } - } - return TRUE; -} - -/* - * enumExtNames enumerate extended names. - * It only needs to do it if it is called with a real function and not - * with the dummy DO_FIND_NAME, because u_charFromName() does a check - * for extended names by itself. - */ -static UBool -enumExtNames(UChar32 start, UChar32 end, - UEnumCharNamesFn *fn, void *context) -{ - if(fn!=DO_FIND_NAME) { - char buffer[200]; - uint16_t length; - - while(start<=end) { - buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0; - /* here, we assume that the buffer is large enough */ - if(length>0) { - if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) { - return FALSE; - } - } - ++start; - } - } - - return TRUE; -} - -static UBool -enumNames(UCharNames *names, - UChar32 start, UChar32 limit, - UEnumCharNamesFn *fn, void *context, - UCharNameChoice nameChoice) { - uint16_t startGroupMSB, endGroupMSB, groupCount; - const uint16_t *group, *groupLimit; - - startGroupMSB=(uint16_t)(start>>GROUP_SHIFT); - endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT); - - /* find the group that contains start, or the highest before it */ - group=getGroup(names, start); - - if(startGroupMSBlimit) { - extLimit=limit; - } - if(!enumExtNames(start, extLimit-1, fn, context)) { - return FALSE; - } - start=extLimit; - } - - if(startGroupMSB==endGroupMSB) { - if(startGroupMSB==group[GROUP_MSB]) { - /* if start and limit-1 are in the same group, then enumerate only in that one */ - return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice); - } - } else { - const uint16_t *groups=GET_GROUPS(names); - groupCount=*groups++; - groupLimit=groups+groupCount*GROUP_LENGTH; - - if(startGroupMSB==group[GROUP_MSB]) { - /* enumerate characters in the partial start group */ - if((start&GROUP_MASK)!=0) { - if(!enumGroupNames(names, group, - start, 
((UChar32)startGroupMSB<group[GROUP_MSB]) { - /* make sure that we start enumerating with the first group after start */ - const uint16_t *nextGroup=NEXT_GROUP(group); - if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) { - UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT; - if (end > limit) { - end = limit; - } - if (!enumExtNames(start, end - 1, fn, context)) { - return FALSE; - } - } - group=nextGroup; - } - - /* enumerate entire groups between the start- and end-groups */ - while(group group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) { - UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT; - if (end > limit) { - end = limit; - } - if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) { - return FALSE; - } - } - group=nextGroup; - } - - /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */ - if(group start) { - start = next; - } - } else { - return TRUE; - } - } - - /* we have not found a group, which means everything is made of - extended names. 
*/ - if (nameChoice == U_EXTENDED_CHAR_NAME) { - if (limit > UCHAR_MAX_VALUE + 1) { - limit = UCHAR_MAX_VALUE + 1; - } - return enumExtNames(start, limit - 1, fn, context); - } - - return TRUE; -} - -static uint16_t -writeFactorSuffix(const uint16_t *factors, uint16_t count, - const char *s, /* suffix elements */ - uint32_t code, - uint16_t indexes[8], /* output fields from here */ - const char *elementBases[8], const char *elements[8], - char *buffer, uint16_t bufferLength) { - uint16_t i, factor, bufferPos=0; - char c; - - /* write elements according to the factors */ - - /* - * the factorized elements are determined by modulo arithmetic - * with the factors of this algorithm - * - * note that for fewer operations, count is decremented here - */ - --count; - for(i=count; i>0; --i) { - factor=factors[i]; - indexes[i]=(uint16_t)(code%factor); - code/=factor; - } - /* - * we don't need to calculate the last modulus because start<=code<=end - * guarantees here that code<=factors[0] - */ - indexes[0]=(uint16_t)code; - - /* write each element */ - for(;;) { - if(elementBases!=NULL) { - *elementBases++=s; - } - - /* skip indexes[i] strings */ - factor=indexes[i]; - while(factor>0) { - while(*s++!=0) {} - --factor; - } - if(elements!=NULL) { - *elements++=s; - } - - /* write element */ - while((c=*s++)!=0) { - WRITE_CHAR(buffer, bufferLength, bufferPos, c); - } - - /* we do not need to perform the rest of this loop for i==count - break here */ - if(i>=count) { - break; - } - - /* skip the rest of the strings for this factors[i] */ - factor=(uint16_t)(factors[i]-indexes[i]-1); - while(factor>0) { - while(*s++!=0) {} - --factor; - } - - ++i; - } - - /* zero-terminate */ - if(bufferLength>0) { - *buffer=0; - } - - return bufferPos; -} - -/* - * Important: - * Parts of findAlgName() are almost the same as some of getAlgName(). - * Fixes must be applied to both. 
- */ -static uint16_t -getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice, - char *buffer, uint16_t bufferLength) { - uint16_t bufferPos=0; - - /* Only the normative character name can be algorithmic. */ - if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { - /* zero-terminate */ - if(bufferLength>0) { - *buffer=0; - } - return 0; - } - - switch(range->type) { - case 0: { - /* name = prefix hex-digits */ - const char *s=(const char *)(range+1); - char c; - - uint16_t i, count; - - /* copy prefix */ - while((c=*s++)!=0) { - WRITE_CHAR(buffer, bufferLength, bufferPos, c); - } - - /* write hexadecimal code point value */ - count=range->variant; - - /* zero-terminate */ - if(count0;) { - if(--i>=4; - } - - bufferPos+=count; - break; - } - case 1: { - /* name = prefix factorized-elements */ - uint16_t indexes[8]; - const uint16_t *factors=(const uint16_t *)(range+1); - uint16_t count=range->variant; - const char *s=(const char *)(factors+count); - char c; - - /* copy prefix */ - while((c=*s++)!=0) { - WRITE_CHAR(buffer, bufferLength, bufferPos, c); - } - - bufferPos+=writeFactorSuffix(factors, count, - s, code-range->start, indexes, NULL, NULL, buffer, bufferLength); - break; - } - default: - /* undefined type */ - /* zero-terminate */ - if(bufferLength>0) { - *buffer=0; - } - break; - } - - return bufferPos; -} - -/* - * Important: enumAlgNames() and findAlgName() are almost the same. - * Any fix must be applied to both. 
- */ -static UBool -enumAlgNames(AlgorithmicRange *range, - UChar32 start, UChar32 limit, - UEnumCharNamesFn *fn, void *context, - UCharNameChoice nameChoice) { - char buffer[200]; - uint16_t length; - - if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { - return TRUE; - } - - switch(range->type) { - case 0: { - char *s, *end; - char c; - - /* get the full name of the start character */ - length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer)); - if(length<=0) { - return TRUE; - } - - /* call the enumerator function with this first character */ - if(!fn(context, start, nameChoice, buffer, length)) { - return FALSE; - } - - /* go to the end of the name; all these names have the same length */ - end=buffer; - while(*end!=0) { - ++end; - } - - /* enumerate the rest of the names */ - while(++startvariant; - const char *s=(const char *)(factors+count); - char *suffix, *t; - uint16_t prefixLength, i, idx; - - char c; - - /* name = prefix factorized-elements */ - - /* copy prefix */ - suffix=buffer; - prefixLength=0; - while((c=*s++)!=0) { - *suffix++=c; - ++prefixLength; - } - - /* append the suffix of the start character */ - length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count, - s, (uint32_t)start-range->start, - indexes, elementBases, elements, - suffix, (uint16_t)(sizeof(buffer)-prefixLength))); - - /* call the enumerator function with this first character */ - if(!fn(context, start, nameChoice, buffer, length)) { - return FALSE; - } - - /* enumerate the rest of the names */ - while(++starttype) { - case 0: { - /* name = prefix hex-digits */ - const char *s=(const char *)(range+1); - char c; - - uint16_t i, count; - - /* compare prefix */ - while((c=*s++)!=0) { - if((char)c!=*otherName++) { - return 0xffff; - } - } - - /* read hexadecimal code point value */ - count=range->variant; - code=0; - for(i=0; istart<=(uint32_t)code && (uint32_t)code<=range->end) { - return code; - } - break; - } - case 1: { - char 
buffer[64]; - uint16_t indexes[8]; - const char *elementBases[8], *elements[8]; - const uint16_t *factors=(const uint16_t *)(range+1); - uint16_t count=range->variant; - const char *s=(const char *)(factors+count), *t; - UChar32 start, limit; - uint16_t i, idx; - - char c; - - /* name = prefix factorized-elements */ - - /* compare prefix */ - while((c=*s++)!=0) { - if((char)c!=*otherName++) { - return 0xffff; - } - } - - start=(UChar32)range->start; - limit=(UChar32)(range->end+1); - - /* initialize the suffix elements for enumeration; indexes should all be set to 0 */ - writeFactorSuffix(factors, count, s, 0, - indexes, elementBases, elements, buffer, sizeof(buffer)); - - /* compare the first suffix */ - if(0==uprv_strcmp(otherName, buffer)) { - return start; - } - - /* enumerate and compare the rest of the suffixes */ - while(++start>5]|=((uint32_t)1<<((uint8_t)c&0x1f))) -#define SET_CONTAINS(set, c) (((set)[(uint8_t)c>>5]&((uint32_t)1<<((uint8_t)c&0x1f)))!=0) - -static int32_t -calcStringSetLength(uint32_t set[8], const char *s) { - int32_t length=0; - char c; - - while((c=*s++)!=0) { - SET_ADD(set, c); - ++length; - } - return length; -} - -static int32_t -calcAlgNameSetsLengths(int32_t maxNameLength) { - AlgorithmicRange *range; - uint32_t *p; - uint32_t rangeCount; - int32_t length; - - /* enumerate algorithmic ranges */ - p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); - rangeCount=*p; - range=(AlgorithmicRange *)(p+1); - while(rangeCount>0) { - switch(range->type) { - case 0: - /* name = prefix + (range->variant times) hex-digits */ - /* prefix */ - length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant; - if(length>maxNameLength) { - maxNameLength=length; - } - break; - case 1: { - /* name = prefix factorized-elements */ - const uint16_t *factors=(const uint16_t *)(range+1); - const char *s; - int32_t i, count=range->variant, factor, factorLength, maxFactorLength; - - /* prefix length */ - s=(const char 
*)(factors+count); - length=calcStringSetLength(gNameSet, s); - s+=length+1; /* start of factor suffixes */ - - /* get the set and maximum factor suffix length for each factor */ - for(i=0; i0; --factor) { - factorLength=calcStringSetLength(gNameSet, s); - s+=factorLength+1; - if(factorLength>maxFactorLength) { - maxFactorLength=factorLength; - } - } - length+=maxFactorLength; - } - - if(length>maxNameLength) { - maxNameLength=length; - } - break; - } - default: - /* unknown type */ - break; - } - - range=(AlgorithmicRange *)((uint8_t *)range+range->size); - --rangeCount; - } - return maxNameLength; -} - -static int32_t -calcExtNameSetsLengths(int32_t maxNameLength) { - int32_t i, length; - - for(i=0; i - * 1 for - - * 6 for most hex digits per code point - */ - length=9+calcStringSetLength(gNameSet, charCatNames[i]); - if(length>maxNameLength) { - maxNameLength=length; - } - } - return maxNameLength; -} - -static int32_t -calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths, - uint32_t set[8], - const uint8_t **pLine, const uint8_t *lineLimit) { - const uint8_t *line=*pLine; - int32_t length=0, tokenLength; - uint16_t c, token; - - while(line!=lineLimit && (c=*line++)!=(uint8_t)';') { - if(c>=tokenCount) { - /* implicit letter */ - SET_ADD(set, c); - ++length; - } else { - token=tokens[c]; - if(token==(uint16_t)(-2)) { - /* this is a lead byte for a double-byte token */ - c=c<<8|*line++; - token=tokens[c]; - } - if(token==(uint16_t)(-1)) { - /* explicit letter */ - SET_ADD(set, c); - ++length; - } else { - /* count token word */ - if(tokenLengths!=NULL) { - /* use cached token length */ - tokenLength=tokenLengths[c]; - if(tokenLength==0) { - tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token); - tokenLengths[c]=(int8_t)tokenLength; - } - } else { - tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token); - } - length+=tokenLength; - } - } - } - - *pLine=line; - return 
length; -} - -static void -calcGroupNameSetsLengths(int32_t maxNameLength) { - uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; - - uint16_t *tokens=(uint16_t *)uCharNames+8; - uint16_t tokenCount=*tokens++; - uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset; - - int8_t *tokenLengths; - - const uint16_t *group; - const uint8_t *s, *line, *lineLimit; - - int32_t groupCount, lineNumber, length; - - tokenLengths=(int8_t *)uprv_malloc(tokenCount); - if(tokenLengths!=NULL) { - uprv_memset(tokenLengths, 0, tokenCount); - } - - group=GET_GROUPS(uCharNames); - groupCount=*group++; - - /* enumerate all groups */ - while(groupCount>0) { - s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group); - s=expandGroupLengths(s, offsets, lengths); - - /* enumerate all lines in each group */ - for(lineNumber=0; lineNumbermaxNameLength) { - maxNameLength=length; - } - if(line==lineLimit) { - continue; - } - - /* read Unicode 1.0 name */ - length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit); - if(length>maxNameLength) { - maxNameLength=length; - } - if(line==lineLimit) { - continue; - } - - /* read ISO comment */ - /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/ - } - - group=NEXT_GROUP(group); - --groupCount; - } - - if(tokenLengths!=NULL) { - uprv_free(tokenLengths); - } - - /* set gMax... 
- name length last for threading */ - gMaxNameLength=maxNameLength; -} - -static UBool -calcNameSetsLengths(UErrorCode *pErrorCode) { - static const char extChars[]="0123456789ABCDEF<>-"; - int32_t i, maxNameLength; - - if(gMaxNameLength!=0) { - return TRUE; - } - - if(!isDataLoaded(pErrorCode)) { - return FALSE; - } - - /* set hex digits, used in various names, and <>-, used in extended names */ - for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) { - SET_ADD(gNameSet, extChars[i]); - } - - /* set sets and lengths from algorithmic names */ - maxNameLength=calcAlgNameSetsLengths(0); - - /* set sets and lengths from extended names */ - maxNameLength=calcExtNameSetsLengths(maxNameLength); - - /* set sets and lengths from group names, set global maximum values */ - calcGroupNameSetsLengths(maxNameLength); - - return TRUE; -} - -U_NAMESPACE_END - -/* public API --------------------------------------------------------------- */ - -U_NAMESPACE_USE - -U_CAPI int32_t U_EXPORT2 -u_charName(UChar32 code, UCharNameChoice nameChoice, - char *buffer, int32_t bufferLength, - UErrorCode *pErrorCode) { - AlgorithmicRange *algRange; - uint32_t *p; - uint32_t i; - int32_t length; - - /* check the argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || - bufferLength<0 || (bufferLength>0 && buffer==NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) { - return u_terminateChars(buffer, bufferLength, 0, pErrorCode); - } - - length=0; - - /* try algorithmic names first */ - p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); - i=*p; - algRange=(AlgorithmicRange *)(p+1); - while(i>0) { - if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) { - length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength); - break; - } - algRange=(AlgorithmicRange *)((uint8_t 
*)algRange+algRange->size); - --i; - } - - if(i==0) { - if (nameChoice == U_EXTENDED_CHAR_NAME) { - length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength); - if (!length) { - /* extended character name */ - length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength); - } - } else { - /* normal character name */ - length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength); - } - } - - return u_terminateChars(buffer, bufferLength, length, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -u_getISOComment(UChar32 /*c*/, - char *dest, int32_t destCapacity, - UErrorCode *pErrorCode) { - /* check the argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - return u_terminateChars(dest, destCapacity, 0, pErrorCode); -} - -U_CAPI UChar32 U_EXPORT2 -u_charFromName(UCharNameChoice nameChoice, - const char *name, - UErrorCode *pErrorCode) { - char upper[120], lower[120]; - FindName findName; - AlgorithmicRange *algRange; - uint32_t *p; - uint32_t i; - UChar32 cp = 0; - char c0; - UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */ - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return error; - } - - if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return error; - } - - if(!isDataLoaded(pErrorCode)) { - return error; - } - - /* construct the uppercase and lowercase of the name first */ - for(i=0; i" where HHHH is a hex code point. 
- if (lower[--i] == '>' && i >= 3 && lower[--i] != '-') { - while (i >= 3 && lower[--i] != '-') {} - - if (i >= 2 && lower[i] == '-') { - uint32_t cIdx; - - lower[i] = 0; - - for (++i; lower[i] != '>'; ++i) { - if (lower[i] >= '0' && lower[i] <= '9') { - cp = (cp << 4) + lower[i] - '0'; - } else if (lower[i] >= 'a' && lower[i] <= 'f') { - cp = (cp << 4) + lower[i] - 'a' + 10; - } else { - *pErrorCode = U_ILLEGAL_CHAR_FOUND; - return error; - } - } - - /* Now validate the category name. - We could use a binary search, or a trie, if - we really wanted to. */ - - for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) { - - if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) { - if (getCharCat(cp) == cIdx) { - return cp; - } - break; - } - } - } - } - } - - *pErrorCode = U_ILLEGAL_CHAR_FOUND; - return error; - } - - /* try algorithmic names now */ - p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); - i=*p; - algRange=(AlgorithmicRange *)(p+1); - while(i>0) { - if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) { - return cp; - } - algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); - --i; - } - - /* normal character name */ - findName.otherName=upper; - findName.code=error; - enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice); - if (findName.code == error) { - *pErrorCode = U_ILLEGAL_CHAR_FOUND; - } - return findName.code; -} - -U_CAPI void U_EXPORT2 -u_enumCharNames(UChar32 start, UChar32 limit, - UEnumCharNamesFn *fn, - void *context, - UCharNameChoice nameChoice, - UErrorCode *pErrorCode) { - AlgorithmicRange *algRange; - uint32_t *p; - uint32_t i; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } - - if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if((uint32_t) limit > UCHAR_MAX_VALUE + 1) { - limit = UCHAR_MAX_VALUE + 1; - } - if((uint32_t)start>=(uint32_t)limit) { - return; - } - - 
if(!isDataLoaded(pErrorCode)) { - return; - } - - /* interleave the data-driven ones with the algorithmic ones */ - /* iterate over all algorithmic ranges; assume that they are in ascending order */ - p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); - i=*p; - algRange=(AlgorithmicRange *)(p+1); - while(i>0) { - /* enumerate the character names before the current algorithmic range */ - /* here: startstart) { - if((uint32_t)limit<=algRange->start) { - enumNames(uCharNames, start, limit, fn, context, nameChoice); - return; - } - if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) { - return; - } - start=(UChar32)algRange->start; - } - /* enumerate the character names in the current algorithmic range */ - /* here: algRange->start<=startend) { - if((uint32_t)limit<=(algRange->end+1)) { - enumAlgNames(algRange, start, limit, fn, context, nameChoice); - return; - } - if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) { - return; - } - start=(UChar32)algRange->end+1; - } - /* continue to the next algorithmic range (here: startsize); - --i; - } - /* enumerate the character names after the last algorithmic range */ - enumNames(uCharNames, start, limit, fn, context, nameChoice); -} - -U_CAPI int32_t U_EXPORT2 -uprv_getMaxCharNameLength() { - UErrorCode errorCode=U_ZERO_ERROR; - if(calcNameSetsLengths(&errorCode)) { - return gMaxNameLength; - } else { - return 0; - } -} - -/** - * Converts the char set cset into a Unicode set uset. - * @param cset Set of 256 bit flags corresponding to a set of chars. - * @param uset USet to receive characters. Existing contents are deleted. 
- */ -static void -charSetToUSet(uint32_t cset[8], const USetAdder *sa) { - UChar us[256]; - char cs[256]; - - int32_t i, length; - UErrorCode errorCode; - - errorCode=U_ZERO_ERROR; - - if(!calcNameSetsLengths(&errorCode)) { - return; - } - - /* build a char string with all chars that are used in character names */ - length=0; - for(i=0; i<256; ++i) { - if(SET_CONTAINS(cset, i)) { - cs[length++]=(char)i; - } - } - - /* convert the char string to a UChar string */ - u_charsToUChars(cs, us, length); - - /* add each UChar to the USet */ - for(i=0; iadd(sa->set, us[i]); - } - } -} - -/** - * Fills set with characters that are used in Unicode character names. - * @param set USet to receive characters. - */ -U_CAPI void U_EXPORT2 -uprv_getCharNameCharacters(const USetAdder *sa) { - charSetToUSet(gNameSet, sa); -} - -/* data swapping ------------------------------------------------------------ */ - -/* - * The token table contains non-negative entries for token bytes, - * and -1 for bytes that represent themselves in the data file's charset. - * -2 entries are used for lead bytes. - * - * Direct bytes (-1 entries) must be translated from the input charset family - * to the output charset family. - * makeTokenMap() writes a permutation mapping for this. - * Use it once for single-/lead-byte tokens and once more for all trail byte - * tokens. (';' is an unused trail byte marked with -1.) 
- */ -static void -makeTokenMap(const UDataSwapper *ds, - int16_t tokens[], uint16_t tokenCount, - uint8_t map[256], - UErrorCode *pErrorCode) { - UBool usedOutChar[256]; - uint16_t i, j; - uint8_t c1, c2; - - if(U_FAILURE(*pErrorCode)) { - return; - } - - if(ds->inCharset==ds->outCharset) { - /* Same charset family: identity permutation */ - for(i=0; i<256; ++i) { - map[i]=(uint8_t)i; - } - } else { - uprv_memset(map, 0, 256); - uprv_memset(usedOutChar, 0, 256); - - if(tokenCount>256) { - tokenCount=256; - } - - /* set the direct bytes (byte 0 always maps to itself) */ - for(i=1; iswapInvChars(ds, &c1, 1, &c2, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n", - i, ds->inCharset); - return; - } - - /* enter the converted character into the map and mark it used */ - map[c1]=c2; - usedOutChar[c2]=TRUE; - } - } - - /* set the mappings for the rest of the permutation */ - for(i=j=1; idataFormat[0]==0x75 && /* dataFormat="unam" */ - pInfo->dataFormat[1]==0x6e && - pInfo->dataFormat[2]==0x61 && - pInfo->dataFormat[3]==0x6d && - pInfo->formatVersion[0]==1 - )) { - udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - inBytes=(const uint8_t *)inData+headerSize; - outBytes=(uint8_t *)outData+headerSize; - if(length<0) { - algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]); - } else { - length-=headerSize; - if( length<20 || - (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3])) - ) { - udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - - if(length<0) { - /* preflighting: 
iterate through algorithmic ranges */ - offset=algNamesOffset; - count=ds->readUInt32(*((const uint32_t *)(inBytes+offset))); - offset+=4; - - for(i=0; ireadUInt16(inRange->size); - } - } else { - /* swap data */ - const uint16_t *p; - uint16_t *q, *temp; - - int16_t tokens[512]; - uint16_t tokenCount; - - uint8_t map[256], trailMap[256]; - - /* copy the data for inaccessible bytes */ - if(inBytes!=outBytes) { - uprv_memcpy(outBytes, inBytes, length); - } - - /* the initial 4 offsets first */ - tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]); - groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]); - groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]); - ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode); - - /* - * now the tokens table - * it needs to be permutated along with the compressed name strings - */ - p=(const uint16_t *)(inBytes+16); - q=(uint16_t *)(outBytes+16); - - /* read and swap the tokenCount */ - tokenCount=ds->readUInt16(*p); - ds->swapArray16(ds, p, 2, q, pErrorCode); - ++p; - ++q; - - /* read the first 512 tokens and make the token maps */ - if(tokenCount<=512) { - count=tokenCount; - } else { - count=512; - } - for(i=0; i256 ? 
tokenCount-256 : 0), trailMap, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* - * swap and permutate the tokens - * go through a temporary array to support in-place swapping - */ - temp=(uint16_t *)uprv_malloc(tokenCount*2); - if(temp==NULL) { - udata_printError(ds, "out of memory swapping %u unames.icu tokens\n", - tokenCount); - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - /* swap and permutate single-/lead-byte tokens */ - for(i=0; iswapArray16(ds, p+i, 2, temp+map[i], pErrorCode); - } - - /* swap and permutate trail-byte tokens */ - for(; iswapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode); - } - - /* copy the result into the output and free the temporary array */ - uprv_memcpy(q, temp, tokenCount*2); - uprv_free(temp); - - /* - * swap the token strings but not a possible padding byte after - * the terminating NUL of the last string - */ - udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset), - outBytes+tokenStringOffset, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "uchar_swapNames(token strings) failed\n"); - return 0; - } - - /* swap the group table */ - count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset))); - ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2), - outBytes+groupsOffset, pErrorCode); - - /* - * swap the group strings - * swap the string bytes but not the nibble-encoded string lengths - */ - if(ds->inCharset!=ds->outCharset) { - uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1]; - - const uint8_t *inStrings, *nextInStrings; - uint8_t *outStrings; - - uint8_t c; - - inStrings=inBytes+groupStringOffset; - outStrings=outBytes+groupStringOffset; - - stringsCount=algNamesOffset-groupStringOffset; - - /* iterate through string groups until only a few padding bytes are left */ - while(stringsCount>32) { - nextInStrings=expandGroupLengths(inStrings, offsets, lengths); - - /* move past the 
length bytes */ - stringsCount-=(uint32_t)(nextInStrings-inStrings); - outStrings+=nextInStrings-inStrings; - inStrings=nextInStrings; - - count=offsets[31]+lengths[31]; /* total number of string bytes in this group */ - stringsCount-=count; - - /* swap the string bytes using map[] and trailMap[] */ - while(count>0) { - c=*inStrings++; - *outStrings++=map[c]; - if(tokens[c]!=-2) { - --count; - } else { - /* token lead byte: swap the trail byte, too */ - *outStrings++=trailMap[*inStrings++]; - count-=2; - } - } - } - } - - /* swap the algorithmic ranges */ - offset=algNamesOffset; - count=ds->readUInt32(*((const uint32_t *)(inBytes+offset))); - ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode); - offset+=4; - - for(i=0; i(uint32_t)length) { - udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n", - length, i); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - inRange=(const AlgorithmicRange *)(inBytes+offset); - outRange=(AlgorithmicRange *)(outBytes+offset); - offset+=ds->readUInt16(inRange->size); - - ds->swapArray32(ds, inRange, 8, outRange, pErrorCode); - ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode); - switch(inRange->type) { - case 0: - /* swap prefix string */ - ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)), - outRange+1, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n", - i); - return 0; - } - break; - case 1: - { - /* swap factors and the prefix and factor strings */ - uint32_t factorsCount; - - factorsCount=inRange->variant; - p=(const uint16_t *)(inRange+1); - q=(uint16_t *)(outRange+1); - ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode); - - /* swap the strings, up to the last terminating NUL */ - p+=factorsCount; - q+=factorsCount; - stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p); - 
while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) { - --stringsCount; - } - ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode); - } - break; - default: - udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n", - inRange->type, i); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - } - } - - return headerSize+(int32_t)offset; -} - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/node/deps/icu-small/source/common/unicode/appendable.h b/deps/node/deps/icu-small/source/common/unicode/appendable.h deleted file mode 100644 index 8512c2f3..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/appendable.h +++ /dev/null @@ -1,234 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011-2012, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: appendable.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010dec07 -* created by: Markus W. Scherer -*/ - -#ifndef __APPENDABLE_H__ -#define __APPENDABLE_H__ - -/** - * \file - * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (char16_ts). - */ - -#include "unicode/utypes.h" -#include "unicode/uobject.h" - -U_NAMESPACE_BEGIN - -class UnicodeString; - -/** - * Base class for objects to which Unicode characters and strings can be appended. - * Combines elements of Java Appendable and ICU4C ByteSink. - * - * This class can be used in APIs where it does not matter whether the actual destination is - * a UnicodeString, a char16_t[] array, a UnicodeSet, or any other object - * that receives and processes characters and/or strings. 
- * - * Implementation classes must implement at least appendCodeUnit(char16_t). - * The base class provides default implementations for the other methods. - * - * The methods do not take UErrorCode parameters. - * If an error occurs (e.g., out-of-memory), - * in addition to returning FALSE from failing operations, - * the implementation must prevent unexpected behavior (e.g., crashes) - * from further calls and should make the error condition available separately - * (e.g., store a UErrorCode, make/keep a UnicodeString bogus). - * @stable ICU 4.8 - */ -class U_COMMON_API Appendable : public UObject { -public: - /** - * Destructor. - * @stable ICU 4.8 - */ - ~Appendable(); - - /** - * Appends a 16-bit code unit. - * @param c code unit - * @return TRUE if the operation succeeded - * @stable ICU 4.8 - */ - virtual UBool appendCodeUnit(char16_t c) = 0; - - /** - * Appends a code point. - * The default implementation calls appendCodeUnit(char16_t) once or twice. - * @param c code point 0..0x10ffff - * @return TRUE if the operation succeeded - * @stable ICU 4.8 - */ - virtual UBool appendCodePoint(UChar32 c); - - /** - * Appends a string. - * The default implementation calls appendCodeUnit(char16_t) for each code unit. - * @param s string, must not be NULL if length!=0 - * @param length string length, or -1 if NUL-terminated - * @return TRUE if the operation succeeded - * @stable ICU 4.8 - */ - virtual UBool appendString(const char16_t *s, int32_t length); - - /** - * Tells the object that the caller is going to append roughly - * appendCapacity char16_ts. A subclass might use this to pre-allocate - * a larger buffer if necessary. - * The default implementation does nothing. (It always returns TRUE.) 
- * @param appendCapacity estimated number of char16_ts that will be appended - * @return TRUE if the operation succeeded - * @stable ICU 4.8 - */ - virtual UBool reserveAppendCapacity(int32_t appendCapacity); - - /** - * Returns a writable buffer for appending and writes the buffer's capacity to - * *resultCapacity. Guarantees *resultCapacity>=minCapacity. - * May return a pointer to the caller-owned scratch buffer which must have - * scratchCapacity>=minCapacity. - * The returned buffer is only valid until the next operation - * on this Appendable. - * - * After writing at most *resultCapacity char16_ts, call appendString() with the - * pointer returned from this function and the number of char16_ts written. - * Many appendString() implementations will avoid copying char16_ts if this function - * returned an internal buffer. - * - * Partial usage example: - * \code - * int32_t capacity; - * char16_t* buffer = app.getAppendBuffer(..., &capacity); - * ... Write n char16_ts into buffer, with n <= capacity. - * app.appendString(buffer, n); - * \endcode - * In many implementations, that call to append will avoid copying char16_ts. - * - * If the Appendable allocates or reallocates an internal buffer, it should use - * the desiredCapacityHint if appropriate. - * If a caller cannot provide a reasonable guess at the desired capacity, - * it should pass desiredCapacityHint=0. - * - * If a non-scratch buffer is returned, the caller may only pass - * a prefix to it to appendString(). - * That is, it is not correct to pass an interior pointer to appendString(). - * - * The default implementation always returns the scratch buffer. 
- * - * @param minCapacity required minimum capacity of the returned buffer; - * must be non-negative - * @param desiredCapacityHint desired capacity of the returned buffer; - * must be non-negative - * @param scratch default caller-owned buffer - * @param scratchCapacity capacity of the scratch buffer - * @param resultCapacity pointer to an integer which will be set to the - * capacity of the returned buffer - * @return a buffer with *resultCapacity>=minCapacity - * @stable ICU 4.8 - */ - virtual char16_t *getAppendBuffer(int32_t minCapacity, - int32_t desiredCapacityHint, - char16_t *scratch, int32_t scratchCapacity, - int32_t *resultCapacity); -}; - -/** - * An Appendable implementation which writes to a UnicodeString. - * - * This class is not intended for public subclassing. - * @stable ICU 4.8 - */ -class U_COMMON_API UnicodeStringAppendable : public Appendable { -public: - /** - * Aliases the UnicodeString (keeps its reference) for writing. - * @param s The UnicodeString to which this Appendable will write. - * @stable ICU 4.8 - */ - explicit UnicodeStringAppendable(UnicodeString &s) : str(s) {} - - /** - * Destructor. - * @stable ICU 4.8 - */ - ~UnicodeStringAppendable(); - - /** - * Appends a 16-bit code unit to the string. - * @param c code unit - * @return TRUE if the operation succeeded - * @stable ICU 4.8 - */ - virtual UBool appendCodeUnit(char16_t c); - - /** - * Appends a code point to the string. - * @param c code point 0..0x10ffff - * @return TRUE if the operation succeeded - * @stable ICU 4.8 - */ - virtual UBool appendCodePoint(UChar32 c); - - /** - * Appends a string to the UnicodeString. - * @param s string, must not be NULL if length!=0 - * @param length string length, or -1 if NUL-terminated - * @return TRUE if the operation succeeded - * @stable ICU 4.8 - */ - virtual UBool appendString(const char16_t *s, int32_t length); - - /** - * Tells the UnicodeString that the caller is going to append roughly - * appendCapacity char16_ts. 
- * @param appendCapacity estimated number of char16_ts that will be appended - * @return TRUE if the operation succeeded - * @stable ICU 4.8 - */ - virtual UBool reserveAppendCapacity(int32_t appendCapacity); - - /** - * Returns a writable buffer for appending and writes the buffer's capacity to - * *resultCapacity. Guarantees *resultCapacity>=minCapacity. - * May return a pointer to the caller-owned scratch buffer which must have - * scratchCapacity>=minCapacity. - * The returned buffer is only valid until the next write operation - * on the UnicodeString. - * - * For details see Appendable::getAppendBuffer(). - * - * @param minCapacity required minimum capacity of the returned buffer; - * must be non-negative - * @param desiredCapacityHint desired capacity of the returned buffer; - * must be non-negative - * @param scratch default caller-owned buffer - * @param scratchCapacity capacity of the scratch buffer - * @param resultCapacity pointer to an integer which will be set to the - * capacity of the returned buffer - * @return a buffer with *resultCapacity>=minCapacity - * @stable ICU 4.8 - */ - virtual char16_t *getAppendBuffer(int32_t minCapacity, - int32_t desiredCapacityHint, - char16_t *scratch, int32_t scratchCapacity, - int32_t *resultCapacity); - -private: - UnicodeString &str; -}; - -U_NAMESPACE_END - -#endif // __APPENDABLE_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/brkiter.h b/deps/node/deps/icu-small/source/common/unicode/brkiter.h deleted file mode 100644 index 5faeedfa..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/brkiter.h +++ /dev/null @@ -1,660 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************** -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. 
-******************************************************************************** -* -* File brkiter.h -* -* Modification History: -* -* Date Name Description -* 02/18/97 aliu Added typedef for TextCount. Made DONE const. -* 05/07/97 aliu Fixed DLL declaration. -* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK -* 08/11/98 helena Sync-up JDK1.2. -* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. -******************************************************************************** -*/ - -#ifndef BRKITER_H -#define BRKITER_H - -#include "unicode/utypes.h" - -/** - * \file - * \brief C++ API: Break Iterator. - */ - -#if UCONFIG_NO_BREAK_ITERATION - -U_NAMESPACE_BEGIN - -/* - * Allow the declaration of APIs with pointers to BreakIterator - * even when break iteration is removed from the build. - */ -class BreakIterator; - -U_NAMESPACE_END - -#else - -#include "unicode/uobject.h" -#include "unicode/unistr.h" -#include "unicode/chariter.h" -#include "unicode/locid.h" -#include "unicode/ubrk.h" -#include "unicode/strenum.h" -#include "unicode/utext.h" -#include "unicode/umisc.h" - -U_NAMESPACE_BEGIN - -/** - * The BreakIterator class implements methods for finding the location - * of boundaries in text. BreakIterator is an abstract base class. - * Instances of BreakIterator maintain a current position and scan over - * text returning the index of characters where boundaries occur. - *

- * Line boundary analysis determines where a text string can be broken - * when line-wrapping. The mechanism correctly handles punctuation and - * hyphenated words. - *

- * Sentence boundary analysis allows selection with correct - * interpretation of periods within numbers and abbreviations, and - * trailing punctuation marks such as quotation marks and parentheses. - *

- * Word boundary analysis is used by search and replace functions, as - * well as within text editing applications that allow the user to - * select words with a double click. Word selection provides correct - * interpretation of punctuation marks within and following - * words. Characters that are not part of a word, such as symbols or - * punctuation marks, have word-breaks on both sides. - *

- * Character boundary analysis allows users to interact with - * characters as they expect to, for example, when moving the cursor - * through a text string. Character boundary analysis provides correct - * navigation of through character strings, regardless of how the - * character is stored. For example, an accented character might be - * stored as a base character and a diacritical mark. What users - * consider to be a character can differ between languages. - *

- * The text boundary positions are found according to the rules - * described in Unicode Standard Annex #29, Text Boundaries, and - * Unicode Standard Annex #14, Line Breaking Properties. These - * are available at http://www.unicode.org/reports/tr14/ and - * http://www.unicode.org/reports/tr29/. - *

- * In addition to the C++ API defined in this header file, a - * plain C API with equivalent functionality is defined in the - * file ubrk.h - *

- * Code snippets illustrating the use of the Break Iterator APIs - * are available in the ICU User Guide, - * http://icu-project.org/userguide/boundaryAnalysis.html - * and in the sample program icu/source/samples/break/break.cpp - * - */ -class U_COMMON_API BreakIterator : public UObject { -public: - /** - * destructor - * @stable ICU 2.0 - */ - virtual ~BreakIterator(); - - /** - * Return true if another object is semantically equal to this - * one. The other object should be an instance of the same subclass of - * BreakIterator. Objects of different subclasses are considered - * unequal. - *

- * Return true if this BreakIterator is at the same position in the - * same text, and is the same class and type (word, line, etc.) of - * BreakIterator, as the argument. Text is considered the same if - * it contains the same characters, it need not be the same - * object, and styles are not considered. - * @stable ICU 2.0 - */ - virtual UBool operator==(const BreakIterator&) const = 0; - - /** - * Returns the complement of the result of operator== - * @param rhs The BreakIterator to be compared for inequality - * @return the complement of the result of operator== - * @stable ICU 2.0 - */ - UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); } - - /** - * Return a polymorphic copy of this object. This is an abstract - * method which subclasses implement. - * @stable ICU 2.0 - */ - virtual BreakIterator* clone(void) const = 0; - - /** - * Return a polymorphic class ID for this object. Different subclasses - * will return distinct unequal values. - * @stable ICU 2.0 - */ - virtual UClassID getDynamicClassID(void) const = 0; - - /** - * Return a CharacterIterator over the text being analyzed. - * @stable ICU 2.0 - */ - virtual CharacterIterator& getText(void) const = 0; - - - /** - * Get a UText for the text being analyzed. - * The returned UText is a shallow clone of the UText used internally - * by the break iterator implementation. It can safely be used to - * access the text without impacting any break iterator operations, - * but the underlying text itself must not be altered. - * - * @param fillIn A UText to be filled in. If NULL, a new UText will be - * allocated to hold the result. - * @param status receives any error codes. - * @return The current UText for this break iterator. If an input - * UText was provided, it will always be returned. - * @stable ICU 3.4 - */ - virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0; - - /** - * Change the text over which this operates. The text boundary is - * reset to the start. 
- * - * The BreakIterator will retain a reference to the supplied string. - * The caller must not modify or delete the text while the BreakIterator - * retains the reference. - * - * @param text The UnicodeString used to change the text. - * @stable ICU 2.0 - */ - virtual void setText(const UnicodeString &text) = 0; - - /** - * Reset the break iterator to operate over the text represented by - * the UText. The iterator position is reset to the start. - * - * This function makes a shallow clone of the supplied UText. This means - * that the caller is free to immediately close or otherwise reuse the - * Utext that was passed as a parameter, but that the underlying text itself - * must not be altered while being referenced by the break iterator. - * - * All index positions returned by break iterator functions are - * native indices from the UText. For example, when breaking UTF-8 - * encoded text, the break positions returned by next(), previous(), etc. - * will be UTF-8 string indices, not UTF-16 positions. - * - * @param text The UText used to change the text. - * @param status receives any error codes. - * @stable ICU 3.4 - */ - virtual void setText(UText *text, UErrorCode &status) = 0; - - /** - * Change the text over which this operates. The text boundary is - * reset to the start. - * Note that setText(UText *) provides similar functionality to this function, - * and is more efficient. - * @param it The CharacterIterator used to change the text. - * @stable ICU 2.0 - */ - virtual void adoptText(CharacterIterator* it) = 0; - - enum { - /** - * DONE is returned by previous() and next() after all valid - * boundaries have been returned. - * @stable ICU 2.0 - */ - DONE = (int32_t)-1 - }; - - /** - * Sets the current iteration position to the beginning of the text, position zero. - * @return The offset of the beginning of the text, zero. 
- * @stable ICU 2.0 - */ - virtual int32_t first(void) = 0; - - /** - * Set the iterator position to the index immediately BEYOND the last character in the text being scanned. - * @return The index immediately BEYOND the last character in the text being scanned. - * @stable ICU 2.0 - */ - virtual int32_t last(void) = 0; - - /** - * Set the iterator position to the boundary preceding the current boundary. - * @return The character index of the previous text boundary or DONE if all - * boundaries have been returned. - * @stable ICU 2.0 - */ - virtual int32_t previous(void) = 0; - - /** - * Advance the iterator to the boundary following the current boundary. - * @return The character index of the next text boundary or DONE if all - * boundaries have been returned. - * @stable ICU 2.0 - */ - virtual int32_t next(void) = 0; - - /** - * Return character index of the current iterator position within the text. - * @return The boundary most recently returned. - * @stable ICU 2.0 - */ - virtual int32_t current(void) const = 0; - - /** - * Advance the iterator to the first boundary following the specified offset. - * The value returned is always greater than the offset or - * the value BreakIterator.DONE - * @param offset the offset to begin scanning. - * @return The first boundary after the specified offset. - * @stable ICU 2.0 - */ - virtual int32_t following(int32_t offset) = 0; - - /** - * Set the iterator position to the first boundary preceding the specified offset. - * The value returned is always smaller than the offset or - * the value BreakIterator.DONE - * @param offset the offset to begin scanning. - * @return The first boundary before the specified offset. - * @stable ICU 2.0 - */ - virtual int32_t preceding(int32_t offset) = 0; - - /** - * Return true if the specified position is a boundary position. - * As a side effect, the current position of the iterator is set - * to the first boundary position at or following the specified offset. 
- * @param offset the offset to check. - * @return True if "offset" is a boundary position. - * @stable ICU 2.0 - */ - virtual UBool isBoundary(int32_t offset) = 0; - - /** - * Set the iterator position to the nth boundary from the current boundary - * @param n the number of boundaries to move by. A value of 0 - * does nothing. Negative values move to previous boundaries - * and positive values move to later boundaries. - * @return The new iterator position, or - * DONE if there are fewer than |n| boundaries in the specified direction. - * @stable ICU 2.0 - */ - virtual int32_t next(int32_t n) = 0; - - /** - * For RuleBasedBreakIterators, return the status tag from the break rule - * that determined the boundary at the current iteration position. - *

- * For break iterator types that do not support a rule status, - * a default value of 0 is returned. - *

- * @return the status from the break rule that determined the boundary at - * the current iteration position. - * @see RuleBaseBreakIterator::getRuleStatus() - * @see UWordBreak - * @stable ICU 52 - */ - virtual int32_t getRuleStatus() const; - - /** - * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) - * that determined the boundary at the current iteration position. - *

- * For break iterator types that do not support rule status, - * no values are returned. - *

- * The returned status value(s) are stored into an array provided by the caller. - * The values are stored in sorted (ascending) order. - * If the capacity of the output array is insufficient to hold the data, - * the output will be truncated to the available length, and a - * U_BUFFER_OVERFLOW_ERROR will be signaled. - *

- * @see RuleBaseBreakIterator::getRuleStatusVec - * - * @param fillInVec an array to be filled in with the status values. - * @param capacity the length of the supplied vector. A length of zero causes - * the function to return the number of status values, in the - * normal way, without attempting to store any values. - * @param status receives error codes. - * @return The number of rule status values from rules that determined - * the boundary at the current iteration position. - * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value - * is the total number of status values that were available, - * not the reduced number that were actually returned. - * @see getRuleStatus - * @stable ICU 52 - */ - virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); - - /** - * Create BreakIterator for word-breaks using the given locale. - * Returns an instance of a BreakIterator implementing word breaks. - * WordBreak is useful for word selection (ex. double click) - * @param where the locale. - * @param status the error code - * @return A BreakIterator for word-breaks. The UErrorCode& status - * parameter is used to return status information to the user. - * To check whether the construction succeeded or not, you should check - * the value of U_SUCCESS(err). If you wish more detailed information, you - * can check for informational error results which still indicate success. - * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For - * example, 'de_CH' was requested, but nothing was found there, so 'de' was - * used. U_USING_DEFAULT_WARNING indicates that the default locale data was - * used; neither the requested locale nor any of its fall back locales - * could be found. - * The caller owns the returned object and is responsible for deleting it. 
- * @stable ICU 2.0 - */ - static BreakIterator* U_EXPORT2 - createWordInstance(const Locale& where, UErrorCode& status); - - /** - * Create BreakIterator for line-breaks using specified locale. - * Returns an instance of a BreakIterator implementing line breaks. Line - * breaks are logically possible line breaks, actual line breaks are - * usually determined based on display width. - * LineBreak is useful for word wrapping text. - * @param where the locale. - * @param status The error code. - * @return A BreakIterator for line-breaks. The UErrorCode& status - * parameter is used to return status information to the user. - * To check whether the construction succeeded or not, you should check - * the value of U_SUCCESS(err). If you wish more detailed information, you - * can check for informational error results which still indicate success. - * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For - * example, 'de_CH' was requested, but nothing was found there, so 'de' was - * used. U_USING_DEFAULT_WARNING indicates that the default locale data was - * used; neither the requested locale nor any of its fall back locales - * could be found. - * The caller owns the returned object and is responsible for deleting it. - * @stable ICU 2.0 - */ - static BreakIterator* U_EXPORT2 - createLineInstance(const Locale& where, UErrorCode& status); - - /** - * Create BreakIterator for character-breaks using specified locale - * Returns an instance of a BreakIterator implementing character breaks. - * Character breaks are boundaries of combining character sequences. - * @param where the locale. - * @param status The error code. - * @return A BreakIterator for character-breaks. The UErrorCode& status - * parameter is used to return status information to the user. - * To check whether the construction succeeded or not, you should check - * the value of U_SUCCESS(err). 
If you wish more detailed information, you - * can check for informational error results which still indicate success. - * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For - * example, 'de_CH' was requested, but nothing was found there, so 'de' was - * used. U_USING_DEFAULT_WARNING indicates that the default locale data was - * used; neither the requested locale nor any of its fall back locales - * could be found. - * The caller owns the returned object and is responsible for deleting it. - * @stable ICU 2.0 - */ - static BreakIterator* U_EXPORT2 - createCharacterInstance(const Locale& where, UErrorCode& status); - - /** - * Create BreakIterator for sentence-breaks using specified locale - * Returns an instance of a BreakIterator implementing sentence breaks. - * @param where the locale. - * @param status The error code. - * @return A BreakIterator for sentence-breaks. The UErrorCode& status - * parameter is used to return status information to the user. - * To check whether the construction succeeded or not, you should check - * the value of U_SUCCESS(err). If you wish more detailed information, you - * can check for informational error results which still indicate success. - * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For - * example, 'de_CH' was requested, but nothing was found there, so 'de' was - * used. U_USING_DEFAULT_WARNING indicates that the default locale data was - * used; neither the requested locale nor any of its fall back locales - * could be found. - * The caller owns the returned object and is responsible for deleting it. - * @stable ICU 2.0 - */ - static BreakIterator* U_EXPORT2 - createSentenceInstance(const Locale& where, UErrorCode& status); - - /** - * Create BreakIterator for title-casing breaks using the specified locale - * Returns an instance of a BreakIterator implementing title breaks. - * The iterator returned locates title boundaries as described for - * Unicode 3.2 only. 
For Unicode 4.0 and above title boundary iteration, - * please use Word Boundary iterator.{@link #createWordInstance } - * - * @param where the locale. - * @param status The error code. - * @return A BreakIterator for title-breaks. The UErrorCode& status - * parameter is used to return status information to the user. - * To check whether the construction succeeded or not, you should check - * the value of U_SUCCESS(err). If you wish more detailed information, you - * can check for informational error results which still indicate success. - * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For - * example, 'de_CH' was requested, but nothing was found there, so 'de' was - * used. U_USING_DEFAULT_WARNING indicates that the default locale data was - * used; neither the requested locale nor any of its fall back locales - * could be found. - * The caller owns the returned object and is responsible for deleting it. - * @stable ICU 2.1 - */ - static BreakIterator* U_EXPORT2 - createTitleInstance(const Locale& where, UErrorCode& status); - - /** - * Get the set of Locales for which TextBoundaries are installed. - *

Note: this will not return locales added through the register - * call. To see the registered locales too, use the getAvailableLocales - * function that returns a StringEnumeration object

- * @param count the output parameter of number of elements in the locale list - * @return available locales - * @stable ICU 2.0 - */ - static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); - - /** - * Get name of the object for the desired Locale, in the desired language. - * @param objectLocale must be from getAvailableLocales. - * @param displayLocale specifies the desired locale for output. - * @param name the fill-in parameter of the return value - * Uses best match. - * @return user-displayable name - * @stable ICU 2.0 - */ - static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, - const Locale& displayLocale, - UnicodeString& name); - - /** - * Get name of the object for the desired Locale, in the language of the - * default locale. - * @param objectLocale must be from getMatchingLocales - * @param name the fill-in parameter of the return value - * @return user-displayable name - * @stable ICU 2.0 - */ - static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, - UnicodeString& name); - - /** - * Deprecated functionality. Use clone() instead. - * - * Thread safe client-buffer-based cloning operation - * Do NOT call delete on a safeclone, since 'new' is not used to create it. - * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. - * If buffer is not large enough, new memory will be allocated. - * @param BufferSize reference to size of allocated space. - * If BufferSize == 0, a sufficient size for use in cloning will - * be returned ('pre-flighting') - * If BufferSize is not enough for a stack-based safe clone, - * new memory will be allocated. - * @param status to indicate whether the operation went on smoothly or there were errors - * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were - * necessary. - * @return pointer to the new clone - * - * @deprecated ICU 52. Use clone() instead. 
- */ - virtual BreakIterator * createBufferClone(void *stackBuffer, - int32_t &BufferSize, - UErrorCode &status) = 0; - -#ifndef U_HIDE_DEPRECATED_API - - /** - * Determine whether the BreakIterator was created in user memory by - * createBufferClone(), and thus should not be deleted. Such objects - * must be closed by an explicit call to the destructor (not delete). - * @deprecated ICU 52. Always delete the BreakIterator. - */ - inline UBool isBufferClone(void); - -#endif /* U_HIDE_DEPRECATED_API */ - -#if !UCONFIG_NO_SERVICE - /** - * Register a new break iterator of the indicated kind, to use in the given locale. - * The break iterator will be adopted. Clones of the iterator will be returned - * if a request for a break iterator of the given kind matches or falls back to - * this locale. - * Because ICU may choose to cache BreakIterators internally, this must - * be called at application startup, prior to any calls to - * BreakIterator::createXXXInstance to avoid undefined behavior. - * @param toAdopt the BreakIterator instance to be adopted - * @param locale the Locale for which this instance is to be registered - * @param kind the type of iterator for which this instance is to be registered - * @param status the in/out status code, no special meanings are assigned - * @return a registry key that can be used to unregister this instance - * @stable ICU 2.4 - */ - static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt, - const Locale& locale, - UBreakIteratorType kind, - UErrorCode& status); - - /** - * Unregister a previously-registered BreakIterator using the key returned from the - * register call. Key becomes invalid after a successful call and should not be used again. - * The BreakIterator corresponding to the key will be deleted. - * Because ICU may choose to cache BreakIterators internally, this should - * be called during application shutdown, after all calls to - * BreakIterator::createXXXInstance to avoid undefined behavior. 
- * @param key the registry key returned by a previous call to registerInstance - * @param status the in/out status code, no special meanings are assigned - * @return TRUE if the iterator for the key was successfully unregistered - * @stable ICU 2.4 - */ - static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status); - - /** - * Return a StringEnumeration over the locales available at the time of the call, - * including registered locales. - * @return a StringEnumeration over the locales available at the time of the call - * @stable ICU 2.4 - */ - static StringEnumeration* U_EXPORT2 getAvailableLocales(void); -#endif - - /** - * Returns the locale for this break iterator. Two flavors are available: valid and - * actual locale. - * @stable ICU 2.8 - */ - Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; - -#ifndef U_HIDE_INTERNAL_API - /** Get the locale for this break iterator object. You can choose between valid and actual locale. - * @param type type of the locale we're looking for (valid or actual) - * @param status error code for the operation - * @return the locale - * @internal - */ - const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const; -#endif /* U_HIDE_INTERNAL_API */ - - /** - * Set the subject text string upon which the break iterator is operating - * without changing any other aspect of the matching state. - * The new and previous text strings must have the same content. - * - * This function is intended for use in environments where ICU is operating on - * strings that may move around in memory. It provides a mechanism for notifying - * ICU that the string has been relocated, and providing a new UText to access the - * string in its new position. - * - * Note that the break iterator implementation never copies the underlying text - * of a string being processed, but always operates directly on the original text - * provided by the user. 
Refreshing simply drops the references to the old text - * and replaces them with references to the new. - * - * Caution: this function is normally used only by very specialized, - * system-level code. One example use case is with garbage collection that moves - * the text in memory. - * - * @param input The new (moved) text string. - * @param status Receives errors detected by this function. - * @return *this - * - * @stable ICU 49 - */ - virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0; - - private: - static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status); - static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status); - static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status); - - friend class ICUBreakIteratorFactory; - friend class ICUBreakIteratorService; - -protected: - // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API - // or else the compiler will create a public ones. - /** @internal */ - BreakIterator(); - /** @internal */ - BreakIterator (const BreakIterator &other); -#ifndef U_HIDE_INTERNAL_API - /** @internal */ - BreakIterator (const Locale& valid, const Locale &actual); - /** @internal. Assignment Operator, used by RuleBasedBreakIterator. 
*/ - BreakIterator &operator = (const BreakIterator &other); -#endif /* U_HIDE_INTERNAL_API */ - -private: - - /** @internal (private) */ - char actualLocale[ULOC_FULLNAME_CAPACITY]; - char validLocale[ULOC_FULLNAME_CAPACITY]; -}; - -#ifndef U_HIDE_DEPRECATED_API - -inline UBool BreakIterator::isBufferClone() -{ - return FALSE; -} - -#endif /* U_HIDE_DEPRECATED_API */ - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ - -#endif // BRKITER_H -//eof diff --git a/deps/node/deps/icu-small/source/common/unicode/bytestream.h b/deps/node/deps/icu-small/source/common/unicode/bytestream.h deleted file mode 100644 index 61d1e8ac..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/bytestream.h +++ /dev/null @@ -1,270 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// Copyright (C) 2009-2012, International Business Machines -// Corporation and others. All Rights Reserved. -// -// Copyright 2007 Google Inc. All Rights Reserved. -// Author: sanjay@google.com (Sanjay Ghemawat) -// -// Abstract interface that consumes a sequence of bytes (ByteSink). -// -// Used so that we can write a single piece of code that can operate -// on a variety of output string types. -// -// Various implementations of this interface are provided: -// ByteSink: -// CheckedArrayByteSink Write to a flat array, with bounds checking -// StringByteSink Write to an STL string - -// This code is a contribution of Google code, and the style used here is -// a compromise between the original Google code and the ICU coding guidelines. -// For example, data types are ICU-ified (size_t,int->int32_t), -// and API comments doxygen-ified, but function names and behavior are -// as in the original, if possible. -// Assertion-style error handling, not available in ICU, was changed to -// parameter "pinning" similar to UnicodeString. 
-// -// In addition, this is only a partial port of the original Google code, -// limited to what was needed so far. The (nearly) complete original code -// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib -// (see ICU ticket 6765, r25517). - -#ifndef __BYTESTREAM_H__ -#define __BYTESTREAM_H__ - -/** - * \file - * \brief C++ API: Interface for writing bytes, and implementation classes. - */ - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/std_string.h" - -U_NAMESPACE_BEGIN - -/** - * A ByteSink can be filled with bytes. - * @stable ICU 4.2 - */ -class U_COMMON_API ByteSink : public UMemory { -public: - /** - * Default constructor. - * @stable ICU 4.2 - */ - ByteSink() { } - /** - * Virtual destructor. - * @stable ICU 4.2 - */ - virtual ~ByteSink(); - - /** - * Append "bytes[0,n-1]" to this. - * @param bytes the pointer to the bytes - * @param n the number of bytes; must be non-negative - * @stable ICU 4.2 - */ - virtual void Append(const char* bytes, int32_t n) = 0; - - /** - * Returns a writable buffer for appending and writes the buffer's capacity to - * *result_capacity. Guarantees *result_capacity>=min_capacity. - * May return a pointer to the caller-owned scratch buffer which must have - * scratch_capacity>=min_capacity. - * The returned buffer is only valid until the next operation - * on this ByteSink. - * - * After writing at most *result_capacity bytes, call Append() with the - * pointer returned from this function and the number of bytes written. - * Many Append() implementations will avoid copying bytes if this function - * returned an internal buffer. - * - * Partial usage example: - * int32_t capacity; - * char* buffer = sink->GetAppendBuffer(..., &capacity); - * ... Write n bytes into buffer, with n <= capacity. - * sink->Append(buffer, n); - * In many implementations, that call to Append will avoid copying bytes. 
- * - * If the ByteSink allocates or reallocates an internal buffer, it should use - * the desired_capacity_hint if appropriate. - * If a caller cannot provide a reasonable guess at the desired capacity, - * it should pass desired_capacity_hint=0. - * - * If a non-scratch buffer is returned, the caller may only pass - * a prefix to it to Append(). - * That is, it is not correct to pass an interior pointer to Append(). - * - * The default implementation always returns the scratch buffer. - * - * @param min_capacity required minimum capacity of the returned buffer; - * must be non-negative - * @param desired_capacity_hint desired capacity of the returned buffer; - * must be non-negative - * @param scratch default caller-owned buffer - * @param scratch_capacity capacity of the scratch buffer - * @param result_capacity pointer to an integer which will be set to the - * capacity of the returned buffer - * @return a buffer with *result_capacity>=min_capacity - * @stable ICU 4.2 - */ - virtual char* GetAppendBuffer(int32_t min_capacity, - int32_t desired_capacity_hint, - char* scratch, int32_t scratch_capacity, - int32_t* result_capacity); - - /** - * Flush internal buffers. - * Some byte sinks use internal buffers or provide buffering - * and require calling Flush() at the end of the stream. - * The ByteSink should be ready for further Append() calls after Flush(). - * The default implementation of Flush() does nothing. - * @stable ICU 4.2 - */ - virtual void Flush(); - -private: - ByteSink(const ByteSink &) = delete; - ByteSink &operator=(const ByteSink &) = delete; -}; - -// ------------------------------------------------------------- -// Some standard implementations - -/** - * Implementation of ByteSink that writes to a flat byte array, - * with bounds-checking: - * This sink will not write more than capacity bytes to outbuf. - * If more than capacity bytes are Append()ed, then excess bytes are ignored, - * and Overflowed() will return true. 
- * Overflow does not cause a runtime error. - * @stable ICU 4.2 - */ -class U_COMMON_API CheckedArrayByteSink : public ByteSink { -public: - /** - * Constructs a ByteSink that will write to outbuf[0..capacity-1]. - * @param outbuf buffer to write to - * @param capacity size of the buffer - * @stable ICU 4.2 - */ - CheckedArrayByteSink(char* outbuf, int32_t capacity); - /** - * Destructor. - * @stable ICU 4.2 - */ - virtual ~CheckedArrayByteSink(); - /** - * Returns the sink to its original state, without modifying the buffer. - * Useful for reusing both the buffer and the sink for multiple streams. - * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0 - * and Overflowed()=FALSE. - * @return *this - * @stable ICU 4.6 - */ - virtual CheckedArrayByteSink& Reset(); - /** - * Append "bytes[0,n-1]" to this. - * @param bytes the pointer to the bytes - * @param n the number of bytes; must be non-negative - * @stable ICU 4.2 - */ - virtual void Append(const char* bytes, int32_t n); - /** - * Returns a writable buffer for appending and writes the buffer's capacity to - * *result_capacity. For details see the base class documentation. - * @param min_capacity required minimum capacity of the returned buffer; - * must be non-negative - * @param desired_capacity_hint desired capacity of the returned buffer; - * must be non-negative - * @param scratch default caller-owned buffer - * @param scratch_capacity capacity of the scratch buffer - * @param result_capacity pointer to an integer which will be set to the - * capacity of the returned buffer - * @return a buffer with *result_capacity>=min_capacity - * @stable ICU 4.2 - */ - virtual char* GetAppendBuffer(int32_t min_capacity, - int32_t desired_capacity_hint, - char* scratch, int32_t scratch_capacity, - int32_t* result_capacity); - /** - * Returns the number of bytes actually written to the sink. 
- * @return number of bytes written to the buffer - * @stable ICU 4.2 - */ - int32_t NumberOfBytesWritten() const { return size_; } - /** - * Returns true if any bytes were discarded, i.e., if there was an - * attempt to write more than 'capacity' bytes. - * @return TRUE if more than 'capacity' bytes were Append()ed - * @stable ICU 4.2 - */ - UBool Overflowed() const { return overflowed_; } - /** - * Returns the number of bytes appended to the sink. - * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten() - * else they return the same number. - * @return number of bytes written to the buffer - * @stable ICU 4.6 - */ - int32_t NumberOfBytesAppended() const { return appended_; } -private: - char* outbuf_; - const int32_t capacity_; - int32_t size_; - int32_t appended_; - UBool overflowed_; - - CheckedArrayByteSink() = delete; - CheckedArrayByteSink(const CheckedArrayByteSink &) = delete; - CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete; -}; - -/** - * Implementation of ByteSink that writes to a "string". - * The StringClass is usually instantiated with a std::string. - * @stable ICU 4.2 - */ -template -class StringByteSink : public ByteSink { - public: - /** - * Constructs a ByteSink that will append bytes to the dest string. - * @param dest pointer to string object to append to - * @stable ICU 4.2 - */ - StringByteSink(StringClass* dest) : dest_(dest) { } - /** - * Constructs a ByteSink that reserves append capacity and will append bytes to the dest string. - * - * @param dest pointer to string object to append to - * @param initialAppendCapacity capacity beyond dest->length() to be reserve()d - * @stable ICU 60 - */ - StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) { - if (initialAppendCapacity > 0 && - (uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) { - dest->reserve(dest->length() + initialAppendCapacity); - } - } - /** - * Append "bytes[0,n-1]" to this. 
- * @param data the pointer to the bytes - * @param n the number of bytes; must be non-negative - * @stable ICU 4.2 - */ - virtual void Append(const char* data, int32_t n) { dest_->append(data, n); } - private: - StringClass* dest_; - - StringByteSink() = delete; - StringByteSink(const StringByteSink &) = delete; - StringByteSink &operator=(const StringByteSink &) = delete; -}; - -U_NAMESPACE_END - -#endif // __BYTESTREAM_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/bytestrie.h b/deps/node/deps/icu-small/source/common/unicode/bytestrie.h deleted file mode 100644 index c57b8ccf..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/bytestrie.h +++ /dev/null @@ -1,520 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2012, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: bytestrie.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010sep25 -* created by: Markus W. Scherer -*/ - -#ifndef __BYTESTRIE_H__ -#define __BYTESTRIE_H__ - -/** - * \file - * \brief C++ API: Trie for mapping byte sequences to integer values. - */ - -#include "unicode/utypes.h" -#include "unicode/stringpiece.h" -#include "unicode/uobject.h" -#include "unicode/ustringtrie.h" - -U_NAMESPACE_BEGIN - -class ByteSink; -class BytesTrieBuilder; -class CharString; -class UVector32; - -/** - * Light-weight, non-const reader class for a BytesTrie. - * Traverses a byte-serialized data structure with minimal state, - * for mapping byte sequences to non-negative integer values. - * - * This class owns the serialized trie data only if it was constructed by - * the builder's build() method. 
- * The public constructor and the copy constructor only alias the data (only copy the pointer). - * There is no assignment operator. - * - * This class is not intended for public subclassing. - * @stable ICU 4.8 - */ -class U_COMMON_API BytesTrie : public UMemory { -public: - /** - * Constructs a BytesTrie reader instance. - * - * The trieBytes must contain a copy of a byte sequence from the BytesTrieBuilder, - * starting with the first byte of that sequence. - * The BytesTrie object will not read more bytes than - * the BytesTrieBuilder generated in the corresponding build() call. - * - * The array is not copied/cloned and must not be modified while - * the BytesTrie object is in use. - * - * @param trieBytes The byte array that contains the serialized trie. - * @stable ICU 4.8 - */ - BytesTrie(const void *trieBytes) - : ownedArray_(NULL), bytes_(static_cast(trieBytes)), - pos_(bytes_), remainingMatchLength_(-1) {} - - /** - * Destructor. - * @stable ICU 4.8 - */ - ~BytesTrie(); - - /** - * Copy constructor, copies the other trie reader object and its state, - * but not the byte array which will be shared. (Shallow copy.) - * @param other Another BytesTrie object. - * @stable ICU 4.8 - */ - BytesTrie(const BytesTrie &other) - : ownedArray_(NULL), bytes_(other.bytes_), - pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {} - - /** - * Resets this trie to its initial state. - * @return *this - * @stable ICU 4.8 - */ - BytesTrie &reset() { - pos_=bytes_; - remainingMatchLength_=-1; - return *this; - } - - /** - * BytesTrie state object, for saving a trie's current state - * and resetting the trie back to this state later. - * @stable ICU 4.8 - */ - class State : public UMemory { - public: - /** - * Constructs an empty State. - * @stable ICU 4.8 - */ - State() { bytes=NULL; } - private: - friend class BytesTrie; - - const uint8_t *bytes; - const uint8_t *pos; - int32_t remainingMatchLength; - }; - - /** - * Saves the state of this trie. 
- * @param state The State object to hold the trie's state. - * @return *this - * @see resetToState - * @stable ICU 4.8 - */ - const BytesTrie &saveState(State &state) const { - state.bytes=bytes_; - state.pos=pos_; - state.remainingMatchLength=remainingMatchLength_; - return *this; - } - - /** - * Resets this trie to the saved state. - * If the state object contains no state, or the state of a different trie, - * then this trie remains unchanged. - * @param state The State object which holds a saved trie state. - * @return *this - * @see saveState - * @see reset - * @stable ICU 4.8 - */ - BytesTrie &resetToState(const State &state) { - if(bytes_==state.bytes && bytes_!=NULL) { - pos_=state.pos; - remainingMatchLength_=state.remainingMatchLength; - } - return *this; - } - - /** - * Determines whether the byte sequence so far matches, whether it has a value, - * and whether another input byte can continue a matching byte sequence. - * @return The match/value Result. - * @stable ICU 4.8 - */ - UStringTrieResult current() const; - - /** - * Traverses the trie from the initial state for this input byte. - * Equivalent to reset().next(inByte). - * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff. - * Values below -0x100 and above 0xff will never match. - * @return The match/value Result. - * @stable ICU 4.8 - */ - inline UStringTrieResult first(int32_t inByte) { - remainingMatchLength_=-1; - if(inByte<0) { - inByte+=0x100; - } - return nextImpl(bytes_, inByte); - } - - /** - * Traverses the trie from the current state for this input byte. - * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff. - * Values below -0x100 and above 0xff will never match. - * @return The match/value Result. - * @stable ICU 4.8 - */ - UStringTrieResult next(int32_t inByte); - - /** - * Traverses the trie from the current state for this byte sequence. 
- * Equivalent to - * \code - * Result result=current(); - * for(each c in s) - * if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH; - * result=next(c); - * return result; - * \endcode - * @param s A string or byte sequence. Can be NULL if length is 0. - * @param length The length of the byte sequence. Can be -1 if NUL-terminated. - * @return The match/value Result. - * @stable ICU 4.8 - */ - UStringTrieResult next(const char *s, int32_t length); - - /** - * Returns a matching byte sequence's value if called immediately after - * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE. - * getValue() can be called multiple times. - * - * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE! - * @return The value for the byte sequence so far. - * @stable ICU 4.8 - */ - inline int32_t getValue() const { - const uint8_t *pos=pos_; - int32_t leadByte=*pos++; - // U_ASSERT(leadByte>=kMinValueLead); - return readValue(pos, leadByte>>1); - } - - /** - * Determines whether all byte sequences reachable from the current state - * map to the same value. - * @param uniqueValue Receives the unique value, if this function returns TRUE. - * (output-only) - * @return TRUE if all byte sequences reachable from the current state - * map to the same value. - * @stable ICU 4.8 - */ - inline UBool hasUniqueValue(int32_t &uniqueValue) const { - const uint8_t *pos=pos_; - // Skip the rest of a pending linear-match node. - return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue); - } - - /** - * Finds each byte which continues the byte sequence from the current state. - * That is, each byte b for which it would be next(b)!=USTRINGTRIE_NO_MATCH now. - * @param out Each next byte is appended to this object. - * (Only uses the out.Append(s, length) method.) 
- * @return the number of bytes which continue the byte sequence from here - * @stable ICU 4.8 - */ - int32_t getNextBytes(ByteSink &out) const; - - /** - * Iterator for all of the (byte sequence, value) pairs in a BytesTrie. - * @stable ICU 4.8 - */ - class U_COMMON_API Iterator : public UMemory { - public: - /** - * Iterates from the root of a byte-serialized BytesTrie. - * @param trieBytes The trie bytes. - * @param maxStringLength If 0, the iterator returns full strings/byte sequences. - * Otherwise, the iterator returns strings with this maximum length. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @stable ICU 4.8 - */ - Iterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode); - - /** - * Iterates from the current state of the specified BytesTrie. - * @param trie The trie whose state will be copied for iteration. - * @param maxStringLength If 0, the iterator returns full strings/byte sequences. - * Otherwise, the iterator returns strings with this maximum length. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @stable ICU 4.8 - */ - Iterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode); - - /** - * Destructor. - * @stable ICU 4.8 - */ - ~Iterator(); - - /** - * Resets this iterator to its initial state. - * @return *this - * @stable ICU 4.8 - */ - Iterator &reset(); - - /** - * @return TRUE if there are more elements. - * @stable ICU 4.8 - */ - UBool hasNext() const; - - /** - * Finds the next (byte sequence, value) pair if there is one. 
- * - * If the byte sequence is truncated to the maximum length and does not - * have a real value, then the value is set to -1. - * In this case, this "not a real value" is indistinguishable from - * a real value of -1. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return TRUE if there is another element. - * @stable ICU 4.8 - */ - UBool next(UErrorCode &errorCode); - - /** - * @return The NUL-terminated byte sequence for the last successful next(). - * @stable ICU 4.8 - */ - StringPiece getString() const; - /** - * @return The value for the last successful next(). - * @stable ICU 4.8 - */ - int32_t getValue() const { return value_; } - - private: - UBool truncateAndStop(); - - const uint8_t *branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode); - - const uint8_t *bytes_; - const uint8_t *pos_; - const uint8_t *initialPos_; - int32_t remainingMatchLength_; - int32_t initialRemainingMatchLength_; - - CharString *str_; - int32_t maxLength_; - int32_t value_; - - // The stack stores pairs of integers for backtracking to another - // outbound edge of a branch node. - // The first integer is an offset from bytes_. - // The second integer has the str_->length() from before the node in bits 15..0, - // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.) - // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24, - // but the code looks more confusing that way.) - UVector32 *stack_; - }; - -private: - friend class BytesTrieBuilder; - - /** - * Constructs a BytesTrie reader instance. - * Unlike the public constructor which just aliases an array, - * this constructor adopts the builder's array. - * This constructor is only called by the builder. 
- */ - BytesTrie(void *adoptBytes, const void *trieBytes) - : ownedArray_(static_cast(adoptBytes)), - bytes_(static_cast(trieBytes)), - pos_(bytes_), remainingMatchLength_(-1) {} - - // No assignment operator. - BytesTrie &operator=(const BytesTrie &other); - - inline void stop() { - pos_=NULL; - } - - // Reads a compact 32-bit integer. - // pos is already after the leadByte, and the lead byte is already shifted right by 1. - static int32_t readValue(const uint8_t *pos, int32_t leadByte); - static inline const uint8_t *skipValue(const uint8_t *pos, int32_t leadByte) { - // U_ASSERT(leadByte>=kMinValueLead); - if(leadByte>=(kMinTwoByteValueLead<<1)) { - if(leadByte<(kMinThreeByteValueLead<<1)) { - ++pos; - } else if(leadByte<(kFourByteValueLead<<1)) { - pos+=2; - } else { - pos+=3+((leadByte>>1)&1); - } - } - return pos; - } - static inline const uint8_t *skipValue(const uint8_t *pos) { - int32_t leadByte=*pos++; - return skipValue(pos, leadByte); - } - - // Reads a jump delta and jumps. - static const uint8_t *jumpByDelta(const uint8_t *pos); - - static inline const uint8_t *skipDelta(const uint8_t *pos) { - int32_t delta=*pos++; - if(delta>=kMinTwoByteDeltaLead) { - if(delta>8)+1; // 0x6c - static const int32_t kFourByteValueLead=0x7e; - - // A little more than Unicode code points. (0x11ffff) - static const int32_t kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1; - - static const int32_t kFiveByteValueLead=0x7f; - - // Compact delta integers. 
- static const int32_t kMaxOneByteDelta=0xbf; - static const int32_t kMinTwoByteDeltaLead=kMaxOneByteDelta+1; // 0xc0 - static const int32_t kMinThreeByteDeltaLead=0xf0; - static const int32_t kFourByteDeltaLead=0xfe; - static const int32_t kFiveByteDeltaLead=0xff; - - static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff - static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff - - uint8_t *ownedArray_; - - // Fixed value referencing the BytesTrie bytes. - const uint8_t *bytes_; - - // Iterator variables. - - // Pointer to next trie byte to read. NULL if no more matches. - const uint8_t *pos_; - // Remaining length of a linear-match node, minus 1. Negative if not in such a node. - int32_t remainingMatchLength_; -}; - -U_NAMESPACE_END - -#endif // __BYTESTRIE_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/bytestriebuilder.h b/deps/node/deps/icu-small/source/common/unicode/bytestriebuilder.h deleted file mode 100644 index b164e3bb..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/bytestriebuilder.h +++ /dev/null @@ -1,182 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2016, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: bytestriebuilder.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010sep25 -* created by: Markus W. 
Scherer -*/ - -/** - * \file - * \brief C++ API: Builder for icu::BytesTrie - */ - -#ifndef __BYTESTRIEBUILDER_H__ -#define __BYTESTRIEBUILDER_H__ - -#include "unicode/utypes.h" -#include "unicode/bytestrie.h" -#include "unicode/stringpiece.h" -#include "unicode/stringtriebuilder.h" - -U_NAMESPACE_BEGIN - -class BytesTrieElement; -class CharString; -/** - * Builder class for BytesTrie. - * - * This class is not intended for public subclassing. - * @stable ICU 4.8 - */ -class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { -public: - /** - * Constructs an empty builder. - * @param errorCode Standard ICU error code. - * @stable ICU 4.8 - */ - BytesTrieBuilder(UErrorCode &errorCode); - - /** - * Destructor. - * @stable ICU 4.8 - */ - virtual ~BytesTrieBuilder(); - - /** - * Adds a (byte sequence, value) pair. - * The byte sequence must be unique. - * The bytes will be copied; the builder does not keep - * a reference to the input StringPiece or its data(). - * @param s The input byte sequence. - * @param value The value associated with this byte sequence. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return *this - * @stable ICU 4.8 - */ - BytesTrieBuilder &add(StringPiece s, int32_t value, UErrorCode &errorCode); - - /** - * Builds a BytesTrie for the add()ed data. - * Once built, no further data can be add()ed until clear() is called. - * - * A BytesTrie cannot be empty. At least one (byte sequence, value) pair - * must have been add()ed. - * - * This method passes ownership of the builder's internal result array to the new trie object. - * Another call to any build() variant will re-serialize the trie. - * After clear() has been called, a new array will be used as well. - * @param buildOption Build option, see UStringTrieBuildOption. 
- * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return A new BytesTrie for the add()ed data. - * @stable ICU 4.8 - */ - BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); - - /** - * Builds a BytesTrie for the add()ed data and byte-serializes it. - * Once built, no further data can be add()ed until clear() is called. - * - * A BytesTrie cannot be empty. At least one (byte sequence, value) pair - * must have been add()ed. - * - * Multiple calls to buildStringPiece() return StringPieces referring to the - * builder's same byte array, without rebuilding. - * If buildStringPiece() is called after build(), the trie will be - * re-serialized into a new array. - * If build() is called after buildStringPiece(), the trie object will become - * the owner of the previously returned array. - * After clear() has been called, a new array will be used as well. - * @param buildOption Build option, see UStringTrieBuildOption. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data. - * @stable ICU 4.8 - */ - StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode); - - /** - * Removes all (byte sequence, value) pairs. - * New data can then be add()ed and a new trie can be built. 
- * @return *this - * @stable ICU 4.8 - */ - BytesTrieBuilder &clear(); - -private: - BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor - BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator - - void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); - - virtual int32_t getElementStringLength(int32_t i) const; - virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const; - virtual int32_t getElementValue(int32_t i) const; - - virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const; - - virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const; - virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const; - virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const; - - virtual UBool matchNodesCanHaveValues() const { return FALSE; } - - virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; } - virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; } - virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; } - - /** - * @internal (private) - */ - class BTLinearMatchNode : public LinearMatchNode { - public: - BTLinearMatchNode(const char *units, int32_t len, Node *nextNode); - virtual UBool operator==(const Node &other) const; - virtual void write(StringTrieBuilder &builder); - private: - const char *s; - }; - - virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, - Node *nextNode) const; - - UBool ensureCapacity(int32_t length); - virtual int32_t write(int32_t byte); - int32_t write(const char *b, int32_t length); - virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length); - virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); - virtual int32_t writeValueAndType(UBool hasValue, 
int32_t value, int32_t node); - virtual int32_t writeDeltaTo(int32_t jumpTarget); - - CharString *strings; // Pointer not object so we need not #include internal charstr.h. - BytesTrieElement *elements; - int32_t elementsCapacity; - int32_t elementsLength; - - // Byte serialization of the trie. - // Grows from the back: bytesLength measures from the end of the buffer! - char *bytes; - int32_t bytesCapacity; - int32_t bytesLength; -}; - -U_NAMESPACE_END - -#endif // __BYTESTRIEBUILDER_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/caniter.h b/deps/node/deps/icu-small/source/common/unicode/caniter.h deleted file mode 100644 index b47e35da..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/caniter.h +++ /dev/null @@ -1,210 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ******************************************************************************* - * Copyright (C) 1996-2014, International Business Machines Corporation and - * others. All Rights Reserved. - ******************************************************************************* - */ - -#ifndef CANITER_H -#define CANITER_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/uobject.h" -#include "unicode/unistr.h" - -/** - * \file - * \brief C++ API: Canonical Iterator - */ - -/** Should permutation skip characters with combining class zero - * Should be either TRUE or FALSE. This is a compile time option - * @stable ICU 2.4 - */ -#ifndef CANITER_SKIP_ZEROES -#define CANITER_SKIP_ZEROES TRUE -#endif - -U_NAMESPACE_BEGIN - -class Hashtable; -class Normalizer2; -class Normalizer2Impl; - -/** - * This class allows one to iterate through all the strings that are canonically equivalent to a given - * string. 
For example, here are some sample results: -Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} -1: \\u0041\\u030A\\u0064\\u0307\\u0327 - = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} -2: \\u0041\\u030A\\u0064\\u0327\\u0307 - = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} -3: \\u0041\\u030A\\u1E0B\\u0327 - = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} -4: \\u0041\\u030A\\u1E11\\u0307 - = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} -5: \\u00C5\\u0064\\u0307\\u0327 - = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} -6: \\u00C5\\u0064\\u0327\\u0307 - = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} -7: \\u00C5\\u1E0B\\u0327 - = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} -8: \\u00C5\\u1E11\\u0307 - = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} -9: \\u212B\\u0064\\u0307\\u0327 - = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} -10: \\u212B\\u0064\\u0327\\u0307 - = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} -11: \\u212B\\u1E0B\\u0327 - = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} -12: \\u212B\\u1E11\\u0307 - = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} - *
Note: the code is intended for use with small strings, and is not suitable for larger ones, - * since it has not been optimized for that situation. - * Note, CanonicalIterator is not intended to be subclassed. - * @author M. Davis - * @author C++ port by V. Weinstein - * @stable ICU 2.4 - */ -class U_COMMON_API CanonicalIterator U_FINAL : public UObject { -public: - /** - * Construct a CanonicalIterator object - * @param source string to get results for - * @param status Fill-in parameter which receives the status of this operation. - * @stable ICU 2.4 - */ - CanonicalIterator(const UnicodeString &source, UErrorCode &status); - - /** Destructor - * Cleans pieces - * @stable ICU 2.4 - */ - virtual ~CanonicalIterator(); - - /** - * Gets the NFD form of the current source we are iterating over. - * @return gets the source: NOTE: it is the NFD form of source - * @stable ICU 2.4 - */ - UnicodeString getSource(); - - /** - * Resets the iterator so that one can start again from the beginning. - * @stable ICU 2.4 - */ - void reset(); - - /** - * Get the next canonically equivalent string. - *
Warning: The strings are not guaranteed to be in any particular order. - * @return the next string that is canonically equivalent. A bogus string is returned when - * the iteration is done. - * @stable ICU 2.4 - */ - UnicodeString next(); - - /** - * Set a new source for this iterator. Allows object reuse. - * @param newSource the source string to iterate against. This allows the same iterator to be used - * while changing the source string, saving object creation. - * @param status Fill-in parameter which receives the status of this operation. - * @stable ICU 2.4 - */ - void setSource(const UnicodeString &newSource, UErrorCode &status); - -#ifndef U_HIDE_INTERNAL_API - /** - * Dumb recursive implementation of permutation. - * TODO: optimize - * @param source the string to find permutations for - * @param skipZeros determine if skip zeros - * @param result the results in a set. - * @param status Fill-in parameter which receives the status of this operation. - * @internal - */ - static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status); -#endif /* U_HIDE_INTERNAL_API */ - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - * - * @stable ICU 2.2 - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - * - * @stable ICU 2.2 - */ - virtual UClassID getDynamicClassID() const; - -private: - // ===================== PRIVATES ============================== - // private default constructor - CanonicalIterator(); - - - /** - * Copy constructor. Private for now. - * @internal (private) - */ - CanonicalIterator(const CanonicalIterator& other); - - /** - * Assignment operator. Private for now. 
- * @internal (private) - */ - CanonicalIterator& operator=(const CanonicalIterator& other); - - // fields - UnicodeString source; - UBool done; - - // 2 dimensional array holds the pieces of the string with - // their different canonically equivalent representations - UnicodeString **pieces; - int32_t pieces_length; - int32_t *pieces_lengths; - - // current is used in iterating to combine pieces - int32_t *current; - int32_t current_length; - - // transient fields - UnicodeString buffer; - - const Normalizer2 &nfd; - const Normalizer2Impl &nfcImpl; - - // we have a segment, in NFD. Find all the strings that are canonically equivalent to it. - UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment) - - //Set getEquivalents2(String segment); - Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status); - //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status); - - /** - * See if the decomposition of cp2 is at segment starting at segmentPos - * (with canonical rearrangment!) 
- * If so, take the remainder, and return the equivalents - */ - //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer); - Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); - //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); - - void cleanPieces(); - -}; - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_NORMALIZATION */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/casemap.h b/deps/node/deps/icu-small/source/common/unicode/casemap.h deleted file mode 100644 index 477eb484..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/casemap.h +++ /dev/null @@ -1,492 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// casemap.h -// created: 2017jan12 Markus W. Scherer - -#ifndef __CASEMAP_H__ -#define __CASEMAP_H__ - -#include "unicode/utypes.h" -#include "unicode/stringpiece.h" -#include "unicode/uobject.h" - -/** - * \file - * \brief C++ API: Low-level C++ case mapping functions. - */ - -U_NAMESPACE_BEGIN - -class BreakIterator; -class ByteSink; -class Edits; - -/** - * Low-level C++ case mapping functions. - * - * @stable ICU 59 - */ -class U_COMMON_API CaseMap U_FINAL : public UMemory { -public: - /** - * Lowercases a UTF-16 string and optionally records edits. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. 
- * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful. - * When the result would be longer than destCapacity, - * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. - * - * @see u_strToLower - * @stable ICU 59 - */ - static int32_t toLower( - const char *locale, uint32_t options, - const char16_t *src, int32_t srcLength, - char16_t *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode); - - /** - * Uppercases a UTF-16 string and optionally records edits. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. 
- * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful. - * When the result would be longer than destCapacity, - * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. - * - * @see u_strToUpper - * @stable ICU 59 - */ - static int32_t toUpper( - const char *locale, uint32_t options, - const char16_t *src, int32_t srcLength, - char16_t *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode); - -#if !UCONFIG_NO_BREAK_ITERATION - - /** - * Titlecases a UTF-16 string and optionally records edits. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. (This can be modified with options bits.) - * - * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, - * U_TITLECASE_NO_LOWERCASE, - * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, - * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. - * @param iter A break iterator to find the first characters of words that are to be titlecased. 
- * It is set to the source string (setText()) - * and used one or more times for iteration (first() and next()). - * If NULL, then a word break iterator for the locale is used - * (or something equivalent). - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful. - * When the result would be longer than destCapacity, - * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. - * - * @see u_strToTitle - * @see ucasemap_toTitle - * @stable ICU 59 - */ - static int32_t toTitle( - const char *locale, uint32_t options, BreakIterator *iter, - const char16_t *src, int32_t srcLength, - char16_t *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode); - -#endif // UCONFIG_NO_BREAK_ITERATION - - /** - * Case-folds a UTF-16 string and optionally records edits. - * - * Case folding is locale-independent and not context-sensitive, - * but there is an option for whether to include or exclude mappings for dotted I - * and dotless i that are marked with 'T' in CaseFolding.txt. - * - * The result may be longer or shorter than the original. 
- * The source string and the destination buffer must not overlap. - * - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, - * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful. - * When the result would be longer than destCapacity, - * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. - * - * @see u_strFoldCase - * @stable ICU 59 - */ - static int32_t fold( - uint32_t options, - const char16_t *src, int32_t srcLength, - char16_t *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode); - - /** - * Lowercases a UTF-8 string and optionally records edits. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * - * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. - * @param src The original string. - * @param sink A ByteSink to which the result string is written. 
- * sink.Flush() is called at the end. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * - * @see ucasemap_utf8ToLower - * @stable ICU 60 - */ - static void utf8ToLower( - const char *locale, uint32_t options, - StringPiece src, ByteSink &sink, Edits *edits, - UErrorCode &errorCode); - - /** - * Uppercases a UTF-8 string and optionally records edits. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * - * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. - * @param src The original string. - * @param sink A ByteSink to which the result string is written. - * sink.Flush() is called at the end. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * - * @see ucasemap_utf8ToUpper - * @stable ICU 60 - */ - static void utf8ToUpper( - const char *locale, uint32_t options, - StringPiece src, ByteSink &sink, Edits *edits, - UErrorCode &errorCode); - -#if !UCONFIG_NO_BREAK_ITERATION - - /** - * Titlecases a UTF-8 string and optionally records edits. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. 
- * - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. (This can be modified with options bits.) - * - * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, - * U_TITLECASE_NO_LOWERCASE, - * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, - * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. - * @param iter A break iterator to find the first characters of words that are to be titlecased. - * It is set to the source string (setUText()) - * and used one or more times for iteration (first() and next()). - * If NULL, then a word break iterator for the locale is used - * (or something equivalent). - * @param src The original string. - * @param sink A ByteSink to which the result string is written. - * sink.Flush() is called at the end. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * - * @see ucasemap_utf8ToTitle - * @stable ICU 60 - */ - static void utf8ToTitle( - const char *locale, uint32_t options, BreakIterator *iter, - StringPiece src, ByteSink &sink, Edits *edits, - UErrorCode &errorCode); - -#endif // UCONFIG_NO_BREAK_ITERATION - - /** - * Case-folds a UTF-8 string and optionally records edits. - * - * Case folding is locale-independent and not context-sensitive, - * but there is an option for whether to include or exclude mappings for dotted I - * and dotless i that are marked with 'T' in CaseFolding.txt. - * - * The result may be longer or shorter than the original. 
- * - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. - * @param src The original string. - * @param sink A ByteSink to which the result string is written. - * sink.Flush() is called at the end. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * - * @see ucasemap_utf8FoldCase - * @stable ICU 60 - */ - static void utf8Fold( - uint32_t options, - StringPiece src, ByteSink &sink, Edits *edits, - UErrorCode &errorCode); - - /** - * Lowercases a UTF-8 string and optionally records edits. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of bytes). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. 
- * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful. - * When the result would be longer than destCapacity, - * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. - * - * @see ucasemap_utf8ToLower - * @stable ICU 59 - */ - static int32_t utf8ToLower( - const char *locale, uint32_t options, - const char *src, int32_t srcLength, - char *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode); - - /** - * Uppercases a UTF-8 string and optionally records edits. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of bytes). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. 
- * @return The length of the result string, if successful. - * When the result would be longer than destCapacity, - * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. - * - * @see ucasemap_utf8ToUpper - * @stable ICU 59 - */ - static int32_t utf8ToUpper( - const char *locale, uint32_t options, - const char *src, int32_t srcLength, - char *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode); - -#if !UCONFIG_NO_BREAK_ITERATION - - /** - * Titlecases a UTF-8 string and optionally records edits. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. (This can be modified with options bits.) - * - * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, - * U_TITLECASE_NO_LOWERCASE, - * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, - * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. - * @param iter A break iterator to find the first characters of words that are to be titlecased. - * It is set to the source string (setUText()) - * and used one or more times for iteration (first() and next()). - * If NULL, then a word break iterator for the locale is used - * (or something equivalent). - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of bytes). 
If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful. - * When the result would be longer than destCapacity, - * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. - * - * @see ucasemap_utf8ToTitle - * @stable ICU 59 - */ - static int32_t utf8ToTitle( - const char *locale, uint32_t options, BreakIterator *iter, - const char *src, int32_t srcLength, - char *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode); - -#endif // UCONFIG_NO_BREAK_ITERATION - - /** - * Case-folds a UTF-8 string and optionally records edits. - * - * Case folding is locale-independent and not context-sensitive, - * but there is an option for whether to include or exclude mappings for dotted I - * and dotless i that are marked with 'T' in CaseFolding.txt. - * - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, - * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of bytes). 
If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful. - * When the result would be longer than destCapacity, - * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. - * - * @see ucasemap_utf8FoldCase - * @stable ICU 59 - */ - static int32_t utf8Fold( - uint32_t options, - const char *src, int32_t srcLength, - char *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode); - -private: - CaseMap() = delete; - CaseMap(const CaseMap &other) = delete; - CaseMap &operator=(const CaseMap &other) = delete; -}; - -U_NAMESPACE_END - -#endif // __CASEMAP_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/char16ptr.h b/deps/node/deps/icu-small/source/common/unicode/char16ptr.h deleted file mode 100644 index a7c5f1a0..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/char16ptr.h +++ /dev/null @@ -1,308 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// char16ptr.h -// created: 2017feb28 Markus W. Scherer - -#ifndef __CHAR16PTR_H__ -#define __CHAR16PTR_H__ - -#include -#include "unicode/utypes.h" - -/** - * \file - * \brief C++ API: char16_t pointer wrappers with - * implicit conversion from bit-compatible raw pointer types. - * Also conversion functions from char16_t * to UChar * and OldUChar *. 
- */ - -U_NAMESPACE_BEGIN - -/** - * \def U_ALIASING_BARRIER - * Barrier for pointer anti-aliasing optimizations even across function boundaries. - * @internal - */ -#ifdef U_ALIASING_BARRIER - // Use the predefined value. -#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT -# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory") -#elif defined(U_IN_DOXYGEN) -# define U_ALIASING_BARRIER(ptr) -#endif - -/** - * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. - * @stable ICU 59 - */ -class U_COMMON_API Char16Ptr U_FINAL { -public: - /** - * Copies the pointer. - * @param p pointer - * @stable ICU 59 - */ - inline Char16Ptr(char16_t *p); -#if !U_CHAR16_IS_TYPEDEF - /** - * Converts the pointer to char16_t *. - * @param p pointer to be converted - * @stable ICU 59 - */ - inline Char16Ptr(uint16_t *p); -#endif -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - /** - * Converts the pointer to char16_t *. - * (Only defined if U_SIZEOF_WCHAR_T==2.) - * @param p pointer to be converted - * @stable ICU 59 - */ - inline Char16Ptr(wchar_t *p); -#endif - /** - * nullptr constructor. - * @param p nullptr - * @stable ICU 59 - */ - inline Char16Ptr(std::nullptr_t p); - /** - * Destructor. - * @stable ICU 59 - */ - inline ~Char16Ptr(); - - /** - * Pointer access. - * @return the wrapped pointer - * @stable ICU 59 - */ - inline char16_t *get() const; - /** - * char16_t pointer access via type conversion (e.g., static_cast). 
- * @return the wrapped pointer - * @stable ICU 59 - */ - inline operator char16_t *() const { return get(); } - -private: - Char16Ptr() = delete; - -#ifdef U_ALIASING_BARRIER - template static char16_t *cast(T *t) { - U_ALIASING_BARRIER(t); - return reinterpret_cast(t); - } - - char16_t *p_; -#else - union { - char16_t *cp; - uint16_t *up; - wchar_t *wp; - } u_; -#endif -}; - -/// \cond -#ifdef U_ALIASING_BARRIER - -Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {} -#if !U_CHAR16_IS_TYPEDEF -Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {} -#endif -#if U_SIZEOF_WCHAR_T==2 -Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {} -#endif -Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {} -Char16Ptr::~Char16Ptr() { - U_ALIASING_BARRIER(p_); -} - -char16_t *Char16Ptr::get() const { return p_; } - -#else - -Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; } -#if !U_CHAR16_IS_TYPEDEF -Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; } -#endif -#if U_SIZEOF_WCHAR_T==2 -Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; } -#endif -Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; } -Char16Ptr::~Char16Ptr() {} - -char16_t *Char16Ptr::get() const { return u_.cp; } - -#endif -/// \endcond - -/** - * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. - * @stable ICU 59 - */ -class U_COMMON_API ConstChar16Ptr U_FINAL { -public: - /** - * Copies the pointer. - * @param p pointer - * @stable ICU 59 - */ - inline ConstChar16Ptr(const char16_t *p); -#if !U_CHAR16_IS_TYPEDEF - /** - * Converts the pointer to char16_t *. - * @param p pointer to be converted - * @stable ICU 59 - */ - inline ConstChar16Ptr(const uint16_t *p); -#endif -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - /** - * Converts the pointer to char16_t *. - * (Only defined if U_SIZEOF_WCHAR_T==2.) - * @param p pointer to be converted - * @stable ICU 59 - */ - inline ConstChar16Ptr(const wchar_t *p); -#endif - /** - * nullptr constructor. 
- * @param p nullptr - * @stable ICU 59 - */ - inline ConstChar16Ptr(const std::nullptr_t p); - - /** - * Destructor. - * @stable ICU 59 - */ - inline ~ConstChar16Ptr(); - - /** - * Pointer access. - * @return the wrapped pointer - * @stable ICU 59 - */ - inline const char16_t *get() const; - /** - * char16_t pointer access via type conversion (e.g., static_cast). - * @return the wrapped pointer - * @stable ICU 59 - */ - inline operator const char16_t *() const { return get(); } - -private: - ConstChar16Ptr() = delete; - -#ifdef U_ALIASING_BARRIER - template static const char16_t *cast(const T *t) { - U_ALIASING_BARRIER(t); - return reinterpret_cast(t); - } - - const char16_t *p_; -#else - union { - const char16_t *cp; - const uint16_t *up; - const wchar_t *wp; - } u_; -#endif -}; - -/// \cond -#ifdef U_ALIASING_BARRIER - -ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {} -#if !U_CHAR16_IS_TYPEDEF -ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {} -#endif -#if U_SIZEOF_WCHAR_T==2 -ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {} -#endif -ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {} -ConstChar16Ptr::~ConstChar16Ptr() { - U_ALIASING_BARRIER(p_); -} - -const char16_t *ConstChar16Ptr::get() const { return p_; } - -#else - -ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; } -#if !U_CHAR16_IS_TYPEDEF -ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; } -#endif -#if U_SIZEOF_WCHAR_T==2 -ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; } -#endif -ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; } -ConstChar16Ptr::~ConstChar16Ptr() {} - -const char16_t *ConstChar16Ptr::get() const { return u_.cp; } - -#endif -/// \endcond - -/** - * Converts from const char16_t * to const UChar *. - * Includes an aliasing barrier if available. 
- * @param p pointer - * @return p as const UChar * - * @stable ICU 59 - */ -inline const UChar *toUCharPtr(const char16_t *p) { -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p); -#endif - return reinterpret_cast(p); -} - -/** - * Converts from char16_t * to UChar *. - * Includes an aliasing barrier if available. - * @param p pointer - * @return p as UChar * - * @stable ICU 59 - */ -inline UChar *toUCharPtr(char16_t *p) { -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p); -#endif - return reinterpret_cast(p); -} - -/** - * Converts from const char16_t * to const OldUChar *. - * Includes an aliasing barrier if available. - * @param p pointer - * @return p as const OldUChar * - * @stable ICU 59 - */ -inline const OldUChar *toOldUCharPtr(const char16_t *p) { -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p); -#endif - return reinterpret_cast(p); -} - -/** - * Converts from char16_t * to OldUChar *. - * Includes an aliasing barrier if available. - * @param p pointer - * @return p as OldUChar * - * @stable ICU 59 - */ -inline OldUChar *toOldUCharPtr(char16_t *p) { -#ifdef U_ALIASING_BARRIER - U_ALIASING_BARRIER(p); -#endif - return reinterpret_cast(p); -} - -U_NAMESPACE_END - -#endif // __CHAR16PTR_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/chariter.h b/deps/node/deps/icu-small/source/common/unicode/chariter.h deleted file mode 100644 index 292794f6..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/chariter.h +++ /dev/null @@ -1,728 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************** -* -* Copyright (C) 1997-2011, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -******************************************************************** -*/ - -#ifndef CHARITER_H -#define CHARITER_H - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/unistr.h" -/** - * \file - * \brief C++ API: Character Iterator - */ - -U_NAMESPACE_BEGIN -/** - * Abstract class that defines an API for forward-only iteration - * on text objects. - * This is a minimal interface for iteration without random access - * or backwards iteration. It is especially useful for wrapping - * streams with converters into an object for collation or - * normalization. - * - *

Characters can be accessed in two ways: as code units or as - * code points. - * Unicode code points are 21-bit integers and are the scalar values - * of Unicode characters. ICU uses the type UChar32 for them. - * Unicode code units are the storage units of a given - * Unicode/UCS Transformation Format (a character encoding scheme). - * With UTF-16, all code points can be represented with either one - * or two code units ("surrogates"). - * String storage is typically based on code units, while properties - * of characters are typically determined using code point values. - * Some processes may be designed to work with sequences of code units, - * or it may be known that all characters that are important to an - * algorithm can be represented with single code units. - * Other processes will need to use the code point access functions.

- * - *

ForwardCharacterIterator provides nextPostInc() to access - * a code unit and advance an internal position into the text object, - * similar to a return text[position++].
- * It provides next32PostInc() to access a code point and advance an internal - * position.

- * - *

next32PostInc() assumes that the current position is that of - * the beginning of a code point, i.e., of its first code unit. - * After next32PostInc(), this will be true again. - * In general, access to code units and code points in the same - * iteration loop should not be mixed. In UTF-16, if the current position - * is on a second code unit (Low Surrogate), then only that code unit - * is returned even by next32PostInc().

- * - *

For iteration with either function, there are two ways to - * check for the end of the iteration. When there are no more - * characters in the text object: - *

    - *
  • The hasNext() function returns FALSE.
  • - *
  • nextPostInc() and next32PostInc() return DONE - * when one attempts to read beyond the end of the text object.
  • - *
- * - * Example: - * \code - * void function1(ForwardCharacterIterator &it) { - * UChar32 c; - * while(it.hasNext()) { - * c=it.next32PostInc(); - * // use c - * } - * } - * - * void function1(ForwardCharacterIterator &it) { - * char16_t c; - * while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) { - * // use c - * } - * } - * \endcode - *

- * - * @stable ICU 2.0 - */ -class U_COMMON_API ForwardCharacterIterator : public UObject { -public: - /** - * Value returned by most of ForwardCharacterIterator's functions - * when the iterator has reached the limits of its iteration. - * @stable ICU 2.0 - */ - enum { DONE = 0xffff }; - - /** - * Destructor. - * @stable ICU 2.0 - */ - virtual ~ForwardCharacterIterator(); - - /** - * Returns true when both iterators refer to the same - * character in the same character-storage object. - * @param that The ForwardCharacterIterator to be compared for equality - * @return true when both iterators refer to the same - * character in the same character-storage object - * @stable ICU 2.0 - */ - virtual UBool operator==(const ForwardCharacterIterator& that) const = 0; - - /** - * Returns true when the iterators refer to different - * text-storage objects, or to different characters in the - * same text-storage object. - * @param that The ForwardCharacterIterator to be compared for inequality - * @return true when the iterators refer to different - * text-storage objects, or to different characters in the - * same text-storage object - * @stable ICU 2.0 - */ - inline UBool operator!=(const ForwardCharacterIterator& that) const; - - /** - * Generates a hash code for this iterator. - * @return the hash code. - * @stable ICU 2.0 - */ - virtual int32_t hashCode(void) const = 0; - - /** - * Returns a UClassID for this ForwardCharacterIterator ("poor man's - * RTTI").

Despite the fact that this function is public, - * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! - * @return a UClassID for this ForwardCharacterIterator - * @stable ICU 2.0 - */ - virtual UClassID getDynamicClassID(void) const = 0; - - /** - * Gets the current code unit for returning and advances to the next code unit - * in the iteration range - * (toward endIndex()). If there are - * no more code units to return, returns DONE. - * @return the current code unit. - * @stable ICU 2.0 - */ - virtual char16_t nextPostInc(void) = 0; - - /** - * Gets the current code point for returning and advances to the next code point - * in the iteration range - * (toward endIndex()). If there are - * no more code points to return, returns DONE. - * @return the current code point. - * @stable ICU 2.0 - */ - virtual UChar32 next32PostInc(void) = 0; - - /** - * Returns FALSE if there are no more code units or code points - * at or after the current position in the iteration range. - * This is used with nextPostInc() or next32PostInc() in forward - * iteration. - * @returns FALSE if there are no more code units or code points - * at or after the current position in the iteration range. - * @stable ICU 2.0 - */ - virtual UBool hasNext() = 0; - -protected: - /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/ - ForwardCharacterIterator(); - - /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/ - ForwardCharacterIterator(const ForwardCharacterIterator &other); - - /** - * Assignment operator to be overridden in the implementing class. - * @stable ICU 2.0 - */ - ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; } -}; - -/** - * Abstract class that defines an API for iteration - * on text objects. - * This is an interface for forward and backward iteration - * and random access into a text object. - * - *

The API provides backward compatibility to the Java and older ICU - * CharacterIterator classes but extends them significantly: - *

    - *
  1. CharacterIterator is now a subclass of ForwardCharacterIterator.
  2. - *
  3. While the old API functions provided forward iteration with - * "pre-increment" semantics, the new one also provides functions - * with "post-increment" semantics. They are more efficient and should - * be the preferred iterator functions for new implementations. - * The backward iteration always had "pre-decrement" semantics, which - * are efficient.
  4. - *
  5. Just like ForwardCharacterIterator, it provides access to - * both code units and code points. Code point access versions are available - * for the old and the new iteration semantics.
  6. - *
  7. There are new functions for setting and moving the current position - * without returning a character, for efficiency.
  8. - *
- * - * See ForwardCharacterIterator for examples for using the new forward iteration - * functions. For backward iteration, there is also a hasPrevious() function - * that can be used analogously to hasNext(). - * The old functions work as before and are shown below.

- * - *

Examples for some of the new functions:

- * - * Forward iteration with hasNext(): - * \code - * void forward1(CharacterIterator &it) { - * UChar32 c; - * for(it.setToStart(); it.hasNext();) { - * c=it.next32PostInc(); - * // use c - * } - * } - * \endcode - * Forward iteration more similar to loops with the old forward iteration, - * showing a way to convert simple for() loops: - * \code - * void forward2(CharacterIterator &it) { - * char16_t c; - * for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) { - * // use c - * } - * } - * \endcode - * Backward iteration with setToEnd() and hasPrevious(): - * \code - * void backward1(CharacterIterator &it) { - * UChar32 c; - * for(it.setToEnd(); it.hasPrevious();) { - * c=it.previous32(); - * // use c - * } - * } - * \endcode - * Backward iteration with a more traditional for() loop: - * \code - * void backward2(CharacterIterator &it) { - * char16_t c; - * for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) { - * // use c - * } - * } - * \endcode - * - * Example for random access: - * \code - * void random(CharacterIterator &it) { - * // set to the third code point from the beginning - * it.move32(3, CharacterIterator::kStart); - * // get a code point from here without moving the position - * UChar32 c=it.current32(); - * // get the position - * int32_t pos=it.getIndex(); - * // get the previous code unit - * char16_t u=it.previous(); - * // move back one more code unit - * it.move(-1, CharacterIterator::kCurrent); - * // set the position back to where it was - * // and read the same code point c and move beyond it - * it.setIndex(pos); - * if(c!=it.next32PostInc()) { - * exit(1); // CharacterIterator inconsistent - * } - * } - * \endcode - * - *

Examples, especially for the old API:

- * - * Function processing characters, in this example simple output - *
- * \code
- *  void processChar( char16_t c )
- *  {
- *      cout << " " << c;
- *  }
- * \endcode
- * 
- * Traverse the text from start to finish - *
- * \code
- *  void traverseForward(CharacterIterator& iter)
- *  {
- *      for(char16_t c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) {
- *          processChar(c);
- *      }
- *  }
- * \endcode
- * 
- * Traverse the text backwards, from end to start - *
- * \code
- *  void traverseBackward(CharacterIterator& iter)
- *  {
- *      for(char16_t c = iter.last(); c != CharacterIterator.DONE; c = iter.previous()) {
- *          processChar(c);
- *      }
- *  }
- * \endcode
- * 
- * Traverse both forward and backward from a given position in the text. - * Calls to notBoundary() in this example represents some additional stopping criteria. - *
- * \code
- * void traverseOut(CharacterIterator& iter, int32_t pos)
- * {
- *      char16_t c;
- *      for (c = iter.setIndex(pos);
- *      c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
- *          c = iter.next()) {}
- *      int32_t end = iter.getIndex();
- *      for (c = iter.setIndex(pos);
- *          c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
- *          c = iter.previous()) {}
- *      int32_t start = iter.getIndex() + 1;
- *
- *      cout << "start: " << start << " end: " << end << endl;
- *      for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
- *          processChar(c);
- *     }
- *  }
- * \endcode
- * 
- * Creating a StringCharacterIterator and calling the test functions - *
- * \code
- *  void CharacterIterator_Example( void )
- *   {
- *       cout << endl << "===== CharacterIterator_Example: =====" << endl;
- *       UnicodeString text("Ein kleiner Satz.");
- *       StringCharacterIterator iterator(text);
- *       cout << "----- traverseForward: -----------" << endl;
- *       traverseForward( iterator );
- *       cout << endl << endl << "----- traverseBackward: ----------" << endl;
- *       traverseBackward( iterator );
- *       cout << endl << endl << "----- traverseOut: ---------------" << endl;
- *       traverseOut( iterator, 7 );
- *       cout << endl << endl << "-----" << endl;
- *   }
- * \endcode
- * 
- * - * @stable ICU 2.0 - */ -class U_COMMON_API CharacterIterator : public ForwardCharacterIterator { -public: - /** - * Origin enumeration for the move() and move32() functions. - * @stable ICU 2.0 - */ - enum EOrigin { kStart, kCurrent, kEnd }; - - /** - * Destructor. - * @stable ICU 2.0 - */ - virtual ~CharacterIterator(); - - /** - * Returns a pointer to a new CharacterIterator of the same - * concrete class as this one, and referring to the same - * character in the same text-storage object as this one. The - * caller is responsible for deleting the new clone. - * @return a pointer to a new CharacterIterator - * @stable ICU 2.0 - */ - virtual CharacterIterator* clone(void) const = 0; - - /** - * Sets the iterator to refer to the first code unit in its - * iteration range, and returns that code unit. - * This can be used to begin an iteration with next(). - * @return the first code unit in its iteration range. - * @stable ICU 2.0 - */ - virtual char16_t first(void) = 0; - - /** - * Sets the iterator to refer to the first code unit in its - * iteration range, returns that code unit, and moves the position - * to the second code unit. This is an alternative to setToStart() - * for forward iteration with nextPostInc(). - * @return the first code unit in its iteration range. - * @stable ICU 2.0 - */ - virtual char16_t firstPostInc(void); - - /** - * Sets the iterator to refer to the first code point in its - * iteration range, and returns that code unit, - * This can be used to begin an iteration with next32(). - * Note that an iteration with next32PostInc(), beginning with, - * e.g., setToStart() or firstPostInc(), is more efficient. - * @return the first code point in its iteration range. - * @stable ICU 2.0 - */ - virtual UChar32 first32(void) = 0; - - /** - * Sets the iterator to refer to the first code point in its - * iteration range, returns that code point, and moves the position - * to the second code point. 
This is an alternative to setToStart() - * for forward iteration with next32PostInc(). - * @return the first code point in its iteration range. - * @stable ICU 2.0 - */ - virtual UChar32 first32PostInc(void); - - /** - * Sets the iterator to refer to the first code unit or code point in its - * iteration range. This can be used to begin a forward - * iteration with nextPostInc() or next32PostInc(). - * @return the start position of the iteration range - * @stable ICU 2.0 - */ - inline int32_t setToStart(); - - /** - * Sets the iterator to refer to the last code unit in its - * iteration range, and returns that code unit. - * This can be used to begin an iteration with previous(). - * @return the last code unit. - * @stable ICU 2.0 - */ - virtual char16_t last(void) = 0; - - /** - * Sets the iterator to refer to the last code point in its - * iteration range, and returns that code unit. - * This can be used to begin an iteration with previous32(). - * @return the last code point. - * @stable ICU 2.0 - */ - virtual UChar32 last32(void) = 0; - - /** - * Sets the iterator to the end of its iteration range, just behind - * the last code unit or code point. This can be used to begin a backward - * iteration with previous() or previous32(). - * @return the end position of the iteration range - * @stable ICU 2.0 - */ - inline int32_t setToEnd(); - - /** - * Sets the iterator to refer to the "position"-th code unit - * in the text-storage object the iterator refers to, and - * returns that code unit. - * @param position the "position"-th code unit in the text-storage object - * @return the "position"-th code unit. - * @stable ICU 2.0 - */ - virtual char16_t setIndex(int32_t position) = 0; - - /** - * Sets the iterator to refer to the beginning of the code point - * that contains the "position"-th code unit - * in the text-storage object the iterator refers to, and - * returns that code point. 
- * The current position is adjusted to the beginning of the code point - * (its first code unit). - * @param position the "position"-th code unit in the text-storage object - * @return the "position"-th code point. - * @stable ICU 2.0 - */ - virtual UChar32 setIndex32(int32_t position) = 0; - - /** - * Returns the code unit the iterator currently refers to. - * @return the current code unit. - * @stable ICU 2.0 - */ - virtual char16_t current(void) const = 0; - - /** - * Returns the code point the iterator currently refers to. - * @return the current code point. - * @stable ICU 2.0 - */ - virtual UChar32 current32(void) const = 0; - - /** - * Advances to the next code unit in the iteration range - * (toward endIndex()), and returns that code unit. If there are - * no more code units to return, returns DONE. - * @return the next code unit. - * @stable ICU 2.0 - */ - virtual char16_t next(void) = 0; - - /** - * Advances to the next code point in the iteration range - * (toward endIndex()), and returns that code point. If there are - * no more code points to return, returns DONE. - * Note that iteration with "pre-increment" semantics is less - * efficient than iteration with "post-increment" semantics - * that is provided by next32PostInc(). - * @return the next code point. - * @stable ICU 2.0 - */ - virtual UChar32 next32(void) = 0; - - /** - * Advances to the previous code unit in the iteration range - * (toward startIndex()), and returns that code unit. If there are - * no more code units to return, returns DONE. - * @return the previous code unit. - * @stable ICU 2.0 - */ - virtual char16_t previous(void) = 0; - - /** - * Advances to the previous code point in the iteration range - * (toward startIndex()), and returns that code point. If there are - * no more code points to return, returns DONE. - * @return the previous code point. 
- * @stable ICU 2.0 - */ - virtual UChar32 previous32(void) = 0; - - /** - * Returns FALSE if there are no more code units or code points - * before the current position in the iteration range. - * This is used with previous() or previous32() in backward - * iteration. - * @return FALSE if there are no more code units or code points - * before the current position in the iteration range, return TRUE otherwise. - * @stable ICU 2.0 - */ - virtual UBool hasPrevious() = 0; - - /** - * Returns the numeric index in the underlying text-storage - * object of the character returned by first(). Since it's - * possible to create an iterator that iterates across only - * part of a text-storage object, this number isn't - * necessarily 0. - * @returns the numeric index in the underlying text-storage - * object of the character returned by first(). - * @stable ICU 2.0 - */ - inline int32_t startIndex(void) const; - - /** - * Returns the numeric index in the underlying text-storage - * object of the position immediately BEYOND the character - * returned by last(). - * @return the numeric index in the underlying text-storage - * object of the position immediately BEYOND the character - * returned by last(). - * @stable ICU 2.0 - */ - inline int32_t endIndex(void) const; - - /** - * Returns the numeric index in the underlying text-storage - * object of the character the iterator currently refers to - * (i.e., the character returned by current()). - * @return the numeric index in the text-storage object of - * the character the iterator currently refers to - * @stable ICU 2.0 - */ - inline int32_t getIndex(void) const; - - /** - * Returns the length of the entire text in the underlying - * text-storage object. - * @return the length of the entire text in the text-storage object - * @stable ICU 2.0 - */ - inline int32_t getLength() const; - - /** - * Moves the current position relative to the start or end of the - * iteration range, or relative to the current position itself. 
- * The movement is expressed in numbers of code units forward - * or backward by specifying a positive or negative delta. - * @param delta the position relative to origin. A positive delta means forward; - * a negative delta means backward. - * @param origin Origin enumeration {kStart, kCurrent, kEnd} - * @return the new position - * @stable ICU 2.0 - */ - virtual int32_t move(int32_t delta, EOrigin origin) = 0; - - /** - * Moves the current position relative to the start or end of the - * iteration range, or relative to the current position itself. - * The movement is expressed in numbers of code points forward - * or backward by specifying a positive or negative delta. - * @param delta the position relative to origin. A positive delta means forward; - * a negative delta means backward. - * @param origin Origin enumeration {kStart, kCurrent, kEnd} - * @return the new position - * @stable ICU 2.0 - */ -#ifdef move32 - // One of the system headers right now is sometimes defining a conflicting macro we don't use -#undef move32 -#endif - virtual int32_t move32(int32_t delta, EOrigin origin) = 0; - - /** - * Copies the text under iteration into the UnicodeString - * referred to by "result". - * @param result Receives a copy of the text under iteration. - * @stable ICU 2.0 - */ - virtual void getText(UnicodeString& result) = 0; - -protected: - /** - * Empty constructor. - * @stable ICU 2.0 - */ - CharacterIterator(); - - /** - * Constructor, just setting the length field in this base class. - * @stable ICU 2.0 - */ - CharacterIterator(int32_t length); - - /** - * Constructor, just setting the length and position fields in this base class. - * @stable ICU 2.0 - */ - CharacterIterator(int32_t length, int32_t position); - - /** - * Constructor, just setting the length, start, end, and position fields in this base class. - * @stable ICU 2.0 - */ - CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position); - - /** - * Copy constructor. 
- * - * @param that The CharacterIterator to be copied - * @stable ICU 2.0 - */ - CharacterIterator(const CharacterIterator &that); - - /** - * Assignment operator. Sets this CharacterIterator to have the same behavior, - * as the one passed in. - * @param that The CharacterIterator passed in. - * @return the newly set CharacterIterator. - * @stable ICU 2.0 - */ - CharacterIterator &operator=(const CharacterIterator &that); - - /** - * Base class text length field. - * Necessary this for correct getText() and hashCode(). - * @stable ICU 2.0 - */ - int32_t textLength; - - /** - * Base class field for the current position. - * @stable ICU 2.0 - */ - int32_t pos; - - /** - * Base class field for the start of the iteration range. - * @stable ICU 2.0 - */ - int32_t begin; - - /** - * Base class field for the end of the iteration range. - * @stable ICU 2.0 - */ - int32_t end; -}; - -inline UBool -ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const { - return !operator==(that); -} - -inline int32_t -CharacterIterator::setToStart() { - return move(0, kStart); -} - -inline int32_t -CharacterIterator::setToEnd() { - return move(0, kEnd); -} - -inline int32_t -CharacterIterator::startIndex(void) const { - return begin; -} - -inline int32_t -CharacterIterator::endIndex(void) const { - return end; -} - -inline int32_t -CharacterIterator::getIndex(void) const { - return pos; -} - -inline int32_t -CharacterIterator::getLength(void) const { - return textLength; -} - -U_NAMESPACE_END -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/dbbi.h b/deps/node/deps/icu-small/source/common/unicode/dbbi.h deleted file mode 100644 index 62509c52..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/dbbi.h +++ /dev/null @@ -1,42 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2006,2013 IBM Corp. All rights reserved. -********************************************************************** -* Date Name Description -* 12/1/99 rgillam Complete port from Java. -* 01/13/2000 helena Added UErrorCode to ctors. -********************************************************************** -*/ - -#ifndef DBBI_H -#define DBBI_H - -#include "unicode/rbbi.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -/** - * \file - * \brief C++ API: Dictionary Based Break Iterator - */ - -U_NAMESPACE_BEGIN - -#ifndef U_HIDE_DEPRECATED_API -/** - * An obsolete subclass of RuleBasedBreakIterator. Handling of dictionary- - * based break iteration has been folded into the base class. This class - * is deprecated as of ICU 3.6. - * @deprecated ICU 3.6 - */ -typedef RuleBasedBreakIterator DictionaryBasedBreakIterator; - -#endif /* U_HIDE_DEPRECATED_API */ - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/docmain.h b/deps/node/deps/icu-small/source/common/unicode/docmain.h deleted file mode 100644 index 243fa17b..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/docmain.h +++ /dev/null @@ -1,227 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/******************************************************************** - * COPYRIGHT: - * Copyright (c) 1997-2012, International Business Machines Corporation and - * others. All Rights Reserved. - * - * FILE NAME: DOCMAIN.h - * - * Date Name Description - * 12/11/2000 Ram Creation. - */ - -/** - * \file - * \brief (Non API- contains Doxygen definitions) - * - * This file contains documentation for Doxygen and doesnot have - * any significance with respect to C or C++ API - */ - -/*! 
\mainpage - * - * \section API API Reference Usage - * - *

C++ Programmers:

- *

Use Class Hierarchy or Alphabetical List - * or Compound List - * to find the class you are interested in. For example, to find BreakIterator, - * you can go to the Alphabetical List, then click on - * "BreakIterator". Once you are at the class, you will find an inheritance - * chart, a list of the public members, a detailed description of the class, - * then detailed member descriptions.

- * - *

C Programmers:

- *

Use Module List or File Members - * to find a list of all the functions and constants. - * For example, to find BreakIterator functions you would click on - * File List, - * then find "ubrk.h" and click on it. You will find descriptions of Defines, - * Typedefs, Enumerations, and Functions, with detailed descriptions below. - * If you want to find a specific function, such as ubrk_next(), then click - * first on File Members, then use your browser - * Find dialog to search for "ubrk_next()".

- * - * - *

API References for Previous Releases

- *

The API References for each release of ICU are also available as - * a zip file from the ICU - * download page.

- * - *
- * - *

Architecture (User's Guide)

- * - * - *
- *\htmlonly

Module List

\endhtmlonly
Module NameCC++
Basic Types and Constantsutypes.hutypes.h
Strings and Character Iterationustring.h, utf8.h, utf16.h, UText, UCharIteratoricu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece,icu::ByteSink
Unicode Character
Properties and Names
uchar.h, uscript.hC API
Sets of Unicode Code Points and Stringsuset.hicu::UnicodeSet
Maps from Unicode Code Points to Integer Valuesucptrie.h, umutablecptrie.hC API
Maps from Strings to Integer Values(no C API)icu::BytesTrie, icu::UCharsTrie
Codepage Conversionucnv.h, ucnvsel.hbC API
Codepage Detectionucsdet.hC API
Unicode Text Compressionucnv.h
(encoding name "SCSU" or "BOCU-1")
C API
Locales uloc.hicu::Locale
Resource Bundlesures.hicu::ResourceBundle
Normalizationunorm2.hicu::Normalizer2
Calendarsucal.hicu::Calendar
Date and Time Formattingudat.hicu::DateFormat
Message Formattingumsg.hicu::MessageFormat
Number Formattingunumberformatter.h, unum.hicu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)
Number Spellout
(Rule Based Number Formatting)
unum.h
(use UNUM_SPELLOUT)
icu::RuleBasedNumberFormat
Text Transformation
(Transliteration)
utrans.hicu::Transliterator
Bidirectional Algorithmubidi.h, ubiditransform.hC API
Arabic Shapingushape.hC API
Collationucol.hicu::Collator
String Searchingusearch.hicu::StringSearch
Index Characters/
Bucketing for Sorted Lists
(no C API)icu::AlphabeticIndex
Text Boundary Analysis
(Break Iteration)
ubrk.hicu::BreakIterator
Regular Expressionsuregex.hicu::RegexPattern, icu::RegexMatcher
StringPrepusprep.hC API
International Domain Names in Applications:
- * UTS #46 in C/C++, IDNA2003 only via C API
uidna.hidna.h
Identifier Spoofing & Confusabilityuspoof.hC API
Universal Time Scaleutmscale.hC API
Paragraph Layout / Complex Text Layoutplayout.hicu::ParagraphLayout
ICU I/Oustdio.hustream.h
- * This main page is generated from docmain.h - */ diff --git a/deps/node/deps/icu-small/source/common/unicode/dtintrv.h b/deps/node/deps/icu-small/source/common/unicode/dtintrv.h deleted file mode 100644 index c99011e2..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/dtintrv.h +++ /dev/null @@ -1,160 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2008-2009, International Business Machines Corporation and -* others. All Rights Reserved. -******************************************************************************* -* -* File DTINTRV.H -* -******************************************************************************* -*/ - -#ifndef __DTINTRV_H__ -#define __DTINTRV_H__ - -#include "unicode/utypes.h" -#include "unicode/uobject.h" - -/** - * \file - * \brief C++ API: Date Interval data type - */ - - -U_NAMESPACE_BEGIN - - -/** - * This class represents a date interval. - * It is a pair of UDate representing from UDate 1 to UDate 2. - * @stable ICU 4.0 -**/ -class U_COMMON_API DateInterval : public UObject { -public: - - /** - * Construct a DateInterval given a from date and a to date. - * @param fromDate The from date in date interval. - * @param toDate The to date in date interval. - * @stable ICU 4.0 - */ - DateInterval(UDate fromDate, UDate toDate); - - /** - * destructor - * @stable ICU 4.0 - */ - virtual ~DateInterval(); - - /** - * Get the from date. - * @return the from date in dateInterval. - * @stable ICU 4.0 - */ - UDate getFromDate() const; - - /** - * Get the to date. - * @return the to date in dateInterval. - * @stable ICU 4.0 - */ - UDate getToDate() const; - - - /** - * Return the class ID for this class. This is useful only for comparing to - * a return value from getDynamicClassID(). For example: - *
-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .       derived::getStaticClassID()) ...
-     * 
- * @return The class ID for all objects of this class. - * @stable ICU 4.0 - */ - static UClassID U_EXPORT2 getStaticClassID(void); - - /** - * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This - * method is to implement a simple version of RTTI, since not all C++ - * compilers support genuine RTTI. Polymorphic operator==() and clone() - * methods call this method. - * - * @return The class ID for this object. All objects of a - * given class have the same class ID. Objects of - * other classes have different class IDs. - * @stable ICU 4.0 - */ - virtual UClassID getDynamicClassID(void) const; - - - /** - * Copy constructor. - * @stable ICU 4.0 - */ - DateInterval(const DateInterval& other); - - /** - * Default assignment operator - * @stable ICU 4.0 - */ - DateInterval& operator=(const DateInterval&); - - /** - * Equality operator. - * @return TRUE if the two DateIntervals are the same - * @stable ICU 4.0 - */ - virtual UBool operator==(const DateInterval& other) const; - - /** - * Non-equality operator - * @return TRUE if the two DateIntervals are not the same - * @stable ICU 4.0 - */ - UBool operator!=(const DateInterval& other) const; - - - /** - * clone this object. - * The caller owns the result and should delete it when done. - * @return a cloned DateInterval - * @stable ICU 4.0 - */ - virtual DateInterval* clone() const; - -private: - /** - * Default constructor, not implemented. 
- */ - DateInterval(); - - UDate fromDate; - UDate toDate; - -} ;// end class DateInterval - - -inline UDate -DateInterval::getFromDate() const { - return fromDate; -} - - -inline UDate -DateInterval::getToDate() const { - return toDate; -} - - -inline UBool -DateInterval::operator!=(const DateInterval& other) const { - return ( !operator==(other) ); -} - - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/edits.h b/deps/node/deps/icu-small/source/common/unicode/edits.h deleted file mode 100644 index 79e98b0c..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/edits.h +++ /dev/null @@ -1,526 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// edits.h -// created: 2016dec30 Markus W. Scherer - -#ifndef __EDITS_H__ -#define __EDITS_H__ - -#include "unicode/utypes.h" -#include "unicode/uobject.h" - -/** - * \file - * \brief C++ API: C++ class Edits for low-level string transformations on styled text. - */ - -U_NAMESPACE_BEGIN - -class UnicodeString; - -/** - * Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions - * in linear progression. Does not support moving/reordering of text. - * - * There are two types of edits: change edits and no-change edits. Add edits to - * instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and - * {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity, - * whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one - * mapping between code points in the source and destination strings. - * - * After all edits have been added, instances of this class should be considered immutable, and an - * {@link Edits::Iterator} can be used for queries. - * - * There are four flavors of Edits::Iterator: - * - *
    - *
  • {@link #getFineIterator()} retains full granularity of change edits. - *
  • {@link #getFineChangesIterator()} retains full granularity of change edits, and when calling - * next() on the iterator, skips over no-change edits (unchanged regions). - *
  • {@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change - * edits are automatically merged during the construction phase.) - *
  • {@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when - * calling next() on the iterator, skips over no-change edits (unchanged regions). - *
- * - * For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the - * following fine edits: - *
    - *
  • abc ⇨ abc (no-change) - *
  • ß ⇨ ss (change) - *
  • D ⇨ d (change) - *
  • e ⇨ e (no-change) - *
  • F ⇨ f (change) - *
- * and the following coarse edits (note how adjacent change edits get merged together): - *
    - *
  • abc ⇨ abc (no-change) - *
  • ßD ⇨ ssd (change) - *
  • e ⇨ e (no-change) - *
  • F ⇨ f (change) - *
- * - * The "fine changes" and "coarse changes" iterators will step through only the change edits when their - * `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when - * their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()` - * methods are used to walk through the string. - * - * For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in - * UCharacterCaseTest.java. - * - * An Edits object tracks a separate UErrorCode, but ICU string transformation functions - * (e.g., case mapping functions) merge any such errors into their API's UErrorCode. - * - * @stable ICU 59 - */ -class U_COMMON_API Edits U_FINAL : public UMemory { -public: - /** - * Constructs an empty object. - * @stable ICU 59 - */ - Edits() : - array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0), - errorCode_(U_ZERO_ERROR) {} - /** - * Copy constructor. - * @param other source edits - * @stable ICU 60 - */ - Edits(const Edits &other) : - array(stackArray), capacity(STACK_CAPACITY), length(other.length), - delta(other.delta), numChanges(other.numChanges), - errorCode_(other.errorCode_) { - copyArray(other); - } - /** - * Move constructor, might leave src empty. - * This object will have the same contents that the source object had. - * @param src source edits - * @stable ICU 60 - */ - Edits(Edits &&src) U_NOEXCEPT : - array(stackArray), capacity(STACK_CAPACITY), length(src.length), - delta(src.delta), numChanges(src.numChanges), - errorCode_(src.errorCode_) { - moveArray(src); - } - - /** - * Destructor. - * @stable ICU 59 - */ - ~Edits(); - - /** - * Assignment operator. - * @param other source edits - * @return *this - * @stable ICU 60 - */ - Edits &operator=(const Edits &other); - - /** - * Move assignment operator, might leave src empty. - * This object will have the same contents that the source object had. 
- * The behavior is undefined if *this and src are the same object. - * @param src source edits - * @return *this - * @stable ICU 60 - */ - Edits &operator=(Edits &&src) U_NOEXCEPT; - - /** - * Resets the data but may not release memory. - * @stable ICU 59 - */ - void reset() U_NOEXCEPT; - - /** - * Adds a no-change edit: a record for an unchanged segment of text. - * Normally called from inside ICU string transformation functions, not user code. - * @stable ICU 59 - */ - void addUnchanged(int32_t unchangedLength); - /** - * Adds a change edit: a record for a text replacement/insertion/deletion. - * Normally called from inside ICU string transformation functions, not user code. - * @stable ICU 59 - */ - void addReplace(int32_t oldLength, int32_t newLength); - /** - * Sets the UErrorCode if an error occurred while recording edits. - * Preserves older error codes in the outErrorCode. - * Normally called from inside ICU string transformation functions, not user code. - * @param outErrorCode Set to an error code if it does not contain one already - * and an error occurred while recording edits. - * Otherwise unchanged. - * @return TRUE if U_FAILURE(outErrorCode) - * @stable ICU 59 - */ - UBool copyErrorTo(UErrorCode &outErrorCode); - - /** - * How much longer is the new text compared with the old text? - * @return new length minus old length - * @stable ICU 59 - */ - int32_t lengthDelta() const { return delta; } - /** - * @return TRUE if there are any change edits - * @stable ICU 59 - */ - UBool hasChanges() const { return numChanges != 0; } - - /** - * @return the number of change edits - * @stable ICU 60 - */ - int32_t numberOfChanges() const { return numChanges; } - - /** - * Access to the list of edits. - * - * At any moment in time, an instance of this class points to a single edit: a "window" into a span - * of the source string and the corresponding span of the destination string. 
The source string span - * starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string - * span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars. - * - * The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`, - * and `findDestinationIndex(int32_t, UErrorCode &)` methods. - * Calling any of these methods mutates the iterator to make it point to the corresponding edit. - * - * For more information, see the documentation for {@link Edits}. - * - * @see getCoarseIterator - * @see getFineIterator - * @stable ICU 59 - */ - struct U_COMMON_API Iterator U_FINAL : public UMemory { - /** - * Default constructor, empty iterator. - * @stable ICU 60 - */ - Iterator() : - array(nullptr), index(0), length(0), - remaining(0), onlyChanges_(FALSE), coarse(FALSE), - dir(0), changed(FALSE), oldLength_(0), newLength_(0), - srcIndex(0), replIndex(0), destIndex(0) {} - /** - * Copy constructor. - * @stable ICU 59 - */ - Iterator(const Iterator &other) = default; - /** - * Assignment operator. - * @stable ICU 59 - */ - Iterator &operator=(const Iterator &other) = default; - - /** - * Advances the iterator to the next edit. - * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, - * or else the function returns immediately. Check for U_FAILURE() - * on output or use with function chaining. (See User Guide for details.) - * @return TRUE if there is another edit - * @stable ICU 59 - */ - UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); } - - /** - * Moves the iterator to the edit that contains the source index. - * The source index may be found in a no-change edit - * even if normal iteration would skip no-change edits. - * Normal iteration can continue from a found edit. - * - * The iterator state before this search logically does not matter. - * (It may affect the performance of the search.) 
- * - * The iterator state after this search is undefined - * if the source index is out of bounds for the source string. - * - * @param i source index - * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, - * or else the function returns immediately. Check for U_FAILURE() - * on output or use with function chaining. (See User Guide for details.) - * @return TRUE if the edit for the source index was found - * @stable ICU 59 - */ - UBool findSourceIndex(int32_t i, UErrorCode &errorCode) { - return findIndex(i, TRUE, errorCode) == 0; - } - - /** - * Moves the iterator to the edit that contains the destination index. - * The destination index may be found in a no-change edit - * even if normal iteration would skip no-change edits. - * Normal iteration can continue from a found edit. - * - * The iterator state before this search logically does not matter. - * (It may affect the performance of the search.) - * - * The iterator state after this search is undefined - * if the source index is out of bounds for the source string. - * - * @param i destination index - * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, - * or else the function returns immediately. Check for U_FAILURE() - * on output or use with function chaining. (See User Guide for details.) - * @return TRUE if the edit for the destination index was found - * @stable ICU 60 - */ - UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) { - return findIndex(i, FALSE, errorCode) == 0; - } - - /** - * Computes the destination index corresponding to the given source index. - * If the source index is inside a change edit (not at its start), - * then the destination index at the end of that edit is returned, - * since there is no information about index mapping inside a change edit. - * - * (This means that indexes to the start and middle of an edit, - * for example around a grapheme cluster, are mapped to indexes - * encompassing the entire edit. 
- * The alternative, mapping an interior index to the start, - * would map such an interval to an empty one.) - * - * This operation will usually but not always modify this object. - * The iterator state after this search is undefined. - * - * @param i source index - * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, - * or else the function returns immediately. Check for U_FAILURE() - * on output or use with function chaining. (See User Guide for details.) - * @return destination index; undefined if i is not 0..string length - * @stable ICU 60 - */ - int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode); - - /** - * Computes the source index corresponding to the given destination index. - * If the destination index is inside a change edit (not at its start), - * then the source index at the end of that edit is returned, - * since there is no information about index mapping inside a change edit. - * - * (This means that indexes to the start and middle of an edit, - * for example around a grapheme cluster, are mapped to indexes - * encompassing the entire edit. - * The alternative, mapping an interior index to the start, - * would map such an interval to an empty one.) - * - * This operation will usually but not always modify this object. - * The iterator state after this search is undefined. - * - * @param i destination index - * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, - * or else the function returns immediately. Check for U_FAILURE() - * on output or use with function chaining. (See User Guide for details.) - * @return source index; undefined if i is not 0..string length - * @stable ICU 60 - */ - int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode); - - /** - * Returns whether the edit currently represented by the iterator is a change edit. - * - * @return TRUE if this edit replaces oldLength() units with newLength() different ones. 
- * FALSE if oldLength units remain unchanged. - * @stable ICU 59 - */ - UBool hasChange() const { return changed; } - - /** - * The length of the current span in the source string, which starts at {@link #sourceIndex}. - * - * @return the number of units in the original string which are replaced or remain unchanged. - * @stable ICU 59 - */ - int32_t oldLength() const { return oldLength_; } - - /** - * The length of the current span in the destination string, which starts at - * {@link #destinationIndex}, or in the replacement string, which starts at - * {@link #replacementIndex}. - * - * @return the number of units in the modified string, if hasChange() is TRUE. - * Same as oldLength if hasChange() is FALSE. - * @stable ICU 59 - */ - int32_t newLength() const { return newLength_; } - - /** - * The start index of the current span in the source string; the span has length - * {@link #oldLength}. - * - * @return the current index into the source string - * @stable ICU 59 - */ - int32_t sourceIndex() const { return srcIndex; } - - /** - * The start index of the current span in the replacement string; the span has length - * {@link #newLength}. Well-defined only if the current edit is a change edit. - * - * The *replacement string* is the concatenation of all substrings of the destination - * string corresponding to change edits. - * - * This method is intended to be used together with operations that write only replacement - * characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option). - * The source string can then be modified in-place. - * - * @return the current index into the replacement-characters-only string, - * not counting unchanged spans - * @stable ICU 59 - */ - int32_t replacementIndex() const { - // TODO: Throw an exception if we aren't in a change edit? - return replIndex; - } - - /** - * The start index of the current span in the destination string; the span has length - * {@link #newLength}. 
- * - * @return the current index into the full destination string - * @stable ICU 59 - */ - int32_t destinationIndex() const { return destIndex; } - -#ifndef U_HIDE_INTERNAL_API - /** - * A string representation of the current edit represented by the iterator for debugging. You - * should not depend on the contents of the return string. - * @internal - */ - UnicodeString& toString(UnicodeString& appendTo) const; -#endif // U_HIDE_INTERNAL_API - - private: - friend class Edits; - - Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs); - - int32_t readLength(int32_t head); - void updateNextIndexes(); - void updatePreviousIndexes(); - UBool noNext(); - UBool next(UBool onlyChanges, UErrorCode &errorCode); - UBool previous(UErrorCode &errorCode); - /** @return -1: error or i<0; 0: found; 1: i>=string length */ - int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode); - - const uint16_t *array; - int32_t index, length; - // 0 if we are not within compressed equal-length changes. - // Otherwise the number of remaining changes, including the current one. - int32_t remaining; - UBool onlyChanges_, coarse; - - int8_t dir; // iteration direction: back(<0), initial(0), forward(>0) - UBool changed; - int32_t oldLength_, newLength_; - int32_t srcIndex, replIndex, destIndex; - }; - - /** - * Returns an Iterator for coarse-grained change edits - * (adjacent change edits are treated as one). - * Can be used to perform simple string updates. - * Skips no-change edits. - * @return an Iterator that merges adjacent changes. - * @stable ICU 59 - */ - Iterator getCoarseChangesIterator() const { - return Iterator(array, length, TRUE, TRUE); - } - - /** - * Returns an Iterator for coarse-grained change and no-change edits - * (adjacent change edits are treated as one). - * Can be used to perform simple string updates. - * Adjacent change edits are treated as one edit. - * @return an Iterator that merges adjacent changes. 
- * @stable ICU 59 - */ - Iterator getCoarseIterator() const { - return Iterator(array, length, FALSE, TRUE); - } - - /** - * Returns an Iterator for fine-grained change edits - * (full granularity of change edits is retained). - * Can be used for modifying styled text. - * Skips no-change edits. - * @return an Iterator that separates adjacent changes. - * @stable ICU 59 - */ - Iterator getFineChangesIterator() const { - return Iterator(array, length, TRUE, FALSE); - } - - /** - * Returns an Iterator for fine-grained change and no-change edits - * (full granularity of change edits is retained). - * Can be used for modifying styled text. - * @return an Iterator that separates adjacent changes. - * @stable ICU 59 - */ - Iterator getFineIterator() const { - return Iterator(array, length, FALSE, FALSE); - } - - /** - * Merges the two input Edits and appends the result to this object. - * - * Consider two string transformations (for example, normalization and case mapping) - * where each records Edits in addition to writing an output string.
- * Edits ab reflect how substrings of input string a - * map to substrings of intermediate string b.
- * Edits bc reflect how substrings of intermediate string b - * map to substrings of output string c.
- * This function merges ab and bc such that the additional edits - * recorded in this object reflect how substrings of input string a - * map to substrings of output string c. - * - * If unrelated Edits are passed in where the output string of the first - * has a different length than the input string of the second, - * then a U_ILLEGAL_ARGUMENT_ERROR is reported. - * - * @param ab reflects how substrings of input string a - * map to substrings of intermediate string b. - * @param bc reflects how substrings of intermediate string b - * map to substrings of output string c. - * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, - * or else the function returns immediately. Check for U_FAILURE() - * on output or use with function chaining. (See User Guide for details.) - * @return *this, with the merged edits appended - * @stable ICU 60 - */ - Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode); - -private: - void releaseArray() U_NOEXCEPT; - Edits ©Array(const Edits &other); - Edits &moveArray(Edits &src) U_NOEXCEPT; - - void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; } - int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; } - - void append(int32_t r); - UBool growArray(); - - static const int32_t STACK_CAPACITY = 100; - uint16_t *array; - int32_t capacity; - int32_t length; - int32_t delta; - int32_t numChanges; - UErrorCode errorCode_; - uint16_t stackArray[STACK_CAPACITY]; -}; - -U_NAMESPACE_END - -#endif // __EDITS_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/enumset.h b/deps/node/deps/icu-small/source/common/unicode/enumset.h deleted file mode 100644 index 82d633ed..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/enumset.h +++ /dev/null @@ -1,69 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2012,2014 International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -*/ - -/** - * \file - * \brief C++: internal template EnumSet<> - */ - -#ifndef ENUMSET_H -#define ENUMSET_H - -#include "unicode/utypes.h" - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/* Can't use #ifndef U_HIDE_INTERNAL_API for the entire EnumSet class, needed in .h file declarations */ -/** - * enum bitset for boolean fields. Similar to Java EnumSet<>. - * Needs to range check. Used for private instance variables. - * @internal - * \cond - */ -template -class EnumSet { -public: - inline EnumSet() : fBools(0) {} - inline EnumSet(const EnumSet& other) : fBools(other.fBools) {} - inline ~EnumSet() {} -#ifndef U_HIDE_INTERNAL_API - inline void clear() { fBools=0; } - inline void add(T toAdd) { set(toAdd, 1); } - inline void remove(T toRemove) { set(toRemove, 0); } - inline int32_t contains(T toCheck) const { return get(toCheck); } - inline void set(T toSet, int32_t v) { fBools=(fBools&(~flag(toSet)))|(v?(flag(toSet)):0); } - inline int32_t get(T toCheck) const { return (fBools & flag(toCheck))?1:0; } - inline UBool isValidEnum(T toCheck) const { return (toCheck>=minValue&&toCheck& operator=(const EnumSet& other) { - fBools = other.fBools; - return *this; - } - - inline uint32_t getAll() const { - return fBools; - } -#endif /* U_HIDE_INTERNAL_API */ - -private: - inline uint32_t flag(T toCheck) const { return (1<<(toCheck-minValue)); } -private: - uint32_t fBools; -}; - -/** \endcond */ - -U_NAMESPACE_END - -#endif /* U_SHOW_CPLUSPLUS_API */ -#endif /* ENUMSET_H */ diff --git a/deps/node/deps/icu-small/source/common/unicode/errorcode.h b/deps/node/deps/icu-small/source/common/unicode/errorcode.h deleted file mode 100644 
index 1e5df8f0..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/errorcode.h +++ /dev/null @@ -1,139 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2009-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: errorcode.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009mar10 -* created by: Markus W. Scherer -*/ - -#ifndef __ERRORCODE_H__ -#define __ERRORCODE_H__ - -/** - * \file - * \brief C++ API: ErrorCode class intended to make it easier to use - * ICU C and C++ APIs from C++ user code. - */ - -#include "unicode/utypes.h" -#include "unicode/uobject.h" - -U_NAMESPACE_BEGIN - -/** - * Wrapper class for UErrorCode, with conversion operators for direct use - * in ICU C and C++ APIs. - * Intended to be used as a base class, where a subclass overrides - * the handleFailure() function so that it throws an exception, - * does an assert(), logs an error, etc. - * This is not an abstract base class. This class can be used and instantiated - * by itself, although it will be more useful when subclassed. - * - * Features: - * - The constructor initializes the internal UErrorCode to U_ZERO_ERROR, - * removing one common source of errors. - * - Same use in C APIs taking a UErrorCode * (pointer) - * and C++ taking UErrorCode & (reference) via conversion operators. - * - Possible automatic checking for success when it goes out of scope. - * - * Note: For automatic checking for success in the destructor, a subclass - * must implement such logic in its own destructor because the base class - * destructor cannot call a subclass function (like handleFailure()). - * The ErrorCode base class destructor does nothing. 
- * - * Note also: While it is possible for a destructor to throw an exception, - * it is generally unsafe to do so. This means that in a subclass the destructor - * and the handleFailure() function may need to take different actions. - * - * Sample code: - * \code - * class IcuErrorCode: public icu::ErrorCode { - * public: - * virtual ~IcuErrorCode() { // should be defined in .cpp as "key function" - * // Safe because our handleFailure() does not throw exceptions. - * if(isFailure()) { handleFailure(); } - * } - * protected: - * virtual void handleFailure() const { - * log_failure(u_errorName(errorCode)); - * exit(errorCode); - * } - * }; - * IcuErrorCode error_code; - * UConverter *cnv = ucnv_open("Shift-JIS", error_code); - * length = ucnv_fromUChars(dest, capacity, src, length, error_code); - * ucnv_close(cnv); - * // IcuErrorCode destructor checks for success. - * \endcode - * - * @stable ICU 4.2 - */ -class U_COMMON_API ErrorCode: public UMemory { -public: - /** - * Default constructor. Initializes its UErrorCode to U_ZERO_ERROR. - * @stable ICU 4.2 - */ - ErrorCode() : errorCode(U_ZERO_ERROR) {} - /** Destructor, does nothing. See class documentation for details. @stable ICU 4.2 */ - virtual ~ErrorCode(); - /** Conversion operator, returns a reference. @stable ICU 4.2 */ - operator UErrorCode & () { return errorCode; } - /** Conversion operator, returns a pointer. @stable ICU 4.2 */ - operator UErrorCode * () { return &errorCode; } - /** Tests for U_SUCCESS(). @stable ICU 4.2 */ - UBool isSuccess() const { return U_SUCCESS(errorCode); } - /** Tests for U_FAILURE(). @stable ICU 4.2 */ - UBool isFailure() const { return U_FAILURE(errorCode); } - /** Returns the UErrorCode value. @stable ICU 4.2 */ - UErrorCode get() const { return errorCode; } - /** Sets the UErrorCode value. @stable ICU 4.2 */ - void set(UErrorCode value) { errorCode=value; } - /** Returns the UErrorCode value and resets it to U_ZERO_ERROR. 
@stable ICU 4.2 */ - UErrorCode reset(); - /** - * Asserts isSuccess(). - * In other words, this method checks for a failure code, - * and the base class handles it like this: - * \code - * if(isFailure()) { handleFailure(); } - * \endcode - * @stable ICU 4.4 - */ - void assertSuccess() const; - /** - * Return a string for the UErrorCode value. - * The string will be the same as the name of the error code constant - * in the UErrorCode enum. - * @stable ICU 4.4 - */ - const char* errorName() const; - -protected: - /** - * Internal UErrorCode, accessible to subclasses. - * @stable ICU 4.2 - */ - UErrorCode errorCode; - /** - * Called by assertSuccess() if isFailure() is true. - * A subclass should override this function to deal with a failure code: - * Throw an exception, log an error, terminate the program, or similar. - * @stable ICU 4.2 - */ - virtual void handleFailure() const {} -}; - -U_NAMESPACE_END - -#endif // __ERRORCODE_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/filteredbrk.h b/deps/node/deps/icu-small/source/common/unicode/filteredbrk.h deleted file mode 100644 index 2444114e..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/filteredbrk.h +++ /dev/null @@ -1,145 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************** -* Copyright (C) 1997-2015, International Business Machines -* Corporation and others. All Rights Reserved. 
-******************************************************************************** -*/ - -#ifndef FILTEREDBRK_H -#define FILTEREDBRK_H - -#include "unicode/utypes.h" -#include "unicode/brkiter.h" - -#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION - -U_NAMESPACE_BEGIN - -/** - * \file - * \brief C++ API: FilteredBreakIteratorBuilder - */ - -/** - * The BreakIteratorFilter is used to modify the behavior of a BreakIterator - * by constructing a new BreakIterator which suppresses certain segment boundaries. - * See http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions . - * For example, a typical English Sentence Break Iterator would break on the space - * in the string "Mr. Smith" (resulting in two segments), - * but with "Mr." as an exception, a filtered break iterator - * would consider the string "Mr. Smith" to be a single segment. - * - * @stable ICU 56 - */ -class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { - public: - /** - * destructor. - * @stable ICU 56 - */ - virtual ~FilteredBreakIteratorBuilder(); - - /** - * Construct a FilteredBreakIteratorBuilder based on rules in a locale. - * The rules are taken from CLDR exception data for the locale, - * see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions - * This is the equivalent of calling createInstance(UErrorCode&) - * and then repeatedly calling addNoBreakAfter(...) with the contents - * of the CLDR exception data. - * @param where the locale. - * @param status The error code. - * @return the new builder - * @stable ICU 56 - */ - static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status); - -#ifndef U_HIDE_DEPRECATED_API - /** - * This function has been deprecated in favor of createEmptyInstance, which has - * identical behavior. - * @param status The error code. 
- * @return the new builder - * @deprecated ICU 60 use createEmptyInstance instead - * @see createEmptyInstance() - */ - static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); -#endif /* U_HIDE_DEPRECATED_API */ - - /** - * Construct an empty FilteredBreakIteratorBuilder. - * In this state, it will not suppress any segment boundaries. - * @param status The error code. - * @return the new builder - * @stable ICU 60 - */ - static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status); - - /** - * Suppress a certain string from being the end of a segment. - * For example, suppressing "Mr.", then segments ending in "Mr." will not be returned - * by the iterator. - * @param string the string to suppress, such as "Mr." - * @param status error code - * @return returns TRUE if the string was not present and now added, - * FALSE if the call was a no-op because the string was already being suppressed. - * @stable ICU 56 - */ - virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; - - /** - * Stop suppressing a certain string from being the end of the segment. - * This function does not create any new segment boundaries, but only serves to un-do - * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of - * locale data which may be suppressing certain strings. - * @param string the exception to remove - * @param status error code - * @return returns TRUE if the string was present and now removed, - * FALSE if the call was a no-op because the string was not being suppressed. - * @stable ICU 56 - */ - virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; - - /** - * This function has been deprecated in favor of wrapIteratorWithFilter() - * The behavior is identical. - * @param adoptBreakIterator the break iterator to adopt - * @param status error code - * @return the new BreakIterator, owned by the caller. 
- * @deprecated ICU 60 use wrapIteratorWithFilter() instead - * @see wrapBreakIteratorWithFilter() - */ - virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; - - /** - * Wrap (adopt) an existing break iterator in a new filtered instance. - * The resulting BreakIterator is owned by the caller. - * The BreakIteratorFilter may be destroyed before the BreakIterator is destroyed. - * Note that the adoptBreakIterator is adopted by the new BreakIterator - * and should no longer be used by the caller. - * The FilteredBreakIteratorBuilder may be reused. - * This function is an alias for build() - * @param adoptBreakIterator the break iterator to adopt - * @param status error code - * @return the new BreakIterator, owned by the caller. - * @stable ICU 60 - */ - inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) { - return build(adoptBreakIterator, status); - } - - protected: - /** - * For subclass use - * @stable ICU 56 - */ - FilteredBreakIteratorBuilder(); -}; - - -U_NAMESPACE_END - -#endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION - -#endif // #ifndef FILTEREDBRK_H diff --git a/deps/node/deps/icu-small/source/common/unicode/icudataver.h b/deps/node/deps/icu-small/source/common/unicode/icudataver.h deleted file mode 100644 index d5c728da..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/icudataver.h +++ /dev/null @@ -1,43 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2009-2013, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -****************************************************************************** -*/ - - -/** - * \file - * \brief C API: access to ICU Data Version number - */ - -#ifndef __ICU_DATA_VER_H__ -#define __ICU_DATA_VER_H__ - -#include "unicode/utypes.h" - -/** - * @stable ICU 49 - */ -#define U_ICU_VERSION_BUNDLE "icuver" - -/** - * @stable ICU 49 - */ -#define U_ICU_DATA_KEY "DataVersion" - -/** - * Retrieves the data version from icuver and stores it in dataVersionFillin. - * - * @param dataVersionFillin icuver data version information to be filled in if not-null - * @param status stores the error code from the calls to resource bundle - * - * @stable ICU 49 - */ -U_STABLE void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status); - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/icuplug.h b/deps/node/deps/icu-small/source/common/unicode/icuplug.h deleted file mode 100644 index 827cbe94..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/icuplug.h +++ /dev/null @@ -1,387 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2009-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* FILE NAME : icuplug.h -* -* Date Name Description -* 10/29/2009 sl New. -****************************************************************************** -*/ - -/** - * \file - * \brief C API: ICU Plugin API - * - *

C API: ICU Plugin API

- * - *

C API allowing run-time loadable modules that extend or modify ICU functionality.

- * - *

Loading and Configuration

- * - *

At ICU startup time, the environment variable "ICU_PLUGINS" will be - * queried for a directory name. If it is not set, the preprocessor symbol - * "DEFAULT_ICU_PLUGINS" will be checked for a default value.

- * - *

Within the above-named directory, the file "icuplugins##.txt" will be - * opened, if present, where ## is the major+minor number of the currently - * running ICU (such as, 44 for ICU 4.4, thus icuplugins44.txt)

- * - *

The configuration file has this format:

- * - *
    - *
  • Hash (#) begins a comment line
  • - * - *
  • Non-comment lines have two or three components: - * LIBRARYNAME ENTRYPOINT [ CONFIGURATION .. ]
  • - * - *
  • Tabs or spaces separate the three items.
  • - * - *
  • LIBRARYNAME is the name of a shared library, either a short name if - * it is on the loader path, or a full pathname.
  • - * - *
  • ENTRYPOINT is the short (undecorated) symbol name of the plugin's - * entrypoint, as above.
  • - * - *
  • CONFIGURATION is the entire rest of the line . It's passed as-is to - * the plugin.
  • - *
- * - *

An example configuration file is, in its entirety:

- * - * \code - * # this is icuplugins44.txt - * testplug.dll myPlugin hello=world - * \endcode - *

Plugins are categorized as "high" or "low" level. Low level are those - * which must be run BEFORE high level plugins, and before any operations - * which cause ICU to be 'initialized'. If a plugin is low level but - * causes ICU to allocate memory or become initialized, that plugin is said - * to cause a 'level change'.

- * - *

At load time, ICU first queries all plugins to determine their level, - * then loads all 'low' plugins first, and then loads all 'high' plugins. - * Plugins are otherwise loaded in the order listed in the configuration file.

- * - *

Implementing a Plugin

- * \code - * U_CAPI UPlugTokenReturn U_EXPORT2 - * myPlugin (UPlugData *plug, UPlugReason reason, UErrorCode *status) { - * if(reason==UPLUG_REASON_QUERY) { - * uplug_setPlugName(plug, "Simple Plugin"); - * uplug_setPlugLevel(plug, UPLUG_LEVEL_HIGH); - * } else if(reason==UPLUG_REASON_LOAD) { - * ... Set up some ICU things here.... - * } else if(reason==UPLUG_REASON_UNLOAD) { - * ... unload, clean up ... - * } - * return UPLUG_TOKEN; - * } - * \endcode - * - *

The UPlugData* is an opaque pointer to the plugin-specific data, and is - * used in all other API calls.

- * - *

The API contract is:

- *
  1. The plugin MUST always return UPLUG_TOKEN as a return value- to - * indicate that it is a valid plugin.
  2. - * - *
  3. When the 'reason' parameter is set to UPLUG_REASON_QUERY, the - * plugin MUST call uplug_setPlugLevel() to indicate whether it is a high - * level or low level plugin.
  4. - * - *
  5. When the 'reason' parameter is UPLUG_REASON_QUERY, the plugin - * SHOULD call uplug_setPlugName to indicate a human readable plugin name.
- * - * - * \internal ICU 4.4 Technology Preview - */ - - -#ifndef ICUPLUG_H -#define ICUPLUG_H - -#include "unicode/utypes.h" - - -#if UCONFIG_ENABLE_PLUGINS || defined(U_IN_DOXYGEN) - - - -/* === Basic types === */ - -#ifndef U_HIDE_INTERNAL_API -/** - * @{ - * Opaque structure passed to/from a plugin. - * use the APIs to access it. - * @internal ICU 4.4 Technology Preview - */ - -struct UPlugData; -typedef struct UPlugData UPlugData; - -/** @} */ - -/** - * Random Token to identify a valid ICU plugin. Plugins must return this - * from the entrypoint. - * @internal ICU 4.4 Technology Preview - */ -#define UPLUG_TOKEN 0x54762486 - -/** - * Max width of names, symbols, and configuration strings - * @internal ICU 4.4 Technology Preview - */ -#define UPLUG_NAME_MAX 100 - - -/** - * Return value from a plugin entrypoint. - * Must always be set to UPLUG_TOKEN - * @see UPLUG_TOKEN - * @internal ICU 4.4 Technology Preview - */ -typedef uint32_t UPlugTokenReturn; - -/** - * Reason code for the entrypoint's call - * @internal ICU 4.4 Technology Preview - */ -typedef enum { - UPLUG_REASON_QUERY = 0, /**< The plugin is being queried for info. **/ - UPLUG_REASON_LOAD = 1, /**< The plugin is being loaded. **/ - UPLUG_REASON_UNLOAD = 2, /**< The plugin is being unloaded. **/ - /** - * Number of known reasons. - * @internal The numeric value may change over time, see ICU ticket #12420. - */ - UPLUG_REASON_COUNT -} UPlugReason; - - -/** - * Level of plugin loading - * INITIAL: UNKNOWN - * QUERY: INVALID -> { LOW | HIGH } - * ERR -> INVALID - * @internal ICU 4.4 Technology Preview - */ -typedef enum { - UPLUG_LEVEL_INVALID = 0, /**< The plugin is invalid, hasn't called uplug_setLevel, or can't load. **/ - UPLUG_LEVEL_UNKNOWN = 1, /**< The plugin is waiting to be installed. **/ - UPLUG_LEVEL_LOW = 2, /**< The plugin must be called before u_init completes **/ - UPLUG_LEVEL_HIGH = 3, /**< The plugin can run at any time. **/ - /** - * Number of known levels. 
- * @internal The numeric value may change over time, see ICU ticket #12420. - */ - UPLUG_LEVEL_COUNT -} UPlugLevel; - -/** - * Entrypoint for an ICU plugin. - * @param plug the UPlugData handle. - * @param status the plugin's extended status code. - * @return A valid plugin must return UPLUG_TOKEN - * @internal ICU 4.4 Technology Preview - */ -typedef UPlugTokenReturn (U_EXPORT2 UPlugEntrypoint) ( - UPlugData *plug, - UPlugReason reason, - UErrorCode *status); - -/* === Needed for Implementing === */ - -/** - * Request that this plugin not be unloaded at cleanup time. - * This is appropriate for plugins which cannot be cleaned up. - * @see u_cleanup() - * @param plug plugin - * @param dontUnload set true if this plugin can't be unloaded - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL void U_EXPORT2 -uplug_setPlugNoUnload(UPlugData *plug, UBool dontUnload); - -/** - * Set the level of this plugin. - * @param plug plugin data handle - * @param level the level of this plugin - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL void U_EXPORT2 -uplug_setPlugLevel(UPlugData *plug, UPlugLevel level); - -/** - * Get the level of this plugin. - * @param plug plugin data handle - * @return the level of this plugin - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL UPlugLevel U_EXPORT2 -uplug_getPlugLevel(UPlugData *plug); - -/** - * Get the lowest level of plug which can currently load. - * For example, if UPLUG_LEVEL_LOW is returned, then low level plugins may load - * if UPLUG_LEVEL_HIGH is returned, then only high level plugins may load. - * @return the lowest level of plug which can currently load - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL UPlugLevel U_EXPORT2 -uplug_getCurrentLevel(void); - - -/** - * Get plug load status - * @return The error code of this plugin's load attempt. 
- * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL UErrorCode U_EXPORT2 -uplug_getPlugLoadStatus(UPlugData *plug); - -/** - * Set the human-readable name of this plugin. - * @param plug plugin data handle - * @param name the name of this plugin. The first UPLUG_NAME_MAX characters willi be copied into a new buffer. - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL void U_EXPORT2 -uplug_setPlugName(UPlugData *plug, const char *name); - -/** - * Get the human-readable name of this plugin. - * @param plug plugin data handle - * @return the name of this plugin - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL const char * U_EXPORT2 -uplug_getPlugName(UPlugData *plug); - -/** - * Return the symbol name for this plugin, if known. - * @param plug plugin data handle - * @return the symbol name, or NULL - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL const char * U_EXPORT2 -uplug_getSymbolName(UPlugData *plug); - -/** - * Return the library name for this plugin, if known. - * @param plug plugin data handle - * @param status error code - * @return the library name, or NULL - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL const char * U_EXPORT2 -uplug_getLibraryName(UPlugData *plug, UErrorCode *status); - -/** - * Return the library used for this plugin, if known. - * Plugins could use this to load data out of their - * @param plug plugin data handle - * @return the library, or NULL - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL void * U_EXPORT2 -uplug_getLibrary(UPlugData *plug); - -/** - * Return the plugin-specific context data. - * @param plug plugin data handle - * @return the context, or NULL if not set - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL void * U_EXPORT2 -uplug_getContext(UPlugData *plug); - -/** - * Set the plugin-specific context data. 
- * @param plug plugin data handle - * @param context new context to set - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL void U_EXPORT2 -uplug_setContext(UPlugData *plug, void *context); - - -/** - * Get the configuration string, if available. - * The string is in the platform default codepage. - * @param plug plugin data handle - * @return configuration string, or else null. - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL const char * U_EXPORT2 -uplug_getConfiguration(UPlugData *plug); - -/** - * Return all currently installed plugins, from newest to oldest - * Usage Example: - * \code - * UPlugData *plug = NULL; - * while(plug=uplug_nextPlug(plug)) { - * ... do something with 'plug' ... - * } - * \endcode - * Not thread safe- do not call while plugs are added or removed. - * @param prior pass in 'NULL' to get the first (most recent) plug, - * otherwise pass the value returned on a prior call to uplug_nextPlug - * @return the next oldest plugin, or NULL if no more. - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL UPlugData* U_EXPORT2 -uplug_nextPlug(UPlugData *prior); - -/** - * Inject a plugin as if it were loaded from a library. - * This is useful for testing plugins. - * Note that it will have a 'NULL' library pointer associated - * with it, and therefore no llibrary will be closed at cleanup time. - * Low level plugins may not be able to load, as ordering can't be enforced. - * @param entrypoint entrypoint to install - * @param config user specified configuration string, if available, or NULL. - * @param status error result - * @return the new UPlugData associated with this plugin, or NULL if error. - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL UPlugData* U_EXPORT2 -uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status); - - -/** - * Inject a plugin from a library, as if the information came from a config file. 
- * Low level plugins may not be able to load, and ordering can't be enforced. - * @param libName DLL name to load - * @param sym symbol of plugin (UPlugEntrypoint function) - * @param config configuration string, or NULL - * @param status error result - * @return the new UPlugData associated with this plugin, or NULL if error. - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL UPlugData* U_EXPORT2 -uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status); - -/** - * Remove a plugin. - * Will request the plugin to be unloaded, and close the library if needed - * @param plug plugin handle to close - * @param status error result - * @internal ICU 4.4 Technology Preview - */ -U_INTERNAL void U_EXPORT2 -uplug_removePlug(UPlugData *plug, UErrorCode *status); -#endif /* U_HIDE_INTERNAL_API */ - -#endif /* UCONFIG_ENABLE_PLUGINS */ - -#endif /* _ICUPLUG */ diff --git a/deps/node/deps/icu-small/source/common/unicode/idna.h b/deps/node/deps/icu-small/source/common/unicode/idna.h deleted file mode 100644 index f08658e5..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/idna.h +++ /dev/null @@ -1,325 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2012, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: idna.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010mar05 -* created by: Markus W. 
Scherer -*/ - -#ifndef __IDNA_H__ -#define __IDNA_H__ - -/** - * \file - * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_IDNA - -#include "unicode/bytestream.h" -#include "unicode/stringpiece.h" -#include "unicode/uidna.h" -#include "unicode/unistr.h" - -U_NAMESPACE_BEGIN - -class IDNAInfo; - -/** - * Abstract base class for IDNA processing. - * See http://www.unicode.org/reports/tr46/ - * and http://www.ietf.org/rfc/rfc3490.txt - * - * The IDNA class is not intended for public subclassing. - * - * This C++ API currently only implements UTS #46. - * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) - * and IDNA2003 (functions that do not use a service object). - * @stable ICU 4.6 - */ -class U_COMMON_API IDNA : public UObject { -public: - /** - * Destructor. - * @stable ICU 4.6 - */ - ~IDNA(); - - /** - * Returns an IDNA instance which implements UTS #46. - * Returns an unmodifiable instance, owned by the caller. - * Cache it for multiple operations, and delete it when done. - * The instance is thread-safe, that is, it can be used concurrently. - * - * UTS #46 defines Unicode IDNA Compatibility Processing, - * updated to the latest version of Unicode and compatible with both - * IDNA2003 and IDNA2008. - * - * The worker functions use transitional processing, including deviation mappings, - * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE - * is used in which case the deviation characters are passed through without change. - * - * Disallowed characters are mapped to U+FFFD. - * - * For available options see the uidna.h header. - * Operations with the UTS #46 instance do not support the - * UIDNA_ALLOW_UNASSIGNED option. - * - * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). 
- * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than - * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. - * - * @param options Bit set to modify the processing and error checking. - * See option bit set values in uidna.h. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the UTS #46 IDNA instance, if successful - * @stable ICU 4.6 - */ - static IDNA * - createUTS46Instance(uint32_t options, UErrorCode &errorCode); - - /** - * Converts a single domain name label into its ASCII form for DNS lookup. - * If any processing step fails, then info.hasErrors() will be TRUE and - * the result might not be an ASCII string. - * The label might be modified according to the types of errors. - * Labels with severe errors will be left in (or turned into) their Unicode form. - * - * The UErrorCode indicates an error only in exceptional cases, - * such as a U_MEMORY_ALLOCATION_ERROR. - * - * @param label Input domain name label - * @param dest Destination string object - * @param info Output container of IDNA processing details. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return dest - * @stable ICU 4.6 - */ - virtual UnicodeString & - labelToASCII(const UnicodeString &label, UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const = 0; - - /** - * Converts a single domain name label into its Unicode form for human-readable display. - * If any processing step fails, then info.hasErrors() will be TRUE. - * The label might be modified according to the types of errors. 
- * - * The UErrorCode indicates an error only in exceptional cases, - * such as a U_MEMORY_ALLOCATION_ERROR. - * - * @param label Input domain name label - * @param dest Destination string object - * @param info Output container of IDNA processing details. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return dest - * @stable ICU 4.6 - */ - virtual UnicodeString & - labelToUnicode(const UnicodeString &label, UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const = 0; - - /** - * Converts a whole domain name into its ASCII form for DNS lookup. - * If any processing step fails, then info.hasErrors() will be TRUE and - * the result might not be an ASCII string. - * The domain name might be modified according to the types of errors. - * Labels with severe errors will be left in (or turned into) their Unicode form. - * - * The UErrorCode indicates an error only in exceptional cases, - * such as a U_MEMORY_ALLOCATION_ERROR. - * - * @param name Input domain name - * @param dest Destination string object - * @param info Output container of IDNA processing details. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return dest - * @stable ICU 4.6 - */ - virtual UnicodeString & - nameToASCII(const UnicodeString &name, UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const = 0; - - /** - * Converts a whole domain name into its Unicode form for human-readable display. - * If any processing step fails, then info.hasErrors() will be TRUE. - * The domain name might be modified according to the types of errors. 
- * - * The UErrorCode indicates an error only in exceptional cases, - * such as a U_MEMORY_ALLOCATION_ERROR. - * - * @param name Input domain name - * @param dest Destination string object - * @param info Output container of IDNA processing details. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return dest - * @stable ICU 4.6 - */ - virtual UnicodeString & - nameToUnicode(const UnicodeString &name, UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const = 0; - - // UTF-8 versions of the processing methods ---------------------------- *** - - /** - * Converts a single domain name label into its ASCII form for DNS lookup. - * UTF-8 version of labelToASCII(), same behavior. - * - * @param label Input domain name label - * @param dest Destination byte sink; Flush()ed if successful - * @param info Output container of IDNA processing details. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return dest - * @stable ICU 4.6 - */ - virtual void - labelToASCII_UTF8(StringPiece label, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - - /** - * Converts a single domain name label into its Unicode form for human-readable display. - * UTF-8 version of labelToUnicode(), same behavior. - * - * @param label Input domain name label - * @param dest Destination byte sink; Flush()ed if successful - * @param info Output container of IDNA processing details. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. 
(See User Guide for details.) - * @return dest - * @stable ICU 4.6 - */ - virtual void - labelToUnicodeUTF8(StringPiece label, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - - /** - * Converts a whole domain name into its ASCII form for DNS lookup. - * UTF-8 version of nameToASCII(), same behavior. - * - * @param name Input domain name - * @param dest Destination byte sink; Flush()ed if successful - * @param info Output container of IDNA processing details. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return dest - * @stable ICU 4.6 - */ - virtual void - nameToASCII_UTF8(StringPiece name, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - - /** - * Converts a whole domain name into its Unicode form for human-readable display. - * UTF-8 version of nameToUnicode(), same behavior. - * - * @param name Input domain name - * @param dest Destination byte sink; Flush()ed if successful - * @param info Output container of IDNA processing details. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return dest - * @stable ICU 4.6 - */ - virtual void - nameToUnicodeUTF8(StringPiece name, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const; -}; - -class UTS46; - -/** - * Output container for IDNA processing errors. - * The IDNAInfo class is not suitable for subclassing. - * @stable ICU 4.6 - */ -class U_COMMON_API IDNAInfo : public UMemory { -public: - /** - * Constructor for stack allocation. 
- * @stable ICU 4.6 - */ - IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {} - /** - * Were there IDNA processing errors? - * @return TRUE if there were processing errors - * @stable ICU 4.6 - */ - UBool hasErrors() const { return errors!=0; } - /** - * Returns a bit set indicating IDNA processing errors. - * See UIDNA_ERROR_... constants in uidna.h. - * @return bit set of processing errors - * @stable ICU 4.6 - */ - uint32_t getErrors() const { return errors; } - /** - * Returns TRUE if transitional and nontransitional processing produce different results. - * This is the case when the input label or domain name contains - * one or more deviation characters outside a Punycode label (see UTS #46). - *
    - *
  • With nontransitional processing, such characters are - * copied to the destination string. - *
  • With transitional processing, such characters are - * mapped (sharp s/sigma) or removed (joiner/nonjoiner). - *
- * @return TRUE if transitional and nontransitional processing produce different results - * @stable ICU 4.6 - */ - UBool isTransitionalDifferent() const { return isTransDiff; } - -private: - friend class UTS46; - - IDNAInfo(const IDNAInfo &other); // no copying - IDNAInfo &operator=(const IDNAInfo &other); // no copying - - void reset() { - errors=labelErrors=0; - isTransDiff=FALSE; - isBiDi=FALSE; - isOkBiDi=TRUE; - } - - uint32_t errors, labelErrors; - UBool isTransDiff; - UBool isBiDi; - UBool isOkBiDi; -}; - -U_NAMESPACE_END - -#endif // UCONFIG_NO_IDNA -#endif // __IDNA_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/localpointer.h b/deps/node/deps/icu-small/source/common/unicode/localpointer.h deleted file mode 100644 index e17ee3d8..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/localpointer.h +++ /dev/null @@ -1,524 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2009-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: localpointer.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009nov13 -* created by: Markus W. Scherer -*/ - -#ifndef __LOCALPOINTER_H__ -#define __LOCALPOINTER_H__ - -/** - * \file - * \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code. - * - * These classes are inspired by - * - std::auto_ptr - * - boost::scoped_ptr & boost::scoped_array - * - Taligent Safe Pointers (TOnlyPointerTo) - * - * but none of those provide for all of the goals for ICU smart pointers: - * - Smart pointer owns the object and releases it when it goes out of scope. - * - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust. 
- * - ICU-compatible: No exceptions. - * - Need to be able to orphan/release the pointer and its ownership. - * - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects. - * - * For details see http://site.icu-project.org/design/cpp/scoped_ptr - */ - -#include "unicode/utypes.h" - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * "Smart pointer" base class; do not use directly: use LocalPointer etc. - * - * Base class for smart pointer classes that do not throw exceptions. - * - * Do not use this base class directly, since it does not delete its pointer. - * A subclass must implement methods that delete the pointer: - * Destructor and adoptInstead(). - * - * There is no operator T *() provided because the programmer must decide - * whether to use getAlias() (without transfer of ownership) or orphan() - * (with transfer of ownership and NULLing of the pointer). - * - * @see LocalPointer - * @see LocalArray - * @see U_DEFINE_LOCAL_OPEN_POINTER - * @stable ICU 4.4 - */ -template -class LocalPointerBase { -public: - /** - * Constructor takes ownership. - * @param p simple pointer to an object that is adopted - * @stable ICU 4.4 - */ - explicit LocalPointerBase(T *p=NULL) : ptr(p) {} - /** - * Destructor deletes the object it owns. - * Subclass must override: Base class does nothing. - * @stable ICU 4.4 - */ - ~LocalPointerBase() { /* delete ptr; */ } - /** - * NULL check. - * @return TRUE if ==NULL - * @stable ICU 4.4 - */ - UBool isNull() const { return ptr==NULL; } - /** - * NULL check. - * @return TRUE if !=NULL - * @stable ICU 4.4 - */ - UBool isValid() const { return ptr!=NULL; } - /** - * Comparison with a simple pointer, so that existing code - * with ==NULL need not be changed. 
- * @param other simple pointer for comparison - * @return true if this pointer value equals other - * @stable ICU 4.4 - */ - bool operator==(const T *other) const { return ptr==other; } - /** - * Comparison with a simple pointer, so that existing code - * with !=NULL need not be changed. - * @param other simple pointer for comparison - * @return true if this pointer value differs from other - * @stable ICU 4.4 - */ - bool operator!=(const T *other) const { return ptr!=other; } - /** - * Access without ownership change. - * @return the pointer value - * @stable ICU 4.4 - */ - T *getAlias() const { return ptr; } - /** - * Access without ownership change. - * @return the pointer value as a reference - * @stable ICU 4.4 - */ - T &operator*() const { return *ptr; } - /** - * Access without ownership change. - * @return the pointer value - * @stable ICU 4.4 - */ - T *operator->() const { return ptr; } - /** - * Gives up ownership; the internal pointer becomes NULL. - * @return the pointer value; - * caller becomes responsible for deleting the object - * @stable ICU 4.4 - */ - T *orphan() { - T *p=ptr; - ptr=NULL; - return p; - } - /** - * Deletes the object it owns, - * and adopts (takes ownership of) the one passed in. - * Subclass must override: Base class does not delete the object. - * @param p simple pointer to an object that is adopted - * @stable ICU 4.4 - */ - void adoptInstead(T *p) { - // delete ptr; - ptr=p; - } -protected: - /** - * Actual pointer. - * @internal - */ - T *ptr; -private: - // No comparison operators with other LocalPointerBases. - bool operator==(const LocalPointerBase &other); - bool operator!=(const LocalPointerBase &other); - // No ownership sharing: No copy constructor, no assignment operator. - LocalPointerBase(const LocalPointerBase &other); - void operator=(const LocalPointerBase &other); - // No heap allocation. Use only on the stack. 
- static void * U_EXPORT2 operator new(size_t size); - static void * U_EXPORT2 operator new[](size_t size); -#if U_HAVE_PLACEMENT_NEW - static void * U_EXPORT2 operator new(size_t, void *ptr); -#endif -}; - -/** - * "Smart pointer" class, deletes objects via the standard C++ delete operator. - * For most methods see the LocalPointerBase base class. - * - * Usage example: - * \code - * LocalPointer s(new UnicodeString((UChar32)0x50005)); - * int32_t length=s->length(); // 2 - * char16_t lead=s->charAt(0); // 0xd900 - * if(some condition) { return; } // no need to explicitly delete the pointer - * s.adoptInstead(new UnicodeString((char16_t)0xfffc)); - * length=s->length(); // 1 - * // no need to explicitly delete the pointer - * \endcode - * - * @see LocalPointerBase - * @stable ICU 4.4 - */ -template -class LocalPointer : public LocalPointerBase { -public: - using LocalPointerBase::operator*; - using LocalPointerBase::operator->; - /** - * Constructor takes ownership. - * @param p simple pointer to an object that is adopted - * @stable ICU 4.4 - */ - explicit LocalPointer(T *p=NULL) : LocalPointerBase(p) {} - /** - * Constructor takes ownership and reports an error if NULL. - * - * This constructor is intended to be used with other-class constructors - * that may report a failure UErrorCode, - * so that callers need to check only for U_FAILURE(errorCode) - * and not also separately for isNull(). - * - * @param p simple pointer to an object that is adopted - * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR - * if p==NULL and no other failure code had been set - * @stable ICU 55 - */ - LocalPointer(T *p, UErrorCode &errorCode) : LocalPointerBase(p) { - if(p==NULL && U_SUCCESS(errorCode)) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } - } - /** - * Move constructor, leaves src with isNull(). 
- * @param src source smart pointer - * @stable ICU 56 - */ - LocalPointer(LocalPointer &&src) U_NOEXCEPT : LocalPointerBase(src.ptr) { - src.ptr=NULL; - } - /** - * Destructor deletes the object it owns. - * @stable ICU 4.4 - */ - ~LocalPointer() { - delete LocalPointerBase::ptr; - } - /** - * Move assignment operator, leaves src with isNull(). - * The behavior is undefined if *this and src are the same object. - * @param src source smart pointer - * @return *this - * @stable ICU 56 - */ - LocalPointer &operator=(LocalPointer &&src) U_NOEXCEPT { - return moveFrom(src); - } - // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API - /** - * Move assignment, leaves src with isNull(). - * The behavior is undefined if *this and src are the same object. - * - * Can be called explicitly, does not need C++11 support. - * @param src source smart pointer - * @return *this - * @draft ICU 56 - */ - LocalPointer &moveFrom(LocalPointer &src) U_NOEXCEPT { - delete LocalPointerBase::ptr; - LocalPointerBase::ptr=src.ptr; - src.ptr=NULL; - return *this; - } - /** - * Swap pointers. - * @param other other smart pointer - * @stable ICU 56 - */ - void swap(LocalPointer &other) U_NOEXCEPT { - T *temp=LocalPointerBase::ptr; - LocalPointerBase::ptr=other.ptr; - other.ptr=temp; - } - /** - * Non-member LocalPointer swap function. - * @param p1 will get p2's pointer - * @param p2 will get p1's pointer - * @stable ICU 56 - */ - friend inline void swap(LocalPointer &p1, LocalPointer &p2) U_NOEXCEPT { - p1.swap(p2); - } - /** - * Deletes the object it owns, - * and adopts (takes ownership of) the one passed in. - * @param p simple pointer to an object that is adopted - * @stable ICU 4.4 - */ - void adoptInstead(T *p) { - delete LocalPointerBase::ptr; - LocalPointerBase::ptr=p; - } - /** - * Deletes the object it owns, - * and adopts (takes ownership of) the one passed in. - * - * If U_FAILURE(errorCode), then the current object is retained and the new one deleted. 
- * - * If U_SUCCESS(errorCode) but the input pointer is NULL, - * then U_MEMORY_ALLOCATION_ERROR is set, - * the current object is deleted, and NULL is set. - * - * @param p simple pointer to an object that is adopted - * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR - * if p==NULL and no other failure code had been set - * @stable ICU 55 - */ - void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) { - if(U_SUCCESS(errorCode)) { - delete LocalPointerBase::ptr; - LocalPointerBase::ptr=p; - if(p==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } - } else { - delete p; - } - } -}; - -/** - * "Smart pointer" class, deletes objects via the C++ array delete[] operator. - * For most methods see the LocalPointerBase base class. - * Adds operator[] for array item access. - * - * Usage example: - * \code - * LocalArray a(new UnicodeString[2]); - * a[0].append((char16_t)0x61); - * if(some condition) { return; } // no need to explicitly delete the array - * a.adoptInstead(new UnicodeString[4]); - * a[3].append((char16_t)0x62).append((char16_t)0x63).reverse(); - * // no need to explicitly delete the array - * \endcode - * - * @see LocalPointerBase - * @stable ICU 4.4 - */ -template -class LocalArray : public LocalPointerBase { -public: - using LocalPointerBase::operator*; - using LocalPointerBase::operator->; - /** - * Constructor takes ownership. - * @param p simple pointer to an array of T objects that is adopted - * @stable ICU 4.4 - */ - explicit LocalArray(T *p=NULL) : LocalPointerBase(p) {} - /** - * Constructor takes ownership and reports an error if NULL. - * - * This constructor is intended to be used with other-class constructors - * that may report a failure UErrorCode, - * so that callers need to check only for U_FAILURE(errorCode) - * and not also separately for isNull(). 
- * - * @param p simple pointer to an array of T objects that is adopted - * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR - * if p==NULL and no other failure code had been set - * @stable ICU 56 - */ - LocalArray(T *p, UErrorCode &errorCode) : LocalPointerBase(p) { - if(p==NULL && U_SUCCESS(errorCode)) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } - } - /** - * Move constructor, leaves src with isNull(). - * @param src source smart pointer - * @stable ICU 56 - */ - LocalArray(LocalArray &&src) U_NOEXCEPT : LocalPointerBase(src.ptr) { - src.ptr=NULL; - } - /** - * Destructor deletes the array it owns. - * @stable ICU 4.4 - */ - ~LocalArray() { - delete[] LocalPointerBase::ptr; - } - /** - * Move assignment operator, leaves src with isNull(). - * The behavior is undefined if *this and src are the same object. - * @param src source smart pointer - * @return *this - * @stable ICU 56 - */ - LocalArray &operator=(LocalArray &&src) U_NOEXCEPT { - return moveFrom(src); - } - // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API - /** - * Move assignment, leaves src with isNull(). - * The behavior is undefined if *this and src are the same object. - * - * Can be called explicitly, does not need C++11 support. - * @param src source smart pointer - * @return *this - * @draft ICU 56 - */ - LocalArray &moveFrom(LocalArray &src) U_NOEXCEPT { - delete[] LocalPointerBase::ptr; - LocalPointerBase::ptr=src.ptr; - src.ptr=NULL; - return *this; - } - /** - * Swap pointers. - * @param other other smart pointer - * @stable ICU 56 - */ - void swap(LocalArray &other) U_NOEXCEPT { - T *temp=LocalPointerBase::ptr; - LocalPointerBase::ptr=other.ptr; - other.ptr=temp; - } - /** - * Non-member LocalArray swap function. 
- * @param p1 will get p2's pointer - * @param p2 will get p1's pointer - * @stable ICU 56 - */ - friend inline void swap(LocalArray &p1, LocalArray &p2) U_NOEXCEPT { - p1.swap(p2); - } - /** - * Deletes the array it owns, - * and adopts (takes ownership of) the one passed in. - * @param p simple pointer to an array of T objects that is adopted - * @stable ICU 4.4 - */ - void adoptInstead(T *p) { - delete[] LocalPointerBase::ptr; - LocalPointerBase::ptr=p; - } - /** - * Deletes the array it owns, - * and adopts (takes ownership of) the one passed in. - * - * If U_FAILURE(errorCode), then the current array is retained and the new one deleted. - * - * If U_SUCCESS(errorCode) but the input pointer is NULL, - * then U_MEMORY_ALLOCATION_ERROR is set, - * the current array is deleted, and NULL is set. - * - * @param p simple pointer to an array of T objects that is adopted - * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR - * if p==NULL and no other failure code had been set - * @stable ICU 56 - */ - void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) { - if(U_SUCCESS(errorCode)) { - delete[] LocalPointerBase::ptr; - LocalPointerBase::ptr=p; - if(p==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } - } else { - delete[] p; - } - } - /** - * Array item access (writable). - * No index bounds check. - * @param i array index - * @return reference to the array item - * @stable ICU 4.4 - */ - T &operator[](ptrdiff_t i) const { return LocalPointerBase::ptr[i]; } -}; - -/** - * \def U_DEFINE_LOCAL_OPEN_POINTER - * "Smart pointer" definition macro, deletes objects via the closeFunction. - * Defines a subclass of LocalPointerBase which works just - * like LocalPointer except that this subclass will use the closeFunction - * rather than the C++ delete operator. 
- * - * Usage example: - * \code - * LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode)); - * utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), - * utf8Out, (int32_t)sizeof(utf8Out), - * utf8In, utf8InLength, &errorCode); - * if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap - * \endcode - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \ - class LocalPointerClassName : public LocalPointerBase { \ - public: \ - using LocalPointerBase::operator*; \ - using LocalPointerBase::operator->; \ - explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase(p) {} \ - LocalPointerClassName(LocalPointerClassName &&src) U_NOEXCEPT \ - : LocalPointerBase(src.ptr) { \ - src.ptr=NULL; \ - } \ - ~LocalPointerClassName() { if (ptr != NULL) { closeFunction(ptr); } } \ - LocalPointerClassName &operator=(LocalPointerClassName &&src) U_NOEXCEPT { \ - return moveFrom(src); \ - } \ - LocalPointerClassName &moveFrom(LocalPointerClassName &src) U_NOEXCEPT { \ - if (ptr != NULL) { closeFunction(ptr); } \ - LocalPointerBase::ptr=src.ptr; \ - src.ptr=NULL; \ - return *this; \ - } \ - void swap(LocalPointerClassName &other) U_NOEXCEPT { \ - Type *temp=LocalPointerBase::ptr; \ - LocalPointerBase::ptr=other.ptr; \ - other.ptr=temp; \ - } \ - friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \ - p1.swap(p2); \ - } \ - void adoptInstead(Type *p) { \ - if (ptr != NULL) { closeFunction(ptr); } \ - ptr=p; \ - } \ - } - -U_NAMESPACE_END - -#endif /* U_SHOW_CPLUSPLUS_API */ -#endif /* __LOCALPOINTER_H__ */ diff --git a/deps/node/deps/icu-small/source/common/unicode/locdspnm.h b/deps/node/deps/icu-small/source/common/unicode/locdspnm.h deleted file mode 100644 index 7f227829..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/locdspnm.h +++ /dev/null @@ -1,207 +0,0 @@ -// © 2016 and later: 
Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2010-2016, International Business Machines Corporation and -* others. All Rights Reserved. -****************************************************************************** -*/ - -#ifndef LOCDSPNM_H -#define LOCDSPNM_H - -#include "unicode/utypes.h" - -/** - * \file - * \brief C++ API: Provides display names of Locale and its components. - */ - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/locid.h" -#include "unicode/strenum.h" -#include "unicode/uscript.h" -#include "unicode/uldnames.h" -#include "unicode/udisplaycontext.h" - -U_NAMESPACE_BEGIN - -/** - * Returns display names of Locales and components of Locales. For - * more information on language, script, region, variant, key, and - * values, see Locale. - * @stable ICU 4.4 - */ -class U_COMMON_API LocaleDisplayNames : public UObject { -public: - /** - * Destructor. - * @stable ICU 4.4 - */ - virtual ~LocaleDisplayNames(); - - /** - * Convenience overload of - * {@link #createInstance(const Locale& locale, UDialectHandling dialectHandling)} - * that specifies STANDARD dialect handling. - * @param locale the display locale - * @return a LocaleDisplayNames instance - * @stable ICU 4.4 - */ - static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale); - - /** - * Returns an instance of LocaleDisplayNames that returns names - * formatted for the provided locale, using the provided - * dialectHandling. 
- * - * @param locale the display locale - * @param dialectHandling how to select names for locales - * @return a LocaleDisplayNames instance - * @stable ICU 4.4 - */ - static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale, - UDialectHandling dialectHandling); - - /** - * Returns an instance of LocaleDisplayNames that returns names formatted - * for the provided locale, using the provided UDisplayContext settings. - * - * @param locale the display locale - * @param contexts List of one or more context settings (e.g. for dialect - * handling, capitalization, etc. - * @param length Number of items in the contexts list - * @return a LocaleDisplayNames instance - * @stable ICU 51 - */ - static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale, - UDisplayContext *contexts, int32_t length); - - // getters for state - /** - * Returns the locale used to determine the display names. This is - * not necessarily the same locale passed to {@link #createInstance}. - * @return the display locale - * @stable ICU 4.4 - */ - virtual const Locale& getLocale() const = 0; - - /** - * Returns the dialect handling used in the display names. - * @return the dialect handling enum - * @stable ICU 4.4 - */ - virtual UDialectHandling getDialectHandling() const = 0; - - /** - * Returns the UDisplayContext value for the specified UDisplayContextType. - * @param type the UDisplayContextType whose value to return - * @return the UDisplayContext for the specified type. - * @stable ICU 51 - */ - virtual UDisplayContext getContext(UDisplayContextType type) const = 0; - - // names for entire locales - /** - * Returns the display name of the provided locale. 
- * @param locale the locale whose display name to return - * @param result receives the locale's display name - * @return the display name of the provided locale - * @stable ICU 4.4 - */ - virtual UnicodeString& localeDisplayName(const Locale& locale, - UnicodeString& result) const = 0; - - /** - * Returns the display name of the provided locale id. - * @param localeId the id of the locale whose display name to return - * @param result receives the locale's display name - * @return the display name of the provided locale - * @stable ICU 4.4 - */ - virtual UnicodeString& localeDisplayName(const char* localeId, - UnicodeString& result) const = 0; - - // names for components of a locale id - /** - * Returns the display name of the provided language code. - * @param lang the language code - * @param result receives the language code's display name - * @return the display name of the provided language code - * @stable ICU 4.4 - */ - virtual UnicodeString& languageDisplayName(const char* lang, - UnicodeString& result) const = 0; - - /** - * Returns the display name of the provided script code. - * @param script the script code - * @param result receives the script code's display name - * @return the display name of the provided script code - * @stable ICU 4.4 - */ - virtual UnicodeString& scriptDisplayName(const char* script, - UnicodeString& result) const = 0; - - /** - * Returns the display name of the provided script code. - * @param scriptCode the script code number - * @param result receives the script code's display name - * @return the display name of the provided script code - * @stable ICU 4.4 - */ - virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode, - UnicodeString& result) const = 0; - - /** - * Returns the display name of the provided region code. 
- * @param region the region code - * @param result receives the region code's display name - * @return the display name of the provided region code - * @stable ICU 4.4 - */ - virtual UnicodeString& regionDisplayName(const char* region, - UnicodeString& result) const = 0; - - /** - * Returns the display name of the provided variant. - * @param variant the variant string - * @param result receives the variant's display name - * @return the display name of the provided variant - * @stable ICU 4.4 - */ - virtual UnicodeString& variantDisplayName(const char* variant, - UnicodeString& result) const = 0; - - /** - * Returns the display name of the provided locale key. - * @param key the locale key name - * @param result receives the locale key's display name - * @return the display name of the provided locale key - * @stable ICU 4.4 - */ - virtual UnicodeString& keyDisplayName(const char* key, - UnicodeString& result) const = 0; - - /** - * Returns the display name of the provided value (used with the provided key). - * @param key the locale key name - * @param value the locale key's value - * @param result receives the value's display name - * @return the display name of the provided value - * @stable ICU 4.4 - */ - virtual UnicodeString& keyValueDisplayName(const char* key, const char* value, - UnicodeString& result) const = 0; -}; - -inline LocaleDisplayNames* LocaleDisplayNames::createInstance(const Locale& locale) { - return LocaleDisplayNames::createInstance(locale, ULDN_STANDARD_NAMES); -} - -U_NAMESPACE_END - -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/locid.h b/deps/node/deps/icu-small/source/common/unicode/locid.h deleted file mode 100644 index 415bced8..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/locid.h +++ /dev/null @@ -1,1181 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1996-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* File locid.h -* -* Created by: Helena Shih -* -* Modification History: -* -* Date Name Description -* 02/11/97 aliu Changed gLocPath to fgLocPath and added methods to -* get and set it. -* 04/02/97 aliu Made operator!= inline; fixed return value of getName(). -* 04/15/97 aliu Cleanup for AIX/Win32. -* 04/24/97 aliu Numerous changes per code review. -* 08/18/98 stephen Added tokenizeString(),changed getDisplayName() -* 09/08/98 stephen Moved definition of kEmptyString for Mac Port -* 11/09/99 weiv Added const char * getName() const; -* 04/12/00 srl removing unicodestring api's and cached hash code -* 08/10/01 grhoten Change the static Locales to accessor functions -****************************************************************************** -*/ - -#ifndef LOCID_H -#define LOCID_H - -#include "unicode/bytestream.h" -#include "unicode/localpointer.h" -#include "unicode/strenum.h" -#include "unicode/stringpiece.h" -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/putil.h" -#include "unicode/uloc.h" - -/** - * \file - * \brief C++ API: Locale ID object. - */ - -U_NAMESPACE_BEGIN - -// Forward Declarations -void U_CALLCONV locale_available_init(); /**< @internal */ - -class StringEnumeration; -class UnicodeString; - -/** - * A Locale object represents a specific geographical, political, - * or cultural region. An operation that requires a Locale to perform - * its task is called locale-sensitive and uses the Locale - * to tailor information for the user. 
For example, displaying a number - * is a locale-sensitive operation--the number should be formatted - * according to the customs/conventions of the user's native country, - * region, or culture. - * - * The Locale class is not suitable for subclassing. - * - *

- * You can create a Locale object using the constructor in - * this class: - * \htmlonly

\endhtmlonly - *
- *       Locale( const   char*  language,
- *               const   char*  country,
- *               const   char*  variant);
- * 
- * \htmlonly
\endhtmlonly - * The first argument to the constructors is a valid ISO - * Language Code. These codes are the lower-case two-letter - * codes as defined by ISO-639. - * You can find a full list of these codes at: - *
- * http://www.loc.gov/standards/iso639-2/ - * - *

- * The second argument to the constructors is a valid ISO Country - * Code. These codes are the upper-case two-letter codes - * as defined by ISO-3166. - * You can find a full list of these codes at a number of sites, such as: - *
- * http://www.iso.org/iso/en/prods-services/iso3166ma/index.html - * - *

- * The third constructor requires a third argument--the Variant. - * The Variant codes are vendor and browser-specific. - * For example, use REVISED for a language's revised script orthography, and POSIX for POSIX. - * Where there are two variants, separate them with an underscore, and - * put the most important one first. For - * example, a Traditional Spanish collation might be referenced, with - * "ES", "ES", "Traditional_POSIX". - * - *

- * Because a Locale object is just an identifier for a region, - * no validity check is performed when you construct a Locale. - * If you want to see whether particular resources are available for the - * Locale you construct, you must query those resources. For - * example, ask the NumberFormat for the locales it supports - * using its getAvailableLocales method. - *
Note: When you ask for a resource for a particular - * locale, you get back the best available match, not necessarily - * precisely what you asked for. For more information, look at - * ResourceBundle. - * - *

- * The Locale class provides a number of convenient constants - * that you can use to create Locale objects for commonly used - * locales. For example, the following refers to a Locale object - * for the United States: - * \htmlonly

\endhtmlonly - *
- *       Locale::getUS()
- * 
- * \htmlonly
\endhtmlonly - * - *

- * Once you've created a Locale you can query it for information about - * itself. Use getCountry to get the ISO Country Code and - * getLanguage to get the ISO Language Code. You can - * use getDisplayCountry to get the - * name of the country suitable for displaying to the user. Similarly, - * you can use getDisplayLanguage to get the name of - * the language suitable for displaying to the user. Interestingly, - * the getDisplayXXX methods are themselves locale-sensitive - * and have two versions: one that uses the default locale and one - * that takes a locale as an argument and displays the name or country in - * a language appropriate to that locale. - * - *

- * ICU provides a number of classes that perform locale-sensitive - * operations. For example, the NumberFormat class formats - * numbers, currency, or percentages in a locale-sensitive manner. Classes - * such as NumberFormat have a number of convenience methods - * for creating a default object of that type. For example, the - * NumberFormat class provides these three convenience methods - * for creating a default NumberFormat object: - * \htmlonly

\endhtmlonly - *
- *     UErrorCode success = U_ZERO_ERROR;
- *     Locale myLocale;
- *     NumberFormat *nf;
- *
- *     nf = NumberFormat::createInstance( success );          delete nf;
- *     nf = NumberFormat::createCurrencyInstance( success );  delete nf;
- *     nf = NumberFormat::createPercentInstance( success );   delete nf;
- * 
- * \htmlonly
\endhtmlonly - * Each of these methods has two variants; one with an explicit locale - * and one without; the latter using the default locale. - * \htmlonly
\endhtmlonly - *
- *     nf = NumberFormat::createInstance( myLocale, success );          delete nf;
- *     nf = NumberFormat::createCurrencyInstance( myLocale, success );  delete nf;
- *     nf = NumberFormat::createPercentInstance( myLocale, success );   delete nf;
- * 
- * \htmlonly
\endhtmlonly - * A Locale is the mechanism for identifying the kind of object - * (NumberFormat) that you would like to get. The locale is - * just a mechanism for identifying objects, - * not a container for the objects themselves. - * - *

- * Each class that performs locale-sensitive operations allows you - * to get all the available objects of that type. You can sift - * through these objects by language, country, or variant, - * and use the display names to present a menu to the user. - * For example, you can create a menu of all the collation objects - * suitable for a given language. Such classes implement these - * three class methods: - * \htmlonly

\endhtmlonly - *
- *       static Locale* getAvailableLocales(int32_t& numLocales)
- *       static UnicodeString& getDisplayName(const Locale&  objectLocale,
- *                                            const Locale&  displayLocale,
- *                                            UnicodeString& displayName)
- *       static UnicodeString& getDisplayName(const Locale&  objectLocale,
- *                                            UnicodeString& displayName)
- * 
- * \htmlonly
\endhtmlonly - * - * @stable ICU 2.0 - * @see ResourceBundle - */ -class U_COMMON_API Locale : public UObject { -public: - /** Useful constant for the Root locale. @stable ICU 4.4 */ - static const Locale &U_EXPORT2 getRoot(void); - /** Useful constant for this language. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getEnglish(void); - /** Useful constant for this language. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getFrench(void); - /** Useful constant for this language. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getGerman(void); - /** Useful constant for this language. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getItalian(void); - /** Useful constant for this language. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getJapanese(void); - /** Useful constant for this language. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getKorean(void); - /** Useful constant for this language. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getChinese(void); - /** Useful constant for this language. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getSimplifiedChinese(void); - /** Useful constant for this language. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getTraditionalChinese(void); - - /** Useful constant for this country/region. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getFrance(void); - /** Useful constant for this country/region. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getGermany(void); - /** Useful constant for this country/region. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getItaly(void); - /** Useful constant for this country/region. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getJapan(void); - /** Useful constant for this country/region. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getKorea(void); - /** Useful constant for this country/region. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getChina(void); - /** Useful constant for this country/region. 
@stable ICU 2.0 */ - static const Locale &U_EXPORT2 getPRC(void); - /** Useful constant for this country/region. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getTaiwan(void); - /** Useful constant for this country/region. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getUK(void); - /** Useful constant for this country/region. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getUS(void); - /** Useful constant for this country/region. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getCanada(void); - /** Useful constant for this country/region. @stable ICU 2.0 */ - static const Locale &U_EXPORT2 getCanadaFrench(void); - - - /** - * Construct a default locale object, a Locale for the default locale ID. - * - * @see getDefault - * @see uloc_getDefault - * @stable ICU 2.0 - */ - Locale(); - - /** - * Construct a locale from language, country, variant. - * If an error occurs, then the constructed object will be "bogus" - * (isBogus() will return TRUE). - * - * @param language Lowercase two-letter or three-letter ISO-639 code. - * This parameter can instead be an ICU style C locale (e.g. "en_US"), - * but the other parameters must not be used. - * This parameter can be NULL; if so, - * the locale is initialized to match the current default locale. - * (This is the same as using the default constructor.) - * Please note: The Java Locale class does NOT accept the form - * 'new Locale("en_US")' but only 'new Locale("en","US")' - * - * @param country Uppercase two-letter ISO-3166 code. (optional) - * @param variant Uppercase vendor and browser specific code. See class - * description. 
(optional) - * @param keywordsAndValues A string consisting of keyword/values pairs, such as - * "collation=phonebook;currency=euro" - * - * @see getDefault - * @see uloc_getDefault - * @stable ICU 2.0 - */ - Locale( const char * language, - const char * country = 0, - const char * variant = 0, - const char * keywordsAndValues = 0); - - /** - * Initializes a Locale object from another Locale object. - * - * @param other The Locale object being copied in. - * @stable ICU 2.0 - */ - Locale(const Locale& other); - -#ifndef U_HIDE_DRAFT_API - /** - * Move constructor; might leave source in bogus state. - * This locale will have the same contents that the source locale had. - * - * @param other The Locale object being moved in. - * @draft ICU 63 - */ - Locale(Locale&& other) U_NOEXCEPT; -#endif // U_HIDE_DRAFT_API - - /** - * Destructor - * @stable ICU 2.0 - */ - virtual ~Locale() ; - - /** - * Replaces the entire contents of *this with the specified value. - * - * @param other The Locale object being copied in. - * @return *this - * @stable ICU 2.0 - */ - Locale& operator=(const Locale& other); - -#ifndef U_HIDE_DRAFT_API - /** - * Move assignment operator; might leave source in bogus state. - * This locale will have the same contents that the source locale had. - * The behavior is undefined if *this and the source are the same object. - * - * @param other The Locale object being moved in. - * @return *this - * @draft ICU 63 - */ - Locale& operator=(Locale&& other) U_NOEXCEPT; -#endif // U_HIDE_DRAFT_API - - /** - * Checks if two locale keys are the same. - * - * @param other The locale key object to be compared with this. - * @return True if the two locale keys are the same, false otherwise. - * @stable ICU 2.0 - */ - UBool operator==(const Locale& other) const; - - /** - * Checks if two locale keys are not the same. - * - * @param other The locale key object to be compared with this. - * @return True if the two locale keys are not the same, false - * otherwise. 
- * @stable ICU 2.0 - */ - UBool operator!=(const Locale& other) const; - - /** - * Clone this object. - * Clones can be used concurrently in multiple threads. - * If an error occurs, then NULL is returned. - * The caller must delete the clone. - * - * @return a clone of this object - * - * @see getDynamicClassID - * @stable ICU 2.8 - */ - Locale *clone() const; - -#ifndef U_HIDE_SYSTEM_API - /** - * Common methods of getting the current default Locale. Used for the - * presentation: menus, dialogs, etc. Generally set once when your applet or - * application is initialized, then never reset. (If you do reset the - * default locale, you probably want to reload your GUI, so that the change - * is reflected in your interface.) - * - * More advanced programs will allow users to use different locales for - * different fields, e.g. in a spreadsheet. - * - * Note that the initial setting will match the host system. - * @return a reference to the Locale object for the default locale ID - * @system - * @stable ICU 2.0 - */ - static const Locale& U_EXPORT2 getDefault(void); - - /** - * Sets the default. Normally set once at the beginning of a process, - * then never reset. - * setDefault() only changes ICU's default locale ID, not - * the default locale ID of the runtime environment. - * - * @param newLocale Locale to set to. If NULL, set to the value obtained - * from the runtime environment. - * @param success The error code. - * @system - * @stable ICU 2.0 - */ - static void U_EXPORT2 setDefault(const Locale& newLocale, - UErrorCode& success); -#endif /* U_HIDE_SYSTEM_API */ - -#ifndef U_HIDE_DRAFT_API - /** - * Returns a Locale for the specified BCP47 language tag string. - * If the specified language tag contains any ill-formed subtags, - * the first such subtag and all following subtags are ignored. - *

- * This implements the 'Language-Tag' production of BCP47, and so - * supports grandfathered (regular and irregular) as well as private - * use language tags. Private use tags are represented as 'x-whatever', - * and grandfathered tags are converted to their canonical replacements - * where they exist. Note that a few grandfathered tags have no modern - * replacement, these will be converted using the fallback described in - * the first paragraph, so some information might be lost. - * @param tag the input BCP47 language tag. - * @param status error information if creating the Locale failed. - * @return the Locale for the specified BCP47 language tag. - * @draft ICU 63 - */ - static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status); - - /** - * Returns a well-formed language tag for this Locale. - *

- * Note: Any locale fields which do not satisfy the BCP47 syntax - * requirement will be silently omitted from the result. - * - * If this function fails, partial output may have been written to the sink. - * - * @param sink the output sink receiving the BCP47 language - * tag for this Locale. - * @param status error information if creating the language tag failed. - * @draft ICU 63 - */ - void toLanguageTag(ByteSink& sink, UErrorCode& status) const; - - /** - * Returns a well-formed language tag for this Locale. - *

- * Note: Any locale fields which do not satisfy the BCP47 syntax - * requirement will be silently omitted from the result. - * - * @param status error information if creating the language tag failed. - * @return the BCP47 language tag for this Locale. - * @draft ICU 63 - */ - template - inline StringClass toLanguageTag(UErrorCode& status) const; -#endif // U_HIDE_DRAFT_API - - /** - * Creates a locale which has had minimal canonicalization - * as per uloc_getName(). - * @param name The name to create from. If name is null, - * the default Locale is used. - * @return new locale object - * @stable ICU 2.0 - * @see uloc_getName - */ - static Locale U_EXPORT2 createFromName(const char *name); - - /** - * Creates a locale from the given string after canonicalizing - * the string by calling uloc_canonicalize(). - * @param name the locale ID to create from. Must not be NULL. - * @return a new locale object corresponding to the given name - * @stable ICU 3.0 - * @see uloc_canonicalize - */ - static Locale U_EXPORT2 createCanonical(const char* name); - - /** - * Returns the locale's ISO-639 language code. - * @return An alias to the code - * @stable ICU 2.0 - */ - inline const char * getLanguage( ) const; - - /** - * Returns the locale's ISO-15924 abbreviation script code. - * @return An alias to the code - * @see uscript_getShortName - * @see uscript_getCode - * @stable ICU 2.8 - */ - inline const char * getScript( ) const; - - /** - * Returns the locale's ISO-3166 country code. - * @return An alias to the code - * @stable ICU 2.0 - */ - inline const char * getCountry( ) const; - - /** - * Returns the locale's variant code. - * @return An alias to the code - * @stable ICU 2.0 - */ - inline const char * getVariant( ) const; - - /** - * Returns the programmatic name of the entire locale, with the language, - * country and variant separated by underbars. If a field is missing, up - * to two leading underbars will occur. 
Example: "en", "de_DE", "en_US_WIN", - * "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO" - * @return A pointer to "name". - * @stable ICU 2.0 - */ - inline const char * getName() const; - - /** - * Returns the programmatic name of the entire locale as getName() would return, - * but without keywords. - * @return A pointer to "name". - * @see getName - * @stable ICU 2.8 - */ - const char * getBaseName() const; - -#ifndef U_HIDE_DRAFT_API - /** - * Add the likely subtags for this Locale, per the algorithm described - * in the following CLDR technical report: - * - * http://www.unicode.org/reports/tr35/#Likely_Subtags - * - * If this Locale is already in the maximal form, or not valid, or there is - * no data available for maximization, the Locale will be unchanged. - * - * For example, "und-Zzzz" cannot be maximized, since there is no - * reasonable maximization. - * - * Examples: - * - * "en" maximizes to "en_Latn_US" - * - * "de" maximizes to "de_Latn_US" - * - * "sr" maximizes to "sr_Cyrl_RS" - * - * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) - * - * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) - * - * @param status error information if maximizing this Locale failed. - * If this Locale is not well-formed, the error code is - * U_ILLEGAL_ARGUMENT_ERROR. - * @draft ICU 63 - */ - void addLikelySubtags(UErrorCode& status); - - /** - * Minimize the subtags for this Locale, per the algorithm described - * in the following CLDR technical report: - * - * http://www.unicode.org/reports/tr35/#Likely_Subtags - * - * If this Locale is already in the minimal form, or not valid, or there is - * no data available for minimization, the Locale will be unchanged. - * - * Since the minimization algorithm relies on proper maximization, see the - * comments for addLikelySubtags for reasons why there might not be any - * data. 
- * - * Examples: - * - * "en_Latn_US" minimizes to "en" - * - * "de_Latn_US" minimizes to "de" - * - * "sr_Cyrl_RS" minimizes to "sr" - * - * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the - * script, and minimizing to "zh" would imply "zh_Hans_CN".) - * - * @param status error information if maximizing this Locale failed. - * If this Locale is not well-formed, the error code is - * U_ILLEGAL_ARGUMENT_ERROR. - * @draft ICU 63 - */ - void minimizeSubtags(UErrorCode& status); -#endif // U_HIDE_DRAFT_API - - /** - * Gets the list of keywords for the specified locale. - * - * @param status the status code - * @return pointer to StringEnumeration class, or NULL if there are no keywords. - * Client must dispose of it by calling delete. - * @see getKeywords - * @stable ICU 2.8 - */ - StringEnumeration * createKeywords(UErrorCode &status) const; - -#ifndef U_HIDE_DRAFT_API - - /** - * Gets the list of Unicode keywords for the specified locale. - * - * @param status the status code - * @return pointer to StringEnumeration class, or NULL if there are no keywords. - * Client must dispose of it by calling delete. - * @see getUnicodeKeywords - * @draft ICU 63 - */ - StringEnumeration * createUnicodeKeywords(UErrorCode &status) const; - - /** - * Gets the set of keywords for this Locale. - * - * A wrapper to call createKeywords() and write the resulting - * keywords as standard strings (or compatible objects) into any kind of - * container that can be written to by an STL style output iterator. - * - * @param iterator an STL style output iterator to write the keywords to. - * @param status error information if creating set of keywords failed. - * @draft ICU 63 - */ - template - inline void getKeywords(OutputIterator iterator, UErrorCode& status) const; - - /** - * Gets the set of Unicode keywords for this Locale. 
- * - * A wrapper to call createUnicodeKeywords() and write the resulting - * keywords as standard strings (or compatible objects) into any kind of - * container that can be written to by an STL style output iterator. - * - * @param iterator an STL style output iterator to write the keywords to. - * @param status error information if creating set of keywords failed. - * @draft ICU 63 - */ - template - inline void getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const; - -#endif // U_HIDE_DRAFT_API - - /** - * Gets the value for a keyword. - * - * This uses legacy keyword=value pairs, like "collation=phonebook". - * - * ICU4C doesn't do automatic conversion between legacy and Unicode - * keywords and values in getters and setters (as opposed to ICU4J). - * - * @param keywordName name of the keyword for which we want the value. Case insensitive. - * @param buffer The buffer to receive the keyword value. - * @param bufferCapacity The capacity of receiving buffer - * @param status Returns any error information while performing this operation. - * @return the length of the keyword value - * - * @stable ICU 2.8 - */ - int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const; - -#ifndef U_HIDE_DRAFT_API - /** - * Gets the value for a keyword. - * - * This uses legacy keyword=value pairs, like "collation=phonebook". - * - * ICU4C doesn't do automatic conversion between legacy and Unicode - * keywords and values in getters and setters (as opposed to ICU4J). - * - * @param keywordName name of the keyword for which we want the value. - * @param sink the sink to receive the keyword value. - * @param status error information if getting the value failed. - * @draft ICU 63 - */ - void getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const; - - /** - * Gets the value for a keyword. - * - * This uses legacy keyword=value pairs, like "collation=phonebook". 
- * - * ICU4C doesn't do automatic conversion between legacy and Unicode - * keywords and values in getters and setters (as opposed to ICU4J). - * - * @param keywordName name of the keyword for which we want the value. - * @param status error information if getting the value failed. - * @return the keyword value. - * @draft ICU 63 - */ - template - inline StringClass getKeywordValue(StringPiece keywordName, UErrorCode& status) const; - - /** - * Gets the Unicode value for a Unicode keyword. - * - * This uses Unicode key-value pairs, like "co-phonebk". - * - * ICU4C doesn't do automatic conversion between legacy and Unicode - * keywords and values in getters and setters (as opposed to ICU4J). - * - * @param keywordName name of the keyword for which we want the value. - * @param sink the sink to receive the keyword value. - * @param status error information if getting the value failed. - * @draft ICU 63 - */ - void getUnicodeKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const; - - /** - * Gets the Unicode value for a Unicode keyword. - * - * This uses Unicode key-value pairs, like "co-phonebk". - * - * ICU4C doesn't do automatic conversion between legacy and Unicode - * keywords and values in getters and setters (as opposed to ICU4J). - * - * @param keywordName name of the keyword for which we want the value. - * @param status error information if getting the value failed. - * @return the keyword value. - * @draft ICU 63 - */ - template - inline StringClass getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const; -#endif // U_HIDE_DRAFT_API - - /** - * Sets or removes the value for a keyword. - * - * For removing all keywords, use getBaseName(), - * and construct a new Locale if it differs from getName(). - * - * This uses legacy keyword=value pairs, like "collation=phonebook". - * - * ICU4C doesn't do automatic conversion between legacy and Unicode - * keywords and values in getters and setters (as opposed to ICU4J). 
- * - * @param keywordName name of the keyword to be set. Case insensitive. - * @param keywordValue value of the keyword to be set. If 0-length or - * NULL, will result in the keyword being removed. No error is given if - * that keyword does not exist. - * @param status Returns any error information while performing this operation. - * - * @stable ICU 49 - */ - void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status); - -#ifndef U_HIDE_DRAFT_API - /** - * Sets or removes the value for a keyword. - * - * For removing all keywords, use getBaseName(), - * and construct a new Locale if it differs from getName(). - * - * This uses legacy keyword=value pairs, like "collation=phonebook". - * - * ICU4C doesn't do automatic conversion between legacy and Unicode - * keywords and values in getters and setters (as opposed to ICU4J). - * - * @param keywordName name of the keyword to be set. - * @param keywordValue value of the keyword to be set. If 0-length or - * NULL, will result in the keyword being removed. No error is given if - * that keyword does not exist. - * @param status Returns any error information while performing this operation. - * @draft ICU 63 - */ - void setKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status); - - /** - * Sets or removes the Unicode value for a Unicode keyword. - * - * For removing all keywords, use getBaseName(), - * and construct a new Locale if it differs from getName(). - * - * This uses Unicode key-value pairs, like "co-phonebk". - * - * ICU4C doesn't do automatic conversion between legacy and Unicode - * keywords and values in getters and setters (as opposed to ICU4J). - * - * @param keywordName name of the keyword to be set. - * @param keywordValue value of the keyword to be set. If 0-length or - * NULL, will result in the keyword being removed. No error is given if - * that keyword does not exist. 
- * @param status Returns any error information while performing this operation. - * @draft ICU 63 - */ - void setUnicodeKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status); -#endif // U_HIDE_DRAFT_API - - /** - * returns the locale's three-letter language code, as specified - * in ISO draft standard ISO-639-2. - * @return An alias to the code, or an empty string - * @stable ICU 2.0 - */ - const char * getISO3Language() const; - - /** - * Fills in "name" with the locale's three-letter ISO-3166 country code. - * @return An alias to the code, or an empty string - * @stable ICU 2.0 - */ - const char * getISO3Country() const; - - /** - * Returns the Windows LCID value corresponding to this locale. - * This value is stored in the resource data for the locale as a one-to-four-digit - * hexadecimal number. If the resource is missing, in the wrong format, or - * there is no Windows LCID value that corresponds to this locale, returns 0. - * @stable ICU 2.0 - */ - uint32_t getLCID(void) const; - - /** - * Returns whether this locale's script is written right-to-left. - * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags(). - * If no likely script is known, then FALSE is returned. - * - * A script is right-to-left according to the CLDR script metadata - * which corresponds to whether the script's letters have Bidi_Class=R or AL. - * - * Returns TRUE for "ar" and "en-Hebr", FALSE for "zh" and "fa-Cyrl". - * - * @return TRUE if the locale's script is written right-to-left - * @stable ICU 54 - */ - UBool isRightToLeft() const; - - /** - * Fills in "dispLang" with the name of this locale's language in a format suitable for - * user display in the default locale. For example, if the locale's language code is - * "fr" and the default locale's language code is "en", this function would set - * dispLang to "French". - * @param dispLang Receives the language's display name. - * @return A reference to "dispLang". 
- * @stable ICU 2.0 - */ - UnicodeString& getDisplayLanguage(UnicodeString& dispLang) const; - - /** - * Fills in "dispLang" with the name of this locale's language in a format suitable for - * user display in the locale specified by "displayLocale". For example, if the locale's - * language code is "en" and displayLocale's language code is "fr", this function would set - * dispLang to "Anglais". - * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * displayLocale would result in "Anglais", while passing Locale::getGerman() - * for displayLocale would result in "Englisch". - * @param dispLang Receives the language's display name. - * @return A reference to "dispLang". - * @stable ICU 2.0 - */ - UnicodeString& getDisplayLanguage( const Locale& displayLocale, - UnicodeString& dispLang) const; - - /** - * Fills in "dispScript" with the name of this locale's script in a format suitable - * for user display in the default locale. For example, if the locale's script code - * is "LATN" and the default locale's language code is "en", this function would set - * dispScript to "Latin". - * @param dispScript Receives the scripts's display name. - * @return A reference to "dispScript". - * @stable ICU 2.8 - */ - UnicodeString& getDisplayScript( UnicodeString& dispScript) const; - - /** - * Fills in "dispScript" with the name of this locale's country in a format suitable - * for user display in the locale specified by "displayLocale". For example, if the locale's - * script code is "LATN" and displayLocale's language code is "en", this function would set - * dispScript to "Latin". - * @param displayLocale Specifies the locale to be used to display the name. In other - * words, if the locale's script code is "LATN", passing - * Locale::getFrench() for displayLocale would result in "", while - * passing Locale::getGerman() for displayLocale would result in - * "". 
- * @param dispScript Receives the scripts's display name. - * @return A reference to "dispScript". - * @stable ICU 2.8 - */ - UnicodeString& getDisplayScript( const Locale& displayLocale, - UnicodeString& dispScript) const; - - /** - * Fills in "dispCountry" with the name of this locale's country in a format suitable - * for user display in the default locale. For example, if the locale's country code - * is "FR" and the default locale's language code is "en", this function would set - * dispCountry to "France". - * @param dispCountry Receives the country's display name. - * @return A reference to "dispCountry". - * @stable ICU 2.0 - */ - UnicodeString& getDisplayCountry( UnicodeString& dispCountry) const; - - /** - * Fills in "dispCountry" with the name of this locale's country in a format suitable - * for user display in the locale specified by "displayLocale". For example, if the locale's - * country code is "US" and displayLocale's language code is "fr", this function would set - * dispCountry to "États-Unis". - * @param displayLocale Specifies the locale to be used to display the name. In other - * words, if the locale's country code is "US", passing - * Locale::getFrench() for displayLocale would result in "États-Unis", while - * passing Locale::getGerman() for displayLocale would result in - * "Vereinigte Staaten". - * @param dispCountry Receives the country's display name. - * @return A reference to "dispCountry". - * @stable ICU 2.0 - */ - UnicodeString& getDisplayCountry( const Locale& displayLocale, - UnicodeString& dispCountry) const; - - /** - * Fills in "dispVar" with the name of this locale's variant code in a format suitable - * for user display in the default locale. - * @param dispVar Receives the variant's name. - * @return A reference to "dispVar". 
- * @stable ICU 2.0 - */ - UnicodeString& getDisplayVariant( UnicodeString& dispVar) const; - - /** - * Fills in "dispVar" with the name of this locale's variant code in a format - * suitable for user display in the locale specified by "displayLocale". - * @param displayLocale Specifies the locale to be used to display the name. - * @param dispVar Receives the variant's display name. - * @return A reference to "dispVar". - * @stable ICU 2.0 - */ - UnicodeString& getDisplayVariant( const Locale& displayLocale, - UnicodeString& dispVar) const; - - /** - * Fills in "name" with the name of this locale in a format suitable for user display - * in the default locale. This function uses getDisplayLanguage(), getDisplayCountry(), - * and getDisplayVariant() to do its work, and outputs the display name in the format - * "language (country[,variant])". For example, if the default locale is en_US, then - * fr_FR's display name would be "French (France)", and es_MX_Traditional's display name - * would be "Spanish (Mexico,Traditional)". - * @param name Receives the locale's display name. - * @return A reference to "name". - * @stable ICU 2.0 - */ - UnicodeString& getDisplayName( UnicodeString& name) const; - - /** - * Fills in "name" with the name of this locale in a format suitable for user display - * in the locale specified by "displayLocale". This function uses getDisplayLanguage(), - * getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display - * name in the format "language (country[,variant])". For example, if displayLocale is - * fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's - * display name would be "norvégien (Norvège,NY)". - * @param displayLocale Specifies the locale to be used to display the name. - * @param name Receives the locale's display name. - * @return A reference to "name". 
- * @stable ICU 2.0 - */ - UnicodeString& getDisplayName( const Locale& displayLocale, - UnicodeString& name) const; - - /** - * Generates a hash code for the locale. - * @stable ICU 2.0 - */ - int32_t hashCode(void) const; - - /** - * Sets the locale to bogus - * A bogus locale represents a non-existing locale associated - * with services that can be instantiated from non-locale data - * in addition to locale (for example, collation can be - * instantiated from a locale and from a rule set). - * @stable ICU 2.1 - */ - void setToBogus(); - - /** - * Gets the bogus state. Locale object can be bogus if it doesn't exist - * @return FALSE if it is a real locale, TRUE if it is a bogus locale - * @stable ICU 2.1 - */ - UBool isBogus(void) const; - - /** - * Returns a list of all installed locales. - * @param count Receives the number of locales in the list. - * @return A pointer to an array of Locale objects. This array is the list - * of all locales with installed resource files. The called does NOT - * get ownership of this list, and must NOT delete it. - * @stable ICU 2.0 - */ - static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); - - /** - * Gets a list of all available 2-letter country codes defined in ISO 3166. This is a - * pointer to an array of pointers to arrays of char. All of these pointers are - * owned by ICU-- do not delete them, and do not write through them. The array is - * terminated with a null pointer. - * @return a list of all available country codes - * @stable ICU 2.0 - */ - static const char* const* U_EXPORT2 getISOCountries(); - - /** - * Gets a list of all available language codes defined in ISO 639. This is a pointer - * to an array of pointers to arrays of char. All of these pointers are owned - * by ICU-- do not delete them, and do not write through them. The array is - * terminated with a null pointer. 
- * @return a list of all available language codes - * @stable ICU 2.0 - */ - static const char* const* U_EXPORT2 getISOLanguages(); - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - * - * @stable ICU 2.2 - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - * - * @stable ICU 2.2 - */ - virtual UClassID getDynamicClassID() const; - -protected: /* only protected for testing purposes. DO NOT USE. */ -#ifndef U_HIDE_INTERNAL_API - /** - * Set this from a single POSIX style locale string. - * @internal - */ - void setFromPOSIXID(const char *posixID); -#endif /* U_HIDE_INTERNAL_API */ - -private: - /** - * Initialize the locale object with a new name. - * Was deprecated - used in implementation - moved internal - * - * @param cLocaleID The new locale name. - * @param canonicalize whether to call uloc_canonicalize on cLocaleID - */ - Locale& init(const char* cLocaleID, UBool canonicalize); - - /* - * Internal constructor to allow construction of a locale object with - * NO side effects. (Default constructor tries to get - * the default locale.) - */ - enum ELocaleType { - eBOGUS - }; - Locale(ELocaleType); - - /** - * Initialize the locale cache for commonly used locales - */ - static Locale *getLocaleCache(void); - - char language[ULOC_LANG_CAPACITY]; - char script[ULOC_SCRIPT_CAPACITY]; - char country[ULOC_COUNTRY_CAPACITY]; - int32_t variantBegin; - char* fullName; - char fullNameBuffer[ULOC_FULLNAME_CAPACITY]; - // name without keywords - char* baseName; - void initBaseName(UErrorCode& status); - - UBool fIsBogus; - - static const Locale &getLocale(int locid); - - /** - * A friend to allow the default locale to be set by either the C or C++ API. 
- * @internal (private) - */ - friend Locale *locale_set_default_internal(const char *, UErrorCode& status); - - /** - * @internal (private) - */ - friend void U_CALLCONV locale_available_init(); -}; - -inline UBool -Locale::operator!=(const Locale& other) const -{ - return !operator==(other); -} - -#ifndef U_HIDE_DRAFT_API -template inline StringClass -Locale::toLanguageTag(UErrorCode& status) const -{ - StringClass result; - StringByteSink sink(&result); - toLanguageTag(sink, status); - return result; -} -#endif // U_HIDE_DRAFT_API - -inline const char * -Locale::getCountry() const -{ - return country; -} - -inline const char * -Locale::getLanguage() const -{ - return language; -} - -inline const char * -Locale::getScript() const -{ - return script; -} - -inline const char * -Locale::getVariant() const -{ - return &baseName[variantBegin]; -} - -inline const char * -Locale::getName() const -{ - return fullName; -} - -#ifndef U_HIDE_DRAFT_API - -template inline void -Locale::getKeywords(OutputIterator iterator, UErrorCode& status) const -{ - LocalPointer keys(createKeywords(status)); - if (U_FAILURE(status)) { - return; - } - for (;;) { - int32_t resultLength; - const char* buffer = keys->next(&resultLength, status); - if (U_FAILURE(status) || buffer == nullptr) { - return; - } - *iterator++ = StringClass(buffer, resultLength); - } -} - -template inline void -Locale::getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const -{ - LocalPointer keys(createUnicodeKeywords(status)); - if (U_FAILURE(status)) { - return; - } - for (;;) { - int32_t resultLength; - const char* buffer = keys->next(&resultLength, status); - if (U_FAILURE(status) || buffer == nullptr) { - return; - } - *iterator++ = StringClass(buffer, resultLength); - } -} - -template inline StringClass -Locale::getKeywordValue(StringPiece keywordName, UErrorCode& status) const -{ - StringClass result; - StringByteSink sink(&result); - getKeywordValue(keywordName, sink, status); - return result; 
-} - -template inline StringClass -Locale::getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const -{ - StringClass result; - StringByteSink sink(&result); - getUnicodeKeywordValue(keywordName, sink, status); - return result; -} - -#endif // U_HIDE_DRAFT_API - -inline UBool -Locale::isBogus(void) const { - return fIsBogus; -} - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/messagepattern.h b/deps/node/deps/icu-small/source/common/unicode/messagepattern.h deleted file mode 100644 index 9f2a8655..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/messagepattern.h +++ /dev/null @@ -1,945 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011-2013, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: messagepattern.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011mar14 -* created by: Markus W. Scherer -*/ - -#ifndef __MESSAGEPATTERN_H__ -#define __MESSAGEPATTERN_H__ - -/** - * \file - * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/parseerr.h" -#include "unicode/unistr.h" - -/** - * Mode for when an apostrophe starts quoted literal text for MessageFormat output. - * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h - * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE). - *

- * A pair of adjacent apostrophes always results in a single apostrophe in the output, - * even when the pair is between two single, text-quoting apostrophes. - *

- * The following table shows examples of desired MessageFormat.format() output - * with the pattern strings that yield that output. - *

- * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
Desired outputDOUBLE_OPTIONALDOUBLE_REQUIRED
I see {many}I see '{many}'(same)
I said {'Wow!'}I said '{''Wow!''}'(same)
I don't knowI don't know OR
I don''t know
I don''t know
- * @stable ICU 4.8 - * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE - */ -enum UMessagePatternApostropheMode { - /** - * A literal apostrophe is represented by - * either a single or a double apostrophe pattern character. - * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text - * if it immediately precedes a curly brace {}, - * or a pipe symbol | if inside a choice format, - * or a pound symbol # if inside a plural format. - *

- * This is the default behavior starting with ICU 4.8. - * @stable ICU 4.8 - */ - UMSGPAT_APOS_DOUBLE_OPTIONAL, - /** - * A literal apostrophe must be represented by - * a double apostrophe pattern character. - * A single apostrophe always starts quoted literal text. - *

- * This is the behavior of ICU 4.6 and earlier, and of the JDK. - * @stable ICU 4.8 - */ - UMSGPAT_APOS_DOUBLE_REQUIRED -}; -/** - * @stable ICU 4.8 - */ -typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode; - -/** - * MessagePattern::Part type constants. - * @stable ICU 4.8 - */ -enum UMessagePatternPartType { - /** - * Start of a message pattern (main or nested). - * The length is 0 for the top-level message - * and for a choice argument sub-message, otherwise 1 for the '{'. - * The value indicates the nesting level, starting with 0 for the main message. - *

- * There is always a later MSG_LIMIT part. - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_MSG_START, - /** - * End of a message pattern (main or nested). - * The length is 0 for the top-level message and - * the last sub-message of a choice argument, - * otherwise 1 for the '}' or (in a choice argument style) the '|'. - * The value indicates the nesting level, starting with 0 for the main message. - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_MSG_LIMIT, - /** - * Indicates a substring of the pattern string which is to be skipped when formatting. - * For example, an apostrophe that begins or ends quoted text - * would be indicated with such a part. - * The value is undefined and currently always 0. - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_SKIP_SYNTAX, - /** - * Indicates that a syntax character needs to be inserted for auto-quoting. - * The length is 0. - * The value is the character code of the insertion character. (U+0027=APOSTROPHE) - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_INSERT_CHAR, - /** - * Indicates a syntactic (non-escaped) # symbol in a plural variant. - * When formatting, replace this part's substring with the - * (value-offset) for the plural argument value. - * The value is undefined and currently always 0. - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_REPLACE_NUMBER, - /** - * Start of an argument. - * The length is 1 for the '{'. - * The value is the ordinal value of the ArgType. Use getArgType(). - *

- * This part is followed by either an ARG_NUMBER or ARG_NAME, - * followed by optional argument sub-parts (see UMessagePatternArgType constants) - * and finally an ARG_LIMIT part. - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_ARG_START, - /** - * End of an argument. - * The length is 1 for the '}'. - * The value is the ordinal value of the ArgType. Use getArgType(). - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_ARG_LIMIT, - /** - * The argument number, provided by the value. - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_ARG_NUMBER, - /** - * The argument name. - * The value is undefined and currently always 0. - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_ARG_NAME, - /** - * The argument type. - * The value is undefined and currently always 0. - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_ARG_TYPE, - /** - * The argument style text. - * The value is undefined and currently always 0. - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_ARG_STYLE, - /** - * A selector substring in a "complex" argument style. - * The value is undefined and currently always 0. - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_ARG_SELECTOR, - /** - * An integer value, for example the offset or an explicit selector value - * in a PluralFormat style. - * The part value is the integer value. - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_ARG_INT, - /** - * A numeric value, for example the offset or an explicit selector value - * in a PluralFormat style. - * The part value is an index into an internal array of numeric values; - * use getNumericValue(). - * @stable ICU 4.8 - */ - UMSGPAT_PART_TYPE_ARG_DOUBLE -}; -/** - * @stable ICU 4.8 - */ -typedef enum UMessagePatternPartType UMessagePatternPartType; - -/** - * Argument type constants. - * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. - * - * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, - * with a nesting level one greater than the surrounding message. 
- * @stable ICU 4.8 - */ -enum UMessagePatternArgType { - /** - * The argument has no specified type. - * @stable ICU 4.8 - */ - UMSGPAT_ARG_TYPE_NONE, - /** - * The argument has a "simple" type which is provided by the ARG_TYPE part. - * An ARG_STYLE part might follow that. - * @stable ICU 4.8 - */ - UMSGPAT_ARG_TYPE_SIMPLE, - /** - * The argument is a ChoiceFormat with one or more - * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. - * @stable ICU 4.8 - */ - UMSGPAT_ARG_TYPE_CHOICE, - /** - * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset - * (e.g., offset:1) - * and one or more (ARG_SELECTOR [explicit-value] message) tuples. - * If the selector has an explicit value (e.g., =2), then - * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. - * Otherwise the message immediately follows the ARG_SELECTOR. - * @stable ICU 4.8 - */ - UMSGPAT_ARG_TYPE_PLURAL, - /** - * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. - * @stable ICU 4.8 - */ - UMSGPAT_ARG_TYPE_SELECT, - /** - * The argument is an ordinal-number PluralFormat - * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL. - * @stable ICU 50 - */ - UMSGPAT_ARG_TYPE_SELECTORDINAL -}; -/** - * @stable ICU 4.8 - */ -typedef enum UMessagePatternArgType UMessagePatternArgType; - -/** - * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE - * Returns TRUE if the argument type has a plural style part sequence and semantics, - * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. - * @stable ICU 50 - */ -#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \ - ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL) - -enum { - /** - * Return value from MessagePattern.validateArgumentName() for when - * the string is a valid "pattern identifier" but not a number. 
- * @stable ICU 4.8 - */ - UMSGPAT_ARG_NAME_NOT_NUMBER=-1, - - /** - * Return value from MessagePattern.validateArgumentName() for when - * the string is invalid. - * It might not be a valid "pattern identifier", - * or it have only ASCII digits but there is a leading zero or the number is too large. - * @stable ICU 4.8 - */ - UMSGPAT_ARG_NAME_NOT_VALID=-2 -}; - -/** - * Special value that is returned by getNumericValue(Part) when no - * numeric value is defined for a part. - * @see MessagePattern.getNumericValue() - * @stable ICU 4.8 - */ -#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) - -U_NAMESPACE_BEGIN - -class MessagePatternDoubleList; -class MessagePatternPartsList; - -/** - * Parses and represents ICU MessageFormat patterns. - * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. - * Used in the implementations of those classes as well as in tools - * for message validation, translation and format conversion. - *

- * The parser handles all syntax relevant for identifying message arguments. - * This includes "complex" arguments whose style strings contain - * nested MessageFormat pattern substrings. - * For "simple" arguments (with no nested MessageFormat pattern substrings), - * the argument style is not parsed any further. - *

- * The parser handles named and numbered message arguments and allows both in one message. - *

- * Once a pattern has been parsed successfully, iterate through the parsed data - * with countParts(), getPart() and related methods. - *

- * The data logically represents a parse tree, but is stored and accessed - * as a list of "parts" for fast and simple parsing and to minimize object allocations. - * Arguments and nested messages are best handled via recursion. - * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns - * the index of the corresponding _LIMIT "part". - *

- * List of "parts": - *

- * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
- * argument = noneArg | simpleArg | complexArg
- * complexArg = choiceArg | pluralArg | selectArg
- *
- * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
- * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
- * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
- * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
- * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
- *
- * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
- * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
- * selectStyle = (ARG_SELECTOR message)+
- * 
- *
    - *
  • Literal output text is not represented directly by "parts" but accessed - * between parts of a message, from one part's getLimit() to the next part's getIndex(). - *
  • ARG_START.CHOICE stands for an ARG_START Part with ArgType CHOICE. - *
  • In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or - * the less-than-or-equal-to sign (U+2264). - *
  • In the pluralStyle, the first, optional numeric Part has the "offset:" value. - * The optional numeric Part between each (ARG_SELECTOR, message) pair - * is the value of an explicit-number selector like "=2", - * otherwise the selector is a non-numeric identifier. - *
  • The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. - *
- *

- * This class is not intended for public subclassing. - * - * @stable ICU 4.8 - */ -class U_COMMON_API MessagePattern : public UObject { -public: - /** - * Constructs an empty MessagePattern with default UMessagePatternApostropheMode. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @stable ICU 4.8 - */ - MessagePattern(UErrorCode &errorCode); - - /** - * Constructs an empty MessagePattern. - * @param mode Explicit UMessagePatternApostropheMode. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @stable ICU 4.8 - */ - MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); - - /** - * Constructs a MessagePattern with default UMessagePatternApostropheMode and - * parses the MessageFormat pattern string. - * @param pattern a MessageFormat pattern string - * @param parseError Struct to receive information on the position - * of an error within the pattern. - * Can be NULL. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * TODO: turn @throws into UErrorCode specifics? - * @throws IllegalArgumentException for syntax errors in the pattern string - * @throws IndexOutOfBoundsException if certain limits are exceeded - * (e.g., argument number too high, argument name too long, etc.) 
- * @throws NumberFormatException if a number could not be parsed - * @stable ICU 4.8 - */ - MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); - - /** - * Copy constructor. - * @param other Object to copy. - * @stable ICU 4.8 - */ - MessagePattern(const MessagePattern &other); - - /** - * Assignment operator. - * @param other Object to copy. - * @return *this=other - * @stable ICU 4.8 - */ - MessagePattern &operator=(const MessagePattern &other); - - /** - * Destructor. - * @stable ICU 4.8 - */ - virtual ~MessagePattern(); - - /** - * Parses a MessageFormat pattern string. - * @param pattern a MessageFormat pattern string - * @param parseError Struct to receive information on the position - * of an error within the pattern. - * Can be NULL. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return *this - * @throws IllegalArgumentException for syntax errors in the pattern string - * @throws IndexOutOfBoundsException if certain limits are exceeded - * (e.g., argument number too high, argument name too long, etc.) - * @throws NumberFormatException if a number could not be parsed - * @stable ICU 4.8 - */ - MessagePattern &parse(const UnicodeString &pattern, - UParseError *parseError, UErrorCode &errorCode); - - /** - * Parses a ChoiceFormat pattern string. - * @param pattern a ChoiceFormat pattern string - * @param parseError Struct to receive information on the position - * of an error within the pattern. - * Can be NULL. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return *this - * @throws IllegalArgumentException for syntax errors in the pattern string - * @throws IndexOutOfBoundsException if certain limits are exceeded - * (e.g., argument number too high, argument name too long, etc.) - * @throws NumberFormatException if a number could not be parsed - * @stable ICU 4.8 - */ - MessagePattern &parseChoiceStyle(const UnicodeString &pattern, - UParseError *parseError, UErrorCode &errorCode); - - /** - * Parses a PluralFormat pattern string. - * @param pattern a PluralFormat pattern string - * @param parseError Struct to receive information on the position - * of an error within the pattern. - * Can be NULL. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return *this - * @throws IllegalArgumentException for syntax errors in the pattern string - * @throws IndexOutOfBoundsException if certain limits are exceeded - * (e.g., argument number too high, argument name too long, etc.) - * @throws NumberFormatException if a number could not be parsed - * @stable ICU 4.8 - */ - MessagePattern &parsePluralStyle(const UnicodeString &pattern, - UParseError *parseError, UErrorCode &errorCode); - - /** - * Parses a SelectFormat pattern string. - * @param pattern a SelectFormat pattern string - * @param parseError Struct to receive information on the position - * of an error within the pattern. - * Can be NULL. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return *this - * @throws IllegalArgumentException for syntax errors in the pattern string - * @throws IndexOutOfBoundsException if certain limits are exceeded - * (e.g., argument number too high, argument name too long, etc.) - * @throws NumberFormatException if a number could not be parsed - * @stable ICU 4.8 - */ - MessagePattern &parseSelectStyle(const UnicodeString &pattern, - UParseError *parseError, UErrorCode &errorCode); - - /** - * Clears this MessagePattern. - * countParts() will return 0. - * @stable ICU 4.8 - */ - void clear(); - - /** - * Clears this MessagePattern and sets the UMessagePatternApostropheMode. - * countParts() will return 0. - * @param mode The new UMessagePatternApostropheMode. - * @stable ICU 4.8 - */ - void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { - clear(); - aposMode=mode; - } - - /** - * @param other another object to compare with. - * @return TRUE if this object is equivalent to the other one. - * @stable ICU 4.8 - */ - UBool operator==(const MessagePattern &other) const; - - /** - * @param other another object to compare with. - * @return FALSE if this object is equivalent to the other one. - * @stable ICU 4.8 - */ - inline UBool operator!=(const MessagePattern &other) const { - return !operator==(other); - } - - /** - * @return A hash code for this object. - * @stable ICU 4.8 - */ - int32_t hashCode() const; - - /** - * @return this instance's UMessagePatternApostropheMode. - * @stable ICU 4.8 - */ - UMessagePatternApostropheMode getApostropheMode() const { - return aposMode; - } - - // Java has package-private jdkAposMode() here. - // In C++, this is declared in the MessageImpl class. - - /** - * @return the parsed pattern string (null if none was parsed). - * @stable ICU 4.8 - */ - const UnicodeString &getPatternString() const { - return msg; - } - - /** - * Does the parsed pattern have named arguments like {first_name}? - * @return TRUE if the parsed pattern has at least one named argument. 
- * @stable ICU 4.8 - */ - UBool hasNamedArguments() const { - return hasArgNames; - } - - /** - * Does the parsed pattern have numbered arguments like {2}? - * @return TRUE if the parsed pattern has at least one numbered argument. - * @stable ICU 4.8 - */ - UBool hasNumberedArguments() const { - return hasArgNumbers; - } - - /** - * Validates and parses an argument name or argument number string. - * An argument name must be a "pattern identifier", that is, it must contain - * no Unicode Pattern_Syntax or Pattern_White_Space characters. - * If it only contains ASCII digits, then it must be a small integer with no leading zero. - * @param name Input string. - * @return >=0 if the name is a valid number, - * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, - * ARG_NAME_NOT_VALID (-2) if it is neither. - * @stable ICU 4.8 - */ - static int32_t validateArgumentName(const UnicodeString &name); - - /** - * Returns a version of the parsed pattern string where each ASCII apostrophe - * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. - *

- * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." - * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." - * @return the deep-auto-quoted version of the parsed pattern string. - * @see MessageFormat.autoQuoteApostrophe() - * @stable ICU 4.8 - */ - UnicodeString autoQuoteApostropheDeep() const; - - class Part; - - /** - * Returns the number of "parts" created by parsing the pattern string. - * Returns 0 if no pattern has been parsed or clear() was called. - * @return the number of pattern parts. - * @stable ICU 4.8 - */ - int32_t countParts() const { - return partsLength; - } - - /** - * Gets the i-th pattern "part". - * @param i The index of the Part data. (0..countParts()-1) - * @return the i-th pattern "part". - * @stable ICU 4.8 - */ - const Part &getPart(int32_t i) const { - return parts[i]; - } - - /** - * Returns the UMessagePatternPartType of the i-th pattern "part". - * Convenience method for getPart(i).getType(). - * @param i The index of the Part data. (0..countParts()-1) - * @return The UMessagePatternPartType of the i-th Part. - * @stable ICU 4.8 - */ - UMessagePatternPartType getPartType(int32_t i) const { - return getPart(i).type; - } - - /** - * Returns the pattern index of the specified pattern "part". - * Convenience method for getPart(partIndex).getIndex(). - * @param partIndex The index of the Part data. (0..countParts()-1) - * @return The pattern index of this Part. - * @stable ICU 4.8 - */ - int32_t getPatternIndex(int32_t partIndex) const { - return getPart(partIndex).index; - } - - /** - * Returns the substring of the pattern string indicated by the Part. - * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). - * @param part a part of this MessagePattern. - * @return the substring associated with part. 
- * @stable ICU 4.8 - */ - UnicodeString getSubstring(const Part &part) const { - return msg.tempSubString(part.index, part.length); - } - - /** - * Compares the part's substring with the input string s. - * @param part a part of this MessagePattern. - * @param s a string. - * @return TRUE if getSubstring(part).equals(s). - * @stable ICU 4.8 - */ - UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { - return 0==msg.compare(part.index, part.length, s); - } - - /** - * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. - * @param part a part of this MessagePattern. - * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part. - * @stable ICU 4.8 - */ - double getNumericValue(const Part &part) const; - - /** - * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. - * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) - * @return the "offset:" value. - * @stable ICU 4.8 - */ - double getPluralOffset(int32_t pluralStart) const; - - /** - * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. - * @param start The index of some Part data (0..countParts()-1); - * this Part should be of Type ARG_START or MSG_START. - * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, - * or start itself if getPartType(msgStart)!=ARG|MSG_START. 
- * @stable ICU 4.8 - */ - int32_t getLimitPartIndex(int32_t start) const { - int32_t limit=getPart(start).limitPartIndex; - if(limit parts=new ArrayList(); - MessagePatternPartsList *partsList; - Part *parts; - int32_t partsLength; - // ArrayList numericValues; - MessagePatternDoubleList *numericValuesList; - double *numericValues; - int32_t numericValuesLength; - UBool hasArgNames; - UBool hasArgNumbers; - UBool needsAutoQuoting; -}; - -U_NAMESPACE_END - -#endif // !UCONFIG_NO_FORMATTING - -#endif // __MESSAGEPATTERN_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/normalizer2.h b/deps/node/deps/icu-small/source/common/unicode/normalizer2.h deleted file mode 100644 index 4caa0e31..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/normalizer2.h +++ /dev/null @@ -1,774 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2009-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: normalizer2.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009nov22 -* created by: Markus W. Scherer -*/ - -#ifndef __NORMALIZER2_H__ -#define __NORMALIZER2_H__ - -/** - * \file - * \brief C++ API: New API for Unicode Normalization. - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/stringpiece.h" -#include "unicode/uniset.h" -#include "unicode/unistr.h" -#include "unicode/unorm2.h" - -U_NAMESPACE_BEGIN - -class ByteSink; - -/** - * Unicode normalization functionality for standard Unicode normalization or - * for using custom mapping tables. - * All instances of this class are unmodifiable/immutable. 
- * Instances returned by getInstance() are singletons that must not be deleted by the caller. - * The Normalizer2 class is not intended for public subclassing. - * - * The primary functions are to produce a normalized string and to detect whether - * a string is already normalized. - * The most commonly used normalization forms are those defined in - * http://www.unicode.org/unicode/reports/tr15/ - * However, this API supports additional normalization forms for specialized purposes. - * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE) - * and can be used in implementations of UTS #46. - * - * Not only are the standard compose and decompose modes supplied, - * but additional modes are provided as documented in the Mode enum. - * - * Some of the functions in this class identify normalization boundaries. - * At a normalization boundary, the portions of the string - * before it and starting from it do not interact and can be handled independently. - * - * The spanQuickCheckYes() stops at a normalization boundary. - * When the goal is a normalized string, then the text before the boundary - * can be copied, and the remainder can be processed with normalizeSecondAndAppend(). - * - * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether - * a character is guaranteed to be at a normalization boundary, - * regardless of context. - * This is used for moving from one normalization boundary to the next - * or preceding boundary, and for performing iterative normalization. - * - * Iterative normalization is useful when only a small portion of a - * longer string needs to be processed. - * For example, in ICU, iterative normalization is used by the NormalizationTransliterator - * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart() - * (to process only the substring for which sort key bytes are computed). 
- * - * The set of normalization boundaries returned by these functions may not be - * complete: There may be more boundaries that could be returned. - * Different functions may return different boundaries. - * @stable ICU 4.4 - */ -class U_COMMON_API Normalizer2 : public UObject { -public: - /** - * Destructor. - * @stable ICU 4.4 - */ - ~Normalizer2(); - - /** - * Returns a Normalizer2 instance for Unicode NFC normalization. - * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ - static const Normalizer2 * - getNFCInstance(UErrorCode &errorCode); - - /** - * Returns a Normalizer2 instance for Unicode NFD normalization. - * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ - static const Normalizer2 * - getNFDInstance(UErrorCode &errorCode); - - /** - * Returns a Normalizer2 instance for Unicode NFKC normalization. - * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ - static const Normalizer2 * - getNFKCInstance(UErrorCode &errorCode); - - /** - * Returns a Normalizer2 instance for Unicode NFKD normalization. - * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ - static const Normalizer2 * - getNFKDInstance(UErrorCode &errorCode); - - /** - * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization. - * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ - static const Normalizer2 * - getNFKCCasefoldInstance(UErrorCode &errorCode); - - /** - * Returns a Normalizer2 instance which uses the specified data file - * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) - * and which composes or decomposes text according to the specified mode. - * Returns an unmodifiable singleton instance. Do not delete it. - * - * Use packageName=NULL for data files that are part of ICU's own data. - * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. - * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. 
- * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. - * - * @param packageName NULL for ICU built-in data, otherwise application data package name - * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file - * @param mode normalization mode (compose or decompose etc.) - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 4.4 - */ - static const Normalizer2 * - getInstance(const char *packageName, - const char *name, - UNormalization2Mode mode, - UErrorCode &errorCode); - - /** - * Returns the normalized form of the source string. - * @param src source string - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return normalized src - * @stable ICU 4.4 - */ - UnicodeString - normalize(const UnicodeString &src, UErrorCode &errorCode) const { - UnicodeString result; - normalize(src, result, errorCode); - return result; - } - /** - * Writes the normalized form of the source string to the destination string - * (replacing its contents) and returns the destination string. - * The source and destination strings must be different objects. - * @param src source string - * @param dest destination string; its contents is replaced with normalized src - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return dest - * @stable ICU 4.4 - */ - virtual UnicodeString & - normalize(const UnicodeString &src, - UnicodeString &dest, - UErrorCode &errorCode) const = 0; - - /** - * Normalizes a UTF-8 string and optionally records how source substrings - * relate to changed and unchanged result substrings. - * - * Currently implemented completely only for "compose" modes, - * such as for NFC, NFKC, and NFKC_Casefold - * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). - * Otherwise currently converts to & from UTF-16 and does not support edits. - * - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. - * @param src Source UTF-8 string. - * @param sink A ByteSink to which the normalized UTF-8 result string is written. - * sink.Flush() is called at the end. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be nullptr. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @stable ICU 60 - */ - virtual void - normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, - Edits *edits, UErrorCode &errorCode) const; - - /** - * Appends the normalized form of the second string to the first string - * (merging them at the boundary) and returns the first string. - * The result is normalized if the first string was normalized. - * The first and second strings must be different objects. - * @param first string, should be normalized - * @param second string, will be normalized - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. 
Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return first - * @stable ICU 4.4 - */ - virtual UnicodeString & - normalizeSecondAndAppend(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const = 0; - /** - * Appends the second string to the first string - * (merging them at the boundary) and returns the first string. - * The result is normalized if both the strings were normalized. - * The first and second strings must be different objects. - * @param first string, should be normalized - * @param second string, should be normalized - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return first - * @stable ICU 4.4 - */ - virtual UnicodeString & - append(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const = 0; - - /** - * Gets the decomposition mapping of c. - * Roughly equivalent to normalizing the String form of c - * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function - * returns FALSE and does not write a string - * if c does not have a decomposition mapping in this instance's data. - * This function is independent of the mode of the Normalizer2. - * @param c code point - * @param decomposition String object which will be set to c's - * decomposition mapping, if there is one. - * @return TRUE if c has a decomposition, otherwise FALSE - * @stable ICU 4.6 - */ - virtual UBool - getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0; - - /** - * Gets the raw decomposition mapping of c. - * - * This is similar to the getDecomposition() method but returns the - * raw decomposition mapping as specified in UnicodeData.txt or - * (for custom data) in the mapping files processed by the gennorm2 tool. 
- * By contrast, getDecomposition() returns the processed, - * recursively-decomposed version of this mapping. - * - * When used on a standard NFKC Normalizer2 instance, - * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. - * - * When used on a standard NFC Normalizer2 instance, - * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); - * in this case, the result contains either one or two code points (=1..4 char16_ts). - * - * This function is independent of the mode of the Normalizer2. - * The default implementation returns FALSE. - * @param c code point - * @param decomposition String object which will be set to c's - * raw decomposition mapping, if there is one. - * @return TRUE if c has a decomposition, otherwise FALSE - * @stable ICU 49 - */ - virtual UBool - getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; - - /** - * Performs pairwise composition of a & b and returns the composite if there is one. - * - * Returns a composite code point c only if c has a two-way mapping to a+b. - * In standard Unicode normalization, this means that - * c has a canonical decomposition to a+b - * and c does not have the Full_Composition_Exclusion property. - * - * This function is independent of the mode of the Normalizer2. - * The default implementation returns a negative value. - * @param a A (normalization starter) code point. - * @param b Another code point. - * @return The non-negative composite code point if there is one; otherwise a negative value. - * @stable ICU 49 - */ - virtual UChar32 - composePair(UChar32 a, UChar32 b) const; - - /** - * Gets the combining class of c. - * The default implementation returns 0 - * but all standard implementations return the Unicode Canonical_Combining_Class value. - * @param c code point - * @return c's combining class - * @stable ICU 49 - */ - virtual uint8_t - getCombiningClass(UChar32 c) const; - - /** - * Tests if the string is normalized. 
- * Internally, in cases where the quickCheck() method would return "maybe" - * (which is only possible for the two COMPOSE modes) this method - * resolves to "yes" or "no" to provide a definitive result, - * at the cost of doing more work in those cases. - * @param s input string - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return TRUE if s is normalized - * @stable ICU 4.4 - */ - virtual UBool - isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; - /** - * Tests if the UTF-8 string is normalized. - * Internally, in cases where the quickCheck() method would return "maybe" - * (which is only possible for the two COMPOSE modes) this method - * resolves to "yes" or "no" to provide a definitive result, - * at the cost of doing more work in those cases. - * - * This works for all normalization modes, - * but it is currently optimized for UTF-8 only for "compose" modes, - * such as for NFC, NFKC, and NFKC_Casefold - * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). - * For other modes it currently converts to UTF-16 and calls isNormalized(). - * - * @param s UTF-8 input string - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return TRUE if s is normalized - * @stable ICU 60 - */ - virtual UBool - isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const; - - - /** - * Tests if the string is normalized. - * For the two COMPOSE modes, the result could be "maybe" in cases that - * would take a little more work to resolve definitively. 
- * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster - * combination of quick check + normalization, to avoid - * re-checking the "yes" prefix. - * @param s input string - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return UNormalizationCheckResult - * @stable ICU 4.4 - */ - virtual UNormalizationCheckResult - quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0; - - /** - * Returns the end of the normalized substring of the input string. - * In other words, with end=spanQuickCheckYes(s, ec); - * the substring UnicodeString(s, 0, end) - * will pass the quick check with a "yes" result. - * - * The returned end index is usually one or more characters before the - * "no" or "maybe" character: The end index is at a normalization boundary. - * (See the class documentation for more about normalization boundaries.) - * - * When the goal is a normalized string and most input strings are expected - * to be normalized already, then call this method, - * and if it returns a prefix shorter than the input string, - * copy that prefix and use normalizeSecondAndAppend() for the remainder. - * @param s input string - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return "yes" span end index - * @stable ICU 4.4 - */ - virtual int32_t - spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0; - - /** - * Tests if the character always has a normalization boundary before it, - * regardless of context. - * If true, then the character does not normalization-interact with - * preceding characters. 
- * In other words, a string containing this character can be normalized - * by processing portions before this character and starting from this - * character independently. - * This is used for iterative normalization. See the class documentation for details. - * @param c character to test - * @return TRUE if c has a normalization boundary before it - * @stable ICU 4.4 - */ - virtual UBool hasBoundaryBefore(UChar32 c) const = 0; - - /** - * Tests if the character always has a normalization boundary after it, - * regardless of context. - * If true, then the character does not normalization-interact with - * following characters. - * In other words, a string containing this character can be normalized - * by processing portions up to this character and after this - * character independently. - * This is used for iterative normalization. See the class documentation for details. - * Note that this operation may be significantly slower than hasBoundaryBefore(). - * @param c character to test - * @return TRUE if c has a normalization boundary after it - * @stable ICU 4.4 - */ - virtual UBool hasBoundaryAfter(UChar32 c) const = 0; - - /** - * Tests if the character is normalization-inert. - * If true, then the character does not change, nor normalization-interact with - * preceding or following characters. - * In other words, a string containing this character can be normalized - * by processing portions before this character and after this - * character independently. - * This is used for iterative normalization. See the class documentation for details. - * Note that this operation may be significantly slower than hasBoundaryBefore(). - * @param c character to test - * @return TRUE if c is normalization-inert - * @stable ICU 4.4 - */ - virtual UBool isInert(UChar32 c) const = 0; -}; - -/** - * Normalization filtered by a UnicodeSet. - * Normalizes portions of the text contained in the filter set and leaves - * portions not contained in the filter set unchanged. 
- * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE). - * Not-in-the-filter text is treated as "is normalized" and "quick check yes". - * This class implements all of (and only) the Normalizer2 API. - * An instance of this class is unmodifiable/immutable but is constructed and - * must be destructed by the owner. - * @stable ICU 4.4 - */ -class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { -public: - /** - * Constructs a filtered normalizer wrapping any Normalizer2 instance - * and a filter set. - * Both are aliased and must not be modified or deleted while this object - * is used. - * The filter set should be frozen; otherwise the performance will suffer greatly. - * @param n2 wrapped Normalizer2 instance - * @param filterSet UnicodeSet which determines the characters to be normalized - * @stable ICU 4.4 - */ - FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : - norm2(n2), set(filterSet) {} - - /** - * Destructor. - * @stable ICU 4.4 - */ - ~FilteredNormalizer2(); - - /** - * Writes the normalized form of the source string to the destination string - * (replacing its contents) and returns the destination string. - * The source and destination strings must be different objects. - * @param src source string - * @param dest destination string; its contents is replaced with normalized src - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return dest - * @stable ICU 4.4 - */ - virtual UnicodeString & - normalize(const UnicodeString &src, - UnicodeString &dest, - UErrorCode &errorCode) const U_OVERRIDE; - - /** - * Normalizes a UTF-8 string and optionally records how source substrings - * relate to changed and unchanged result substrings. 
- * - * Currently implemented completely only for "compose" modes, - * such as for NFC, NFKC, and NFKC_Casefold - * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). - * Otherwise currently converts to & from UTF-16 and does not support edits. - * - * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. - * @param src Source UTF-8 string. - * @param sink A ByteSink to which the normalized UTF-8 result string is written. - * sink.Flush() is called at the end. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). - * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first unless - * options includes U_EDITS_NO_RESET. edits can be nullptr. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @stable ICU 60 - */ - virtual void - normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, - Edits *edits, UErrorCode &errorCode) const U_OVERRIDE; - - /** - * Appends the normalized form of the second string to the first string - * (merging them at the boundary) and returns the first string. - * The result is normalized if the first string was normalized. - * The first and second strings must be different objects. - * @param first string, should be normalized - * @param second string, will be normalized - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return first - * @stable ICU 4.4 - */ - virtual UnicodeString & - normalizeSecondAndAppend(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const U_OVERRIDE; - /** - * Appends the second string to the first string - * (merging them at the boundary) and returns the first string. - * The result is normalized if both the strings were normalized. - * The first and second strings must be different objects. - * @param first string, should be normalized - * @param second string, should be normalized - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return first - * @stable ICU 4.4 - */ - virtual UnicodeString & - append(UnicodeString &first, - const UnicodeString &second, - UErrorCode &errorCode) const U_OVERRIDE; - - /** - * Gets the decomposition mapping of c. - * For details see the base class documentation. - * - * This function is independent of the mode of the Normalizer2. - * @param c code point - * @param decomposition String object which will be set to c's - * decomposition mapping, if there is one. - * @return TRUE if c has a decomposition, otherwise FALSE - * @stable ICU 4.6 - */ - virtual UBool - getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE; - - /** - * Gets the raw decomposition mapping of c. - * For details see the base class documentation. - * - * This function is independent of the mode of the Normalizer2. - * @param c code point - * @param decomposition String object which will be set to c's - * raw decomposition mapping, if there is one. 
- * @return TRUE if c has a decomposition, otherwise FALSE - * @stable ICU 49 - */ - virtual UBool - getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE; - - /** - * Performs pairwise composition of a & b and returns the composite if there is one. - * For details see the base class documentation. - * - * This function is independent of the mode of the Normalizer2. - * @param a A (normalization starter) code point. - * @param b Another code point. - * @return The non-negative composite code point if there is one; otherwise a negative value. - * @stable ICU 49 - */ - virtual UChar32 - composePair(UChar32 a, UChar32 b) const U_OVERRIDE; - - /** - * Gets the combining class of c. - * The default implementation returns 0 - * but all standard implementations return the Unicode Canonical_Combining_Class value. - * @param c code point - * @return c's combining class - * @stable ICU 49 - */ - virtual uint8_t - getCombiningClass(UChar32 c) const U_OVERRIDE; - - /** - * Tests if the string is normalized. - * For details see the Normalizer2 base class documentation. - * @param s input string - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return TRUE if s is normalized - * @stable ICU 4.4 - */ - virtual UBool - isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE; - /** - * Tests if the UTF-8 string is normalized. - * Internally, in cases where the quickCheck() method would return "maybe" - * (which is only possible for the two COMPOSE modes) this method - * resolves to "yes" or "no" to provide a definitive result, - * at the cost of doing more work in those cases. 
- * - * This works for all normalization modes, - * but it is currently optimized for UTF-8 only for "compose" modes, - * such as for NFC, NFKC, and NFKC_Casefold - * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). - * For other modes it currently converts to UTF-16 and calls isNormalized(). - * - * @param s UTF-8 input string - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return TRUE if s is normalized - * @stable ICU 60 - */ - virtual UBool - isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE; - /** - * Tests if the string is normalized. - * For details see the Normalizer2 base class documentation. - * @param s input string - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return UNormalizationCheckResult - * @stable ICU 4.4 - */ - virtual UNormalizationCheckResult - quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE; - /** - * Returns the end of the normalized substring of the input string. - * For details see the Normalizer2 base class documentation. - * @param s input string - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return "yes" span end index - * @stable ICU 4.4 - */ - virtual int32_t - spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE; - - /** - * Tests if the character always has a normalization boundary before it, - * regardless of context. 
- * For details see the Normalizer2 base class documentation. - * @param c character to test - * @return TRUE if c has a normalization boundary before it - * @stable ICU 4.4 - */ - virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE; - - /** - * Tests if the character always has a normalization boundary after it, - * regardless of context. - * For details see the Normalizer2 base class documentation. - * @param c character to test - * @return TRUE if c has a normalization boundary after it - * @stable ICU 4.4 - */ - virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE; - - /** - * Tests if the character is normalization-inert. - * For details see the Normalizer2 base class documentation. - * @param c character to test - * @return TRUE if c is normalization-inert - * @stable ICU 4.4 - */ - virtual UBool isInert(UChar32 c) const U_OVERRIDE; -private: - UnicodeString & - normalize(const UnicodeString &src, - UnicodeString &dest, - USetSpanCondition spanCondition, - UErrorCode &errorCode) const; - - void - normalizeUTF8(uint32_t options, const char *src, int32_t length, - ByteSink &sink, Edits *edits, - USetSpanCondition spanCondition, - UErrorCode &errorCode) const; - - UnicodeString & - normalizeSecondAndAppend(UnicodeString &first, - const UnicodeString &second, - UBool doNormalize, - UErrorCode &errorCode) const; - - const Normalizer2 &norm2; - const UnicodeSet &set; -}; - -U_NAMESPACE_END - -#endif // !UCONFIG_NO_NORMALIZATION -#endif // __NORMALIZER2_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/normlzr.h b/deps/node/deps/icu-small/source/common/unicode/normlzr.h deleted file mode 100644 index 82335ae6..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/normlzr.h +++ /dev/null @@ -1,809 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* - ******************************************************************** - * COPYRIGHT: - * Copyright (c) 1996-2015, International Business Machines Corporation and - * others. All Rights Reserved. - ******************************************************************** - */ - -#ifndef NORMLZR_H -#define NORMLZR_H - -#include "unicode/utypes.h" - -/** - * \file - * \brief C++ API: Unicode Normalization - */ - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/chariter.h" -#include "unicode/normalizer2.h" -#include "unicode/unistr.h" -#include "unicode/unorm.h" -#include "unicode/uobject.h" - -U_NAMESPACE_BEGIN -/** - * Old Unicode normalization API. - * - * This API has been replaced by the Normalizer2 class and is only available - * for backward compatibility. This class simply delegates to the Normalizer2 class. - * There is one exception: The new API does not provide a replacement for Normalizer::compare(). - * - * The Normalizer class supports the standard normalization forms described in - * - * Unicode Standard Annex #15: Unicode Normalization Forms. - * - * The Normalizer class consists of two parts: - * - static functions that normalize strings or test if strings are normalized - * - a Normalizer object is an iterator that takes any kind of text and - * provides iteration over its normalized form - * - * The Normalizer class is not suitable for subclassing. - * - * For basic information about normalization forms and details about the C API - * please see the documentation in unorm.h. - * - * The iterator API with the Normalizer constructors and the non-static functions - * use a CharacterIterator as input. It is possible to pass a string which - * is then internally wrapped in a CharacterIterator. - * The input text is not normalized all at once, but incrementally where needed - * (providing efficient random access). 
- * This allows to pass in a large text but spend only a small amount of time - * normalizing a small part of that text. - * However, if the entire text is normalized, then the iterator will be - * slower than normalizing the entire text at once and iterating over the result. - * A possible use of the Normalizer iterator is also to report an index into the - * original text that is close to where the normalized characters come from. - * - * Important: The iterator API was cleaned up significantly for ICU 2.0. - * The earlier implementation reported the getIndex() inconsistently, - * and previous() could not be used after setIndex(), next(), first(), and current(). - * - * Normalizer allows to start normalizing from anywhere in the input text by - * calling setIndexOnly(), first(), or last(). - * Without calling any of these, the iterator will start at the beginning of the text. - * - * At any time, next() returns the next normalized code point (UChar32), - * with post-increment semantics (like CharacterIterator::next32PostInc()). - * previous() returns the previous normalized code point (UChar32), - * with pre-decrement semantics (like CharacterIterator::previous32()). - * - * current() returns the current code point - * (respectively the one at the newly set index) without moving - * the getIndex(). Note that if the text at the current position - * needs to be normalized, then these functions will do that. - * (This is why current() is not const.) - * It is more efficient to call setIndexOnly() instead, which does not - * normalize. - * - * getIndex() always refers to the position in the input text where the normalized - * code points are returned from. It does not always change with each returned - * code point. - * The code point that is returned from any of the functions - * corresponds to text at or after getIndex(), according to the - * function's iteration semantics (post-increment or pre-decrement). 
- * - * next() returns a code point from at or after the getIndex() - * from before the next() call. After the next() call, the getIndex() - * might have moved to where the next code point will be returned from - * (from a next() or current() call). - * This is semantically equivalent to array access with array[index++] - * (post-increment semantics). - * - * previous() returns a code point from at or after the getIndex() - * from after the previous() call. - * This is semantically equivalent to array access with array[--index] - * (pre-decrement semantics). - * - * Internally, the Normalizer iterator normalizes a small piece of text - * starting at the getIndex() and ending at a following "safe" index. - * The normalized results is stored in an internal string buffer, and - * the code points are iterated from there. - * With multiple iteration calls, this is repeated until the next piece - * of text needs to be normalized, and the getIndex() needs to be moved. - * - * The following "safe" index, the internal buffer, and the secondary - * iteration index into that buffer are not exposed on the API. - * This also means that it is currently not practical to return to - * a particular, arbitrary position in the text because one would need to - * know, and be able to set, in addition to the getIndex(), at least also the - * current index into the internal buffer. - * It is currently only possible to observe when getIndex() changes - * (with careful consideration of the iteration semantics), - * at which time the internal index will be 0. - * For example, if getIndex() is different after next() than before it, - * then the internal index is 0 and one can return to this getIndex() - * later with setIndexOnly(). - * - * Note: While the setIndex() and getIndex() refer to indices in the - * underlying Unicode input text, the next() and previous() methods - * iterate through characters in the normalized output. 
- * This means that there is not necessarily a one-to-one correspondence - * between characters returned by next() and previous() and the indices - * passed to and returned from setIndex() and getIndex(). - * It is for this reason that Normalizer does not implement the CharacterIterator interface. - * - * @author Laura Werner, Mark Davis, Markus Scherer - * @stable ICU 2.0 - */ -class U_COMMON_API Normalizer : public UObject { -public: -#ifndef U_HIDE_DEPRECATED_API - /** - * If DONE is returned from an iteration function that returns a code point, - * then there are no more normalization results available. - * @deprecated ICU 56 Use Normalizer2 instead. - */ - enum { - DONE=0xffff - }; - - // Constructors - - /** - * Creates a new Normalizer object for iterating over the - * normalized form of a given string. - *

- * @param str The string to be normalized. The normalization - * will start at the beginning of the string. - * - * @param mode The normalization mode. - * @deprecated ICU 56 Use Normalizer2 instead. - */ - Normalizer(const UnicodeString& str, UNormalizationMode mode); - - /** - * Creates a new Normalizer object for iterating over the - * normalized form of a given string. - *

- * @param str The string to be normalized. The normalization - * will start at the beginning of the string. - * - * @param length Length of the string, or -1 if NUL-terminated. - * @param mode The normalization mode. - * @deprecated ICU 56 Use Normalizer2 instead. - */ - Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode); - - /** - * Creates a new Normalizer object for iterating over the - * normalized form of the given text. - *

- * @param iter The input text to be normalized. The normalization - * will start at the beginning of the string. - * - * @param mode The normalization mode. - * @deprecated ICU 56 Use Normalizer2 instead. - */ - Normalizer(const CharacterIterator& iter, UNormalizationMode mode); -#endif /* U_HIDE_DEPRECATED_API */ - - /** - * Copy constructor. - * @param copy The object to be copied. - * @deprecated ICU 56 Use Normalizer2 instead. - */ - Normalizer(const Normalizer& copy); - - /** - * Destructor - * @deprecated ICU 56 Use Normalizer2 instead. - */ - virtual ~Normalizer(); - - - //------------------------------------------------------------------------- - // Static utility methods - //------------------------------------------------------------------------- - -#ifndef U_HIDE_DEPRECATED_API - /** - * Normalizes a UnicodeString according to the specified normalization mode. - * This is a wrapper for unorm_normalize(), using UnicodeString's. - * - * The options parameter specifies which optional - * Normalizer features are to be enabled for this operation. - * - * @param source the input string to be normalized. - * @param mode the normalization mode - * @param options the optional features to be enabled (0 for no options) - * @param result The normalized string (on output). - * @param status The error code. - * @deprecated ICU 56 Use Normalizer2 instead. - */ - static void U_EXPORT2 normalize(const UnicodeString& source, - UNormalizationMode mode, int32_t options, - UnicodeString& result, - UErrorCode &status); - - /** - * Compose a UnicodeString. - * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC. - * This is a wrapper for unorm_normalize(), using UnicodeString's. - * - * The options parameter specifies which optional - * Normalizer features are to be enabled for this operation. - * - * @param source the string to be composed. - * @param compat Perform compatibility decomposition before composition. 
- * If this argument is FALSE, only canonical - * decomposition will be performed. - * @param options the optional features to be enabled (0 for no options) - * @param result The composed string (on output). - * @param status The error code. - * @deprecated ICU 56 Use Normalizer2 instead. - */ - static void U_EXPORT2 compose(const UnicodeString& source, - UBool compat, int32_t options, - UnicodeString& result, - UErrorCode &status); - - /** - * Static method to decompose a UnicodeString. - * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD. - * This is a wrapper for unorm_normalize(), using UnicodeString's. - * - * The options parameter specifies which optional - * Normalizer features are to be enabled for this operation. - * - * @param source the string to be decomposed. - * @param compat Perform compatibility decomposition. - * If this argument is FALSE, only canonical - * decomposition will be performed. - * @param options the optional features to be enabled (0 for no options) - * @param result The decomposed string (on output). - * @param status The error code. - * @deprecated ICU 56 Use Normalizer2 instead. - */ - static void U_EXPORT2 decompose(const UnicodeString& source, - UBool compat, int32_t options, - UnicodeString& result, - UErrorCode &status); - - /** - * Performing quick check on a string, to quickly determine if the string is - * in a particular normalization format. - * This is a wrapper for unorm_quickCheck(), using a UnicodeString. - * - * Three types of result can be returned UNORM_YES, UNORM_NO or - * UNORM_MAYBE. Result UNORM_YES indicates that the argument - * string is in the desired normalized format, UNORM_NO determines that - * argument string is not in the desired normalized format. A - * UNORM_MAYBE result indicates that a more thorough check is required, - * the user may have to put the string in its normalized form and compare the - * results. 
- * @param source string for determining if it is in a normalized format - * @param mode normalization format - * @param status A reference to a UErrorCode to receive any errors - * @return UNORM_YES, UNORM_NO or UNORM_MAYBE - * - * @see isNormalized - * @deprecated ICU 56 Use Normalizer2 instead. - */ - static inline UNormalizationCheckResult - quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status); - - /** - * Performing quick check on a string; same as the other version of quickCheck - * but takes an extra options parameter like most normalization functions. - * - * @param source string for determining if it is in a normalized format - * @param mode normalization format - * @param options the optional features to be enabled (0 for no options) - * @param status A reference to a UErrorCode to receive any errors - * @return UNORM_YES, UNORM_NO or UNORM_MAYBE - * - * @see isNormalized - * @deprecated ICU 56 Use Normalizer2 instead. - */ - static UNormalizationCheckResult - quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status); - - /** - * Test if a string is in a given normalization form. - * This is semantically equivalent to source.equals(normalize(source, mode)) . - * - * Unlike unorm_quickCheck(), this function returns a definitive result, - * never a "maybe". - * For NFD, NFKD, and FCD, both functions work exactly the same. - * For NFC and NFKC where quickCheck may return "maybe", this function will - * perform further tests to arrive at a TRUE/FALSE result. - * - * @param src String that is to be tested if it is in a normalization format. - * @param mode Which normalization form to test for. - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Boolean value indicating whether the source string is in the - * "mode" normalization form. - * - * @see quickCheck - * @deprecated ICU 56 Use Normalizer2 instead. 
- */ - static inline UBool - isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode); - - /** - * Test if a string is in a given normalization form; same as the other version of isNormalized - * but takes an extra options parameter like most normalization functions. - * - * @param src String that is to be tested if it is in a normalization format. - * @param mode Which normalization form to test for. - * @param options the optional features to be enabled (0 for no options) - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Boolean value indicating whether the source string is in the - * "mode" normalization form. - * - * @see quickCheck - * @deprecated ICU 56 Use Normalizer2 instead. - */ - static UBool - isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode); - - /** - * Concatenate normalized strings, making sure that the result is normalized as well. - * - * If both the left and the right strings are in - * the normalization form according to "mode/options", - * then the result will be - * - * \code - * dest=normalize(left+right, mode, options) - * \endcode - * - * For details see unorm_concatenate in unorm.h. - * - * @param left Left source string. - * @param right Right source string. - * @param result The output string. - * @param mode The normalization mode. - * @param options A bit set of normalization options. - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return result - * - * @see unorm_concatenate - * @see normalize - * @see unorm_next - * @see unorm_previous - * - * @deprecated ICU 56 Use Normalizer2 instead. 
- */ - static UnicodeString & - U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right, - UnicodeString &result, - UNormalizationMode mode, int32_t options, - UErrorCode &errorCode); -#endif /* U_HIDE_DEPRECATED_API */ - - /** - * Compare two strings for canonical equivalence. - * Further options include case-insensitive comparison and - * code point order (as opposed to code unit order). - * - * Canonical equivalence between two strings is defined as their normalized - * forms (NFD or NFC) being identical. - * This function compares strings incrementally instead of normalizing - * (and optionally case-folding) both strings entirely, - * improving performance significantly. - * - * Bulk normalization is only necessary if the strings do not fulfill the FCD - * conditions. Only in this case, and only if the strings are relatively long, - * is memory allocated temporarily. - * For FCD strings and short non-FCD strings there is no memory allocation. - * - * Semantically, this is equivalent to - * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2))) - * where code point order and foldCase are all optional. - * - * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match - * the case folding must be performed first, then the normalization. - * - * @param s1 First source string. - * @param s2 Second source string. - * - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Case-sensitive comparison in code unit order, and the input strings - * are quick-checked for FCD. - * - * - UNORM_INPUT_IS_FCD - * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. - * If not set, the function will quickCheck for FCD - * and normalize if necessary. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). 
- * - * - U_COMPARE_IGNORE_CASE - * Set to compare strings case-insensitively using case folding, - * instead of case-sensitively. - * If set, then the following case folding options are used. - * - * - Options as used with case-insensitive comparisons, currently: - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * (see u_strCaseCompare for details) - * - * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT - * - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return <0 or 0 or >0 as usual for string comparisons - * - * @see unorm_compare - * @see normalize - * @see UNORM_FCD - * @see u_strCompare - * @see u_strCaseCompare - * - * @stable ICU 2.2 - */ - static inline int32_t - compare(const UnicodeString &s1, const UnicodeString &s2, - uint32_t options, - UErrorCode &errorCode); - -#ifndef U_HIDE_DEPRECATED_API - //------------------------------------------------------------------------- - // Iteration API - //------------------------------------------------------------------------- - - /** - * Return the current character in the normalized text. - * current() may need to normalize some text at getIndex(). - * The getIndex() is not changed. - * - * @return the current normalized code point - * @deprecated ICU 56 Use Normalizer2 instead. - */ - UChar32 current(void); - - /** - * Return the first character in the normalized text. - * This is equivalent to setIndexOnly(startIndex()) followed by next(). - * (Post-increment semantics.) - * - * @return the first normalized code point - * @deprecated ICU 56 Use Normalizer2 instead. - */ - UChar32 first(void); - - /** - * Return the last character in the normalized text. - * This is equivalent to setIndexOnly(endIndex()) followed by previous(). - * (Pre-decrement semantics.) - * - * @return the last normalized code point - * @deprecated ICU 56 Use Normalizer2 instead. 
- */ - UChar32 last(void); - - /** - * Return the next character in the normalized text. - * (Post-increment semantics.) - * If the end of the text has already been reached, DONE is returned. - * The DONE value could be confused with a U+FFFF non-character code point - * in the text. If this is possible, you can test getIndex()startIndex() || first()!=DONE). (Calling first() will change - * the iterator state!) - * - * The C API unorm_previous() is more efficient and does not have this ambiguity. - * - * @return the previous normalized code point - * @deprecated ICU 56 Use Normalizer2 instead. - */ - UChar32 previous(void); - - /** - * Set the iteration position in the input text that is being normalized, - * without any immediate normalization. - * After setIndexOnly(), getIndex() will return the same index that is - * specified here. - * - * @param index the desired index in the input text. - * @deprecated ICU 56 Use Normalizer2 instead. - */ - void setIndexOnly(int32_t index); - - /** - * Reset the index to the beginning of the text. - * This is equivalent to setIndexOnly(startIndex)). - * @deprecated ICU 56 Use Normalizer2 instead. - */ - void reset(void); - - /** - * Retrieve the current iteration position in the input text that is - * being normalized. - * - * A following call to next() will return a normalized code point from - * the input text at or after this index. - * - * After a call to previous(), getIndex() will point at or before the - * position in the input text where the normalized code point - * was returned from with previous(). - * - * @return the current index in the input text - * @deprecated ICU 56 Use Normalizer2 instead. - */ - int32_t getIndex(void) const; - - /** - * Retrieve the index of the start of the input text. This is the begin index - * of the CharacterIterator or the start (i.e. index 0) of the string - * over which this Normalizer is iterating. 
- * - * @return the smallest index in the input text where the Normalizer operates - * @deprecated ICU 56 Use Normalizer2 instead. - */ - int32_t startIndex(void) const; - - /** - * Retrieve the index of the end of the input text. This is the end index - * of the CharacterIterator or the length of the string - * over which this Normalizer is iterating. - * This end index is exclusive, i.e., the Normalizer operates only on characters - * before this index. - * - * @return the first index in the input text where the Normalizer does not operate - * @deprecated ICU 56 Use Normalizer2 instead. - */ - int32_t endIndex(void) const; - - /** - * Returns TRUE when both iterators refer to the same character in the same - * input text. - * - * @param that a Normalizer object to compare this one to - * @return comparison result - * @deprecated ICU 56 Use Normalizer2 instead. - */ - UBool operator==(const Normalizer& that) const; - - /** - * Returns FALSE when both iterators refer to the same character in the same - * input text. - * - * @param that a Normalizer object to compare this one to - * @return comparison result - * @deprecated ICU 56 Use Normalizer2 instead. - */ - inline UBool operator!=(const Normalizer& that) const; - - /** - * Returns a pointer to a new Normalizer that is a clone of this one. - * The caller is responsible for deleting the new clone. - * @return a pointer to a new Normalizer - * @deprecated ICU 56 Use Normalizer2 instead. - */ - Normalizer* clone(void) const; - - /** - * Generates a hash code for this iterator. - * - * @return the hash code - * @deprecated ICU 56 Use Normalizer2 instead. - */ - int32_t hashCode(void) const; - - //------------------------------------------------------------------------- - // Property access methods - //------------------------------------------------------------------------- - - /** - * Set the normalization mode for this object. - *

- * Note:If the normalization mode is changed while iterating - * over a string, calls to {@link #next() } and {@link #previous() } may - * return previously buffers characters in the old normalization mode - * until the iteration is able to re-sync at the next base character. - * It is safest to call {@link #setIndexOnly }, {@link #reset() }, - * {@link #setText }, {@link #first() }, - * {@link #last() }, etc. after calling setMode. - *

- * @param newMode the new mode for this Normalizer. - * @see #getUMode - * @deprecated ICU 56 Use Normalizer2 instead. - */ - void setMode(UNormalizationMode newMode); - - /** - * Return the normalization mode for this object. - * - * This is an unusual name because there used to be a getMode() that - * returned a different type. - * - * @return the mode for this Normalizer - * @see #setMode - * @deprecated ICU 56 Use Normalizer2 instead. - */ - UNormalizationMode getUMode(void) const; - - /** - * Set options that affect this Normalizer's operation. - * Options do not change the basic composition or decomposition operation - * that is being performed, but they control whether - * certain optional portions of the operation are done. - * Currently the only available option is obsolete. - * - * It is possible to specify multiple options that are all turned on or off. - * - * @param option the option(s) whose value is/are to be set. - * @param value the new setting for the option. Use TRUE to - * turn the option(s) on and FALSE to turn it/them off. - * - * @see #getOption - * @deprecated ICU 56 Use Normalizer2 instead. - */ - void setOption(int32_t option, - UBool value); - - /** - * Determine whether an option is turned on or off. - * If multiple options are specified, then the result is TRUE if any - * of them are set. - *

- * @param option the option(s) that are to be checked - * @return TRUE if any of the option(s) are set - * @see #setOption - * @deprecated ICU 56 Use Normalizer2 instead. - */ - UBool getOption(int32_t option) const; - - /** - * Set the input text over which this Normalizer will iterate. - * The iteration position is set to the beginning. - * - * @param newText a string that replaces the current input text - * @param status a UErrorCode - * @deprecated ICU 56 Use Normalizer2 instead. - */ - void setText(const UnicodeString& newText, - UErrorCode &status); - - /** - * Set the input text over which this Normalizer will iterate. - * The iteration position is set to the beginning. - * - * @param newText a CharacterIterator object that replaces the current input text - * @param status a UErrorCode - * @deprecated ICU 56 Use Normalizer2 instead. - */ - void setText(const CharacterIterator& newText, - UErrorCode &status); - - /** - * Set the input text over which this Normalizer will iterate. - * The iteration position is set to the beginning. - * - * @param newText a string that replaces the current input text - * @param length the length of the string, or -1 if NUL-terminated - * @param status a UErrorCode - * @deprecated ICU 56 Use Normalizer2 instead. - */ - void setText(ConstChar16Ptr newText, - int32_t length, - UErrorCode &status); - /** - * Copies the input text into the UnicodeString argument. - * - * @param result Receives a copy of the text under iteration. - * @deprecated ICU 56 Use Normalizer2 instead. - */ - void getText(UnicodeString& result); - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - * @returns a UClassID for this class. - * @deprecated ICU 56 Use Normalizer2 instead. - */ - static UClassID U_EXPORT2 getStaticClassID(); -#endif /* U_HIDE_DEPRECATED_API */ - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - * @return a UClassID for the actual class. - * @deprecated ICU 56 Use Normalizer2 instead. 
- */ - virtual UClassID getDynamicClassID() const; - -private: - //------------------------------------------------------------------------- - // Private functions - //------------------------------------------------------------------------- - - Normalizer(); // default constructor not implemented - Normalizer &operator=(const Normalizer &that); // assignment operator not implemented - - // Private utility methods for iteration - // For documentation, see the source code - UBool nextNormalize(); - UBool previousNormalize(); - - void init(); - void clearBuffer(void); - - //------------------------------------------------------------------------- - // Private data - //------------------------------------------------------------------------- - - FilteredNormalizer2*fFilteredNorm2; // owned if not NULL - const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2 - UNormalizationMode fUMode; // deprecated - int32_t fOptions; - - // The input text and our position in it - CharacterIterator *text; - - // The normalization buffer is the result of normalization - // of the source in [currentIndex..nextIndex[ . - int32_t currentIndex, nextIndex; - - // A buffer for holding intermediate results - UnicodeString buffer; - int32_t bufferPos; -}; - -//------------------------------------------------------------------------- -// Inline implementations -//------------------------------------------------------------------------- - -#ifndef U_HIDE_DEPRECATED_API -inline UBool -Normalizer::operator!= (const Normalizer& other) const -{ return ! 
operator==(other); } - -inline UNormalizationCheckResult -Normalizer::quickCheck(const UnicodeString& source, - UNormalizationMode mode, - UErrorCode &status) { - return quickCheck(source, mode, 0, status); -} - -inline UBool -Normalizer::isNormalized(const UnicodeString& source, - UNormalizationMode mode, - UErrorCode &status) { - return isNormalized(source, mode, 0, status); -} -#endif /* U_HIDE_DEPRECATED_API */ - -inline int32_t -Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, - uint32_t options, - UErrorCode &errorCode) { - // all argument checking is done in unorm_compare - return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(), - toUCharPtr(s2.getBuffer()), s2.length(), - options, - &errorCode); -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_NORMALIZATION */ - -#endif // NORMLZR_H diff --git a/deps/node/deps/icu-small/source/common/unicode/parseerr.h b/deps/node/deps/icu-small/source/common/unicode/parseerr.h deleted file mode 100644 index c0548760..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/parseerr.h +++ /dev/null @@ -1,94 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2005, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 03/14/00 aliu Creation. -* 06/27/00 aliu Change from C++ class to C struct -********************************************************************** -*/ -#ifndef PARSEERR_H -#define PARSEERR_H - -#include "unicode/utypes.h" - - -/** - * \file - * \brief C API: Parse Error Information - */ -/** - * The capacity of the context strings in UParseError. 
- * @stable ICU 2.0 - */ -enum { U_PARSE_CONTEXT_LEN = 16 }; - -/** - * A UParseError struct is used to returned detailed information about - * parsing errors. It is used by ICU parsing engines that parse long - * rules, patterns, or programs, where the text being parsed is long - * enough that more information than a UErrorCode is needed to - * localize the error. - * - *

The line, offset, and context fields are optional; parsing - * engines may choose not to use to use them. - * - *

The preContext and postContext strings include some part of the - * context surrounding the error. If the source text is "let for=7" - * and "for" is the error (e.g., because it is a reserved word), then - * some examples of what a parser might produce are the following: - * - *

- * preContext   postContext
- * ""           ""            The parser does not support context
- * "let "       "=7"          Pre- and post-context only
- * "let "       "for=7"       Pre- and post-context and error text
- * ""           "for"         Error text only
- * 
- * - *

Examples of engines which use UParseError (or may use it in the - * future) are Transliterator, RuleBasedBreakIterator, and - * RegexPattern. - * - * @stable ICU 2.0 - */ -typedef struct UParseError { - - /** - * The line on which the error occurred. If the parser uses this - * field, it sets it to the line number of the source text line on - * which the error appears, which will be a value >= 1. If the - * parse does not support line numbers, the value will be <= 0. - * @stable ICU 2.0 - */ - int32_t line; - - /** - * The character offset to the error. If the line field is >= 1, - * then this is the offset from the start of the line. Otherwise, - * this is the offset from the start of the text. If the parser - * does not support this field, it will have a value < 0. - * @stable ICU 2.0 - */ - int32_t offset; - - /** - * Textual context before the error. Null-terminated. The empty - * string if not supported by parser. - * @stable ICU 2.0 - */ - UChar preContext[U_PARSE_CONTEXT_LEN]; - - /** - * The error itself and/or textual context after the error. - * Null-terminated. The empty string if not supported by parser. - * @stable ICU 2.0 - */ - UChar postContext[U_PARSE_CONTEXT_LEN]; - -} UParseError; - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/parsepos.h b/deps/node/deps/icu-small/source/common/unicode/parsepos.h deleted file mode 100644 index 50cc56db..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/parsepos.h +++ /dev/null @@ -1,232 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -* Copyright (C) 1997-2005, International Business Machines Corporation and others. All Rights Reserved. -******************************************************************************* -* -* File PARSEPOS.H -* -* Modification History: -* -* Date Name Description -* 07/09/97 helena Converted from java. -* 07/17/98 stephen Added errorIndex support. 
-* 05/11/99 stephen Cleaned up. -******************************************************************************* -*/ - -#ifndef PARSEPOS_H -#define PARSEPOS_H - -#include "unicode/utypes.h" -#include "unicode/uobject.h" - - -U_NAMESPACE_BEGIN - -/** - * \file - * \brief C++ API: Canonical Iterator - */ -/** - * ParsePosition is a simple class used by Format - * and its subclasses to keep track of the current position during parsing. - * The parseObject method in the various Format - * classes requires a ParsePosition object as an argument. - * - *

- * By design, as you parse through a string with different formats, - * you can use the same ParsePosition, since the index parameter - * records the current position. - * - * The ParsePosition class is not suitable for subclassing. - * - * @version 1.3 10/30/97 - * @author Mark Davis, Helena Shih - * @see java.text.Format - */ - -class U_COMMON_API ParsePosition : public UObject { -public: - /** - * Default constructor, the index starts with 0 as default. - * @stable ICU 2.0 - */ - ParsePosition() - : UObject(), - index(0), - errorIndex(-1) - {} - - /** - * Create a new ParsePosition with the given initial index. - * @param newIndex the new text offset. - * @stable ICU 2.0 - */ - ParsePosition(int32_t newIndex) - : UObject(), - index(newIndex), - errorIndex(-1) - {} - - /** - * Copy constructor - * @param copy the object to be copied from. - * @stable ICU 2.0 - */ - ParsePosition(const ParsePosition& copy) - : UObject(copy), - index(copy.index), - errorIndex(copy.errorIndex) - {} - - /** - * Destructor - * @stable ICU 2.0 - */ - virtual ~ParsePosition(); - - /** - * Assignment operator - * @stable ICU 2.0 - */ - ParsePosition& operator=(const ParsePosition& copy); - - /** - * Equality operator. - * @return TRUE if the two parse positions are equal, FALSE otherwise. - * @stable ICU 2.0 - */ - UBool operator==(const ParsePosition& that) const; - - /** - * Equality operator. - * @return TRUE if the two parse positions are not equal, FALSE otherwise. - * @stable ICU 2.0 - */ - UBool operator!=(const ParsePosition& that) const; - - /** - * Clone this object. - * Clones can be used concurrently in multiple threads. - * If an error occurs, then NULL is returned. - * The caller must delete the clone. - * - * @return a clone of this object - * - * @see getDynamicClassID - * @stable ICU 2.8 - */ - ParsePosition *clone() const; - - /** - * Retrieve the current parse position. 
On input to a parse method, this - * is the index of the character at which parsing will begin; on output, it - * is the index of the character following the last character parsed. - * @return the current index. - * @stable ICU 2.0 - */ - int32_t getIndex(void) const; - - /** - * Set the current parse position. - * @param index the new index. - * @stable ICU 2.0 - */ - void setIndex(int32_t index); - - /** - * Set the index at which a parse error occurred. Formatters - * should set this before returning an error code from their - * parseObject method. The default value is -1 if this is not - * set. - * @stable ICU 2.0 - */ - void setErrorIndex(int32_t ei); - - /** - * Retrieve the index at which an error occurred, or -1 if the - * error index has not been set. - * @stable ICU 2.0 - */ - int32_t getErrorIndex(void) const; - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - * - * @stable ICU 2.2 - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - * - * @stable ICU 2.2 - */ - virtual UClassID getDynamicClassID() const; - -private: - /** - * Input: the place you start parsing. - *
Output: position where the parse stopped. - * This is designed to be used serially, - * with each call setting index up for the next one. - */ - int32_t index; - - /** - * The index at which a parse error occurred. - */ - int32_t errorIndex; - -}; - -inline ParsePosition& -ParsePosition::operator=(const ParsePosition& copy) -{ - index = copy.index; - errorIndex = copy.errorIndex; - return *this; -} - -inline UBool -ParsePosition::operator==(const ParsePosition& copy) const -{ - if(index != copy.index || errorIndex != copy.errorIndex) - return FALSE; - else - return TRUE; -} - -inline UBool -ParsePosition::operator!=(const ParsePosition& copy) const -{ - return !operator==(copy); -} - -inline int32_t -ParsePosition::getIndex() const -{ - return index; -} - -inline void -ParsePosition::setIndex(int32_t offset) -{ - this->index = offset; -} - -inline int32_t -ParsePosition::getErrorIndex() const -{ - return errorIndex; -} - -inline void -ParsePosition::setErrorIndex(int32_t ei) -{ - this->errorIndex = ei; -} -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/platform.h b/deps/node/deps/icu-small/source/common/unicode/platform.h deleted file mode 100644 index ee0d8b7a..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/platform.h +++ /dev/null @@ -1,855 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* FILE NAME : platform.h -* -* Date Name Description -* 05/13/98 nos Creation (content moved here from ptypes.h). -* 03/02/99 stephen Added AS400 support. -* 03/30/99 stephen Added Linux support. -* 04/13/99 stephen Reworked for autoconf. 
-****************************************************************************** -*/ - -#ifndef _PLATFORM_H -#define _PLATFORM_H - -#include "unicode/uconfig.h" -#include "unicode/uvernum.h" - -/** - * \file - * \brief Basic types for the platform. - * - * This file used to be generated by autoconf/configure. - * Starting with ICU 49, platform.h is a normal source file, - * to simplify cross-compiling and working with non-autoconf/make build systems. - * - * When a value in this file does not work on a platform, then please - * try to derive it from the U_PLATFORM value - * (for which we might need a new value constant in rare cases) - * and/or from other macros that are predefined by the compiler - * or defined in standard (POSIX or platform or compiler) headers. - * - * As a temporary workaround, you can add an explicit \#define for some macros - * before it is first tested, or add an equivalent -D macro definition - * to the compiler's command line. - * - * Note: Some compilers provide ways to show the predefined macros. - * For example, with gcc you can compile an empty .c file and have the compiler - * print the predefined macros with - * \code - * gcc -E -dM -x c /dev/null | sort - * \endcode - * (You can provide an actual empty .c file rather than /dev/null. - * -x c++ is for C++.) - */ - -/** - * Define some things so that they can be documented. - * @internal - */ -#ifdef U_IN_DOXYGEN -/* - * Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented. - * Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented. - */ - -/* None for now. */ -#endif - -/** - * \def U_PLATFORM - * The U_PLATFORM macro defines the platform we're on. - * - * We used to define one different, value-less macro per platform. 
- * That made it hard to know the set of relevant platforms and macros, - * and hard to deal with variants of platforms. - * - * Starting with ICU 49, we define platforms as numeric macros, - * with ranges of values for related platforms and their variants. - * The U_PLATFORM macro is set to one of these values. - * - * Historical note from the Solaris Wikipedia article: - * AT&T and Sun collaborated on a project to merge the most popular Unix variants - * on the market at that time: BSD, System V, and Xenix. - * This became Unix System V Release 4 (SVR4). - * - * @internal - */ - -/** Unknown platform. @internal */ -#define U_PF_UNKNOWN 0 -/** Windows @internal */ -#define U_PF_WINDOWS 1000 -/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */ -#define U_PF_MINGW 1800 -/** - * Cygwin. Windows, calls to cygwin1.dll for Posix functions, - * using MSVC or GNU gcc and binutils. - * @internal - */ -#define U_PF_CYGWIN 1900 -/* Reserve 2000 for U_PF_UNIX? */ -/** HP-UX is based on UNIX System V. @internal */ -#define U_PF_HPUX 2100 -/** Solaris is a Unix operating system based on SVR4. @internal */ -#define U_PF_SOLARIS 2600 -/** BSD is a UNIX operating system derivative. @internal */ -#define U_PF_BSD 3000 -/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */ -#define U_PF_AIX 3100 -/** IRIX is based on UNIX System V with BSD extensions. @internal */ -#define U_PF_IRIX 3200 -/** - * Darwin is a POSIX-compliant operating system, composed of code developed by Apple, - * as well as code derived from NeXTSTEP, BSD, and other projects, - * built around the Mach kernel. - * Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based. - * (Original description modified from WikiPedia.) - * @internal - */ -#define U_PF_DARWIN 3500 -/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */ -#define U_PF_IPHONE 3550 -/** QNX is a commercial Unix-like real-time operating system related to BSD. 
@internal */ -#define U_PF_QNX 3700 -/** Linux is a Unix-like operating system. @internal */ -#define U_PF_LINUX 4000 -/** - * Native Client is pretty close to Linux. - * See https://developer.chrome.com/native-client and - * http://www.chromium.org/nativeclient - * @internal - */ -#define U_PF_BROWSER_NATIVE_CLIENT 4020 -/** Android is based on Linux. @internal */ -#define U_PF_ANDROID 4050 -/** Fuchsia is a POSIX-ish platform. @internal */ -#define U_PF_FUCHSIA 4100 -/* Maximum value for Linux-based platform is 4499 */ -/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */ -#define U_PF_OS390 9000 -/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */ -#define U_PF_OS400 9400 - -#ifdef U_PLATFORM - /* Use the predefined value. */ -#elif defined(__MINGW32__) -# define U_PLATFORM U_PF_MINGW -#elif defined(__CYGWIN__) -# define U_PLATFORM U_PF_CYGWIN -#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) -# define U_PLATFORM U_PF_WINDOWS -#elif defined(__ANDROID__) -# define U_PLATFORM U_PF_ANDROID - /* Android wchar_t support depends on the API level. */ -# include -#elif defined(__pnacl__) || defined(__native_client__) -# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT -#elif defined(__Fuchsia__) -# define U_PLATFORM U_PF_FUCHSIA -#elif defined(linux) || defined(__linux__) || defined(__linux) -# define U_PLATFORM U_PF_LINUX -#elif defined(__APPLE__) && defined(__MACH__) -# include -# if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE /* variant of TARGET_OS_MAC */ -# define U_PLATFORM U_PF_IPHONE -# else -# define U_PLATFORM U_PF_DARWIN -# endif -#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__) -# if defined(__FreeBSD__) -# include -# endif -# define U_PLATFORM U_PF_BSD -#elif defined(sun) || defined(__sun) - /* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? 
*/ -# define U_PLATFORM U_PF_SOLARIS -# if defined(__GNUC__) - /* Solaris/GCC needs this header file to get the proper endianness. Normally, this - * header file is included with stddef.h but on Solairs/GCC, the GCC version of stddef.h - * is included which does not include this header file. - */ -# include -# endif -#elif defined(_AIX) || defined(__TOS_AIX__) -# define U_PLATFORM U_PF_AIX -#elif defined(_hpux) || defined(hpux) || defined(__hpux) -# define U_PLATFORM U_PF_HPUX -#elif defined(sgi) || defined(__sgi) -# define U_PLATFORM U_PF_IRIX -#elif defined(__QNX__) || defined(__QNXNTO__) -# define U_PLATFORM U_PF_QNX -#elif defined(__TOS_MVS__) -# define U_PLATFORM U_PF_OS390 -#elif defined(__OS400__) || defined(__TOS_OS400__) -# define U_PLATFORM U_PF_OS400 -#else -# define U_PLATFORM U_PF_UNKNOWN -#endif - -/** - * \def CYGWINMSVC - * Defined if this is Windows with Cygwin, but using MSVC rather than gcc. - * Otherwise undefined. - * @internal - */ -/* Commented out because this is already set in mh-cygwin-msvc -#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER) -# define CYGWINMSVC -#endif -*/ -#ifdef U_IN_DOXYGEN -# define CYGWINMSVC -#endif - -/** - * \def U_PLATFORM_USES_ONLY_WIN32_API - * Defines whether the platform uses only the Win32 API. - * Set to 1 for Windows/MSVC and MinGW but not Cygwin. - * @internal - */ -#ifdef U_PLATFORM_USES_ONLY_WIN32_API - /* Use the predefined value. */ -#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC) -# define U_PLATFORM_USES_ONLY_WIN32_API 1 -#else - /* Cygwin implements POSIX. */ -# define U_PLATFORM_USES_ONLY_WIN32_API 0 -#endif - -/** - * \def U_PLATFORM_HAS_WIN32_API - * Defines whether the Win32 API is available on the platform. - * Set to 1 for Windows/MSVC, MinGW and Cygwin. - * @internal - */ -#ifdef U_PLATFORM_HAS_WIN32_API - /* Use the predefined value. 
*/ -#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN -# define U_PLATFORM_HAS_WIN32_API 1 -#else -# define U_PLATFORM_HAS_WIN32_API 0 -#endif - -/** - * \def U_PLATFORM_HAS_WINUWP_API - * Defines whether target is intended for Universal Windows Platform API - * Set to 1 for Windows10 Release Solution Configuration - * @internal - */ -#ifdef U_PLATFORM_HAS_WINUWP_API - /* Use the predefined value. */ -#else -# define U_PLATFORM_HAS_WINUWP_API 0 -#endif - -/** - * \def U_PLATFORM_IMPLEMENTS_POSIX - * Defines whether the platform implements (most of) the POSIX API. - * Set to 1 for Cygwin and most other platforms. - * @internal - */ -#ifdef U_PLATFORM_IMPLEMENTS_POSIX - /* Use the predefined value. */ -#elif U_PLATFORM_USES_ONLY_WIN32_API -# define U_PLATFORM_IMPLEMENTS_POSIX 0 -#else -# define U_PLATFORM_IMPLEMENTS_POSIX 1 -#endif - -/** - * \def U_PLATFORM_IS_LINUX_BASED - * Defines whether the platform is Linux or one of its derivatives. - * @internal - */ -#ifdef U_PLATFORM_IS_LINUX_BASED - /* Use the predefined value. */ -#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= 4499 -# define U_PLATFORM_IS_LINUX_BASED 1 -#else -# define U_PLATFORM_IS_LINUX_BASED 0 -#endif - -/** - * \def U_PLATFORM_IS_DARWIN_BASED - * Defines whether the platform is Darwin or one of its derivatives. - * @internal - */ -#ifdef U_PLATFORM_IS_DARWIN_BASED - /* Use the predefined value. */ -#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE -# define U_PLATFORM_IS_DARWIN_BASED 1 -#else -# define U_PLATFORM_IS_DARWIN_BASED 0 -#endif - -/** - * \def U_HAVE_STDINT_H - * Defines whether stdint.h is available. It is a C99 standard header. - * We used to include inttypes.h which includes stdint.h but we usually do not need - * the additional definitions from inttypes.h. - * @internal - */ -#ifdef U_HAVE_STDINT_H - /* Use the predefined value. 
*/ -#elif U_PLATFORM_USES_ONLY_WIN32_API -# if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600) - /* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */ -# define U_HAVE_STDINT_H 1 -# else -# define U_HAVE_STDINT_H 0 -# endif -#elif U_PLATFORM == U_PF_SOLARIS - /* Solaris has inttypes.h but not stdint.h. */ -# define U_HAVE_STDINT_H 0 -#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER) - /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */ -# define U_HAVE_STDINT_H 0 -#else -# define U_HAVE_STDINT_H 1 -#endif - -/** - * \def U_HAVE_INTTYPES_H - * Defines whether inttypes.h is available. It is a C99 standard header. - * We include inttypes.h where it is available but stdint.h is not. - * @internal - */ -#ifdef U_HAVE_INTTYPES_H - /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_SOLARIS - /* Solaris has inttypes.h but not stdint.h. */ -# define U_HAVE_INTTYPES_H 1 -#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER) - /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */ -# define U_HAVE_INTTYPES_H 1 -#else - /* Most platforms have both inttypes.h and stdint.h, or neither. */ -# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H -#endif - -/*===========================================================================*/ -/** @{ Compiler and environment features */ -/*===========================================================================*/ - -/** - * \def U_GCC_MAJOR_MINOR - * Indicates whether the compiler is gcc (test for != 0), - * and if so, contains its major (times 100) and minor version numbers. - * If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0. - * - * For example, for testing for whether we have gcc, and whether it's 4.6 or higher, - * use "#if U_GCC_MAJOR_MINOR >= 406". 
- * @internal - */ -#ifdef __GNUC__ -# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__) -#else -# define U_GCC_MAJOR_MINOR 0 -#endif - -/** - * \def U_IS_BIG_ENDIAN - * Determines the endianness of the platform. - * @internal - */ -#ifdef U_IS_BIG_ENDIAN - /* Use the predefined value. */ -#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) -# define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN) -#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) - /* gcc */ -# define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) -# define U_IS_BIG_ENDIAN 1 -#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN) -# define U_IS_BIG_ENDIAN 0 -#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__) - /* These platforms do not appear to predefine any endianness macros. */ -# define U_IS_BIG_ENDIAN 1 -#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0) - /* HPPA do not appear to predefine any endianness macros. */ -# define U_IS_BIG_ENDIAN 1 -#elif defined(sparc) || defined(__sparc) || defined(__sparc__) - /* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */ -# define U_IS_BIG_ENDIAN 1 -#else -# define U_IS_BIG_ENDIAN 0 -#endif - -/** - * \def U_HAVE_PLACEMENT_NEW - * Determines whether to override placement new and delete for STL. - * @stable ICU 2.6 - */ -#ifdef U_HAVE_PLACEMENT_NEW - /* Use the predefined value. */ -#elif defined(__BORLANDC__) -# define U_HAVE_PLACEMENT_NEW 0 -#else -# define U_HAVE_PLACEMENT_NEW 1 -#endif - -/** - * \def U_HAVE_DEBUG_LOCATION_NEW - * Define this to define the MFC debug version of the operator new. - * - * @stable ICU 3.4 - */ -#ifdef U_HAVE_DEBUG_LOCATION_NEW - /* Use the predefined value. 
*/ -#elif defined(_MSC_VER) -# define U_HAVE_DEBUG_LOCATION_NEW 1 -#else -# define U_HAVE_DEBUG_LOCATION_NEW 0 -#endif - -/* Compatibility with compilers other than clang: http://clang.llvm.org/docs/LanguageExtensions.html */ -#ifndef __has_attribute -# define __has_attribute(x) 0 -#endif -#ifndef __has_cpp_attribute -# define __has_cpp_attribute(x) 0 -#endif -#ifndef __has_declspec_attribute -# define __has_declspec_attribute(x) 0 -#endif -#ifndef __has_builtin -# define __has_builtin(x) 0 -#endif -#ifndef __has_feature -# define __has_feature(x) 0 -#endif -#ifndef __has_extension -# define __has_extension(x) 0 -#endif -#ifndef __has_warning -# define __has_warning(x) 0 -#endif - -/** - * \def U_MALLOC_ATTR - * Attribute to mark functions as malloc-like - * @internal - */ -#if defined(__GNUC__) && __GNUC__>=3 -# define U_MALLOC_ATTR __attribute__ ((__malloc__)) -#else -# define U_MALLOC_ATTR -#endif - -/** - * \def U_ALLOC_SIZE_ATTR - * Attribute to specify the size of the allocated buffer for malloc-like functions - * @internal - */ -#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_attribute(alloc_size) -# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X))) -# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y))) -#else -# define U_ALLOC_SIZE_ATTR(X) -# define U_ALLOC_SIZE_ATTR2(X,Y) -#endif - -/** - * \def U_CPLUSPLUS_VERSION - * 0 if no C++; 1, 11, 14, ... if C++. - * Support for specific features cannot always be determined by the C++ version alone. - * @internal - */ -#ifdef U_CPLUSPLUS_VERSION -# if U_CPLUSPLUS_VERSION != 0 && !defined(__cplusplus) -# undef U_CPLUSPLUS_VERSION -# define U_CPLUSPLUS_VERSION 0 -# endif - /* Otherwise use the predefined value. 
*/ -#elif !defined(__cplusplus) -# define U_CPLUSPLUS_VERSION 0 -#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) -# define U_CPLUSPLUS_VERSION 14 -#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) -# define U_CPLUSPLUS_VERSION 11 -#else - // C++98 or C++03 -# define U_CPLUSPLUS_VERSION 1 -#endif - -#if (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) -// add in std::nullptr_t -namespace std { - typedef decltype(nullptr) nullptr_t; -}; -#endif - -/** - * \def U_NOEXCEPT - * "noexcept" if supported, otherwise empty. - * Some code, especially STL containers, uses move semantics of objects only - * if the move constructor and the move operator are declared as not throwing exceptions. - * @internal - */ -#ifdef U_NOEXCEPT - /* Use the predefined value. */ -#else -# define U_NOEXCEPT noexcept -#endif - -/** - * \def U_FALLTHROUGH - * Annotate intentional fall-through between switch labels. - * http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough - * @internal - */ -#ifndef __cplusplus - // Not for C. -#elif defined(U_FALLTHROUGH) - // Use the predefined value. -#elif defined(__clang__) - // Test for compiler vs. feature separately. - // Other compilers might choke on the feature test. -# if __has_cpp_attribute(clang::fallthrough) || \ - (__has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")) -# define U_FALLTHROUGH [[clang::fallthrough]] -# endif -#elif defined(__GNUC__) && (__GNUC__ >= 7) -# define U_FALLTHROUGH __attribute__((fallthrough)) -#endif - -#ifndef U_FALLTHROUGH -# define U_FALLTHROUGH -#endif - -/** @} */ - -/*===========================================================================*/ -/** @{ Character data types */ -/*===========================================================================*/ - -/** - * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform. 
- * @stable ICU 2.0 - */ -#define U_ASCII_FAMILY 0 - -/** - * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform. - * @stable ICU 2.0 - */ -#define U_EBCDIC_FAMILY 1 - -/** - * \def U_CHARSET_FAMILY - * - *

These definitions allow to specify the encoding of text - * in the char data type as defined by the platform and the compiler. - * It is enough to determine the code point values of "invariant characters", - * which are the ones shared by all encodings that are in use - * on a given platform.

- * - *

Those "invariant characters" should be all the uppercase and lowercase - * latin letters, the digits, the space, and "basic punctuation". - * Also, '\\n', '\\r', '\\t' should be available.

- * - *

The list of "invariant characters" is:
- * \code - * A-Z a-z 0-9 SPACE " % & ' ( ) * + , - . / : ; < = > ? _ - * \endcode - *
- * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)

- * - *

This matches the IBM Syntactic Character Set (CS 640).

- * - *

In other words, all the graphic characters in 7-bit ASCII should - * be safely accessible except the following:

- * - * \code - * '\' - * '[' - * ']' - * '{' - * '}' - * '^' - * '~' - * '!' - * '#' - * '|' - * '$' - * '@' - * '`' - * \endcode - * @stable ICU 2.0 - */ -#ifdef U_CHARSET_FAMILY - /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB) -# define U_CHARSET_FAMILY U_EBCDIC_FAMILY -#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__) -# define U_CHARSET_FAMILY U_EBCDIC_FAMILY -#else -# define U_CHARSET_FAMILY U_ASCII_FAMILY -#endif - -/** - * \def U_CHARSET_IS_UTF8 - * - * Hardcode the default charset to UTF-8. - * - * If this is set to 1, then - * - ICU will assume that all non-invariant char*, StringPiece, std::string etc. - * contain UTF-8 text, regardless of what the system API uses - * - some ICU code will use fast functions like u_strFromUTF8() - * rather than the more general and more heavy-weight conversion API (ucnv.h) - * - ucnv_getDefaultName() always returns "UTF-8" - * - ucnv_setDefaultName() is disabled and will not change the default charset - * - static builds of ICU are smaller - * - more functionality is available with the UCONFIG_NO_CONVERSION build-time - * configuration option (see unicode/uconfig.h) - * - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable - * - * @stable ICU 4.2 - * @see UCONFIG_NO_CONVERSION - */ -#ifdef U_CHARSET_IS_UTF8 - /* Use the predefined value. */ -#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED -# define U_CHARSET_IS_UTF8 1 -#else -# define U_CHARSET_IS_UTF8 0 -#endif - -/** @} */ - -/*===========================================================================*/ -/** @{ Information about wchar support */ -/*===========================================================================*/ - -/** - * \def U_HAVE_WCHAR_H - * Indicates whether is available (1) or not (0). Set to 1 by default. - * - * @stable ICU 2.0 - */ -#ifdef U_HAVE_WCHAR_H - /* Use the predefined value. 
*/ -#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9 - /* - * Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t. - * The type and header existed, but the library functions did not work as expected. - * The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway. - */ -# define U_HAVE_WCHAR_H 0 -#else -# define U_HAVE_WCHAR_H 1 -#endif - -/** - * \def U_SIZEOF_WCHAR_T - * U_SIZEOF_WCHAR_T==sizeof(wchar_t) - * - * @stable ICU 2.0 - */ -#ifdef U_SIZEOF_WCHAR_T - /* Use the predefined value. */ -#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9) - /* - * Classic Mac OS and Mac OS X before 10.3 (Panther) did not support wchar_t or wstring. - * Newer Mac OS X has size 4. - */ -# define U_SIZEOF_WCHAR_T 1 -#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN -# define U_SIZEOF_WCHAR_T 2 -#elif U_PLATFORM == U_PF_AIX - /* - * AIX 6.1 information, section "Wide character data representation": - * "... the wchar_t datatype is 32-bit in the 64-bit environment and - * 16-bit in the 32-bit environment." - * and - * "All locales use Unicode for their wide character code values (process code), - * except the IBM-eucTW codeset." - */ -# ifdef __64BIT__ -# define U_SIZEOF_WCHAR_T 4 -# else -# define U_SIZEOF_WCHAR_T 2 -# endif -#elif U_PLATFORM == U_PF_OS390 - /* - * z/OS V1R11 information center, section "LP64 | ILP32": - * "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes. - * Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes." - */ -# ifdef _LP64 -# define U_SIZEOF_WCHAR_T 4 -# else -# define U_SIZEOF_WCHAR_T 2 -# endif -#elif U_PLATFORM == U_PF_OS400 -# if defined(__UTF32__) - /* - * LOCALETYPE(*LOCALEUTF) is specified. - * Wide-character strings are in UTF-32, - * narrow-character strings are in UTF-8. - */ -# define U_SIZEOF_WCHAR_T 4 -# elif defined(__UCS2__) - /* - * LOCALETYPE(*LOCALEUCS2) is specified. 
- * Wide-character strings are in UCS-2, - * narrow-character strings are in EBCDIC. - */ -# define U_SIZEOF_WCHAR_T 2 -#else - /* - * LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified. - * Wide-character strings are in 16-bit EBCDIC, - * narrow-character strings are in EBCDIC. - */ -# define U_SIZEOF_WCHAR_T 2 -# endif -#else -# define U_SIZEOF_WCHAR_T 4 -#endif - -#ifndef U_HAVE_WCSCPY -#define U_HAVE_WCSCPY U_HAVE_WCHAR_H -#endif - -/** @} */ - -/** - * \def U_HAVE_CHAR16_T - * Defines whether the char16_t type is available for UTF-16 - * and u"abc" UTF-16 string literals are supported. - * This is a new standard type and standard string literal syntax in C++0x - * but has been available in some compilers before. - * @internal - */ -#ifdef U_HAVE_CHAR16_T - /* Use the predefined value. */ -#else - /* - * Notes: - * Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef - * and does not support u"abc" string literals. - * Visual Studio 2015 (_MSC_VER>=1900) and above adds support for - * both char16_t and u"abc" string literals. - * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but - * does not support u"abc" string literals. - * C++11 and C11 require support for UTF-16 literals - * TODO: Fix for plain C. Doesn't work on Mac. - */ -# if U_CPLUSPLUS_VERSION >= 11 || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) -# define U_HAVE_CHAR16_T 1 -# else -# define U_HAVE_CHAR16_T 0 -# endif -#endif - -/** - * @{ - * \def U_DECLARE_UTF16 - * Do not use this macro because it is not defined on all platforms. - * Use the UNICODE_STRING or U_STRING_DECL macros instead. - * @internal - */ -#ifdef U_DECLARE_UTF16 - /* Use the predefined value. 
*/ -#elif U_HAVE_CHAR16_T \ - || (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \ - || (defined(__HP_aCC) && __HP_aCC >= 035000) \ - || (defined(__HP_cc) && __HP_cc >= 111106) \ - || (defined(U_IN_DOXYGEN)) -# define U_DECLARE_UTF16(string) u ## string -#elif U_SIZEOF_WCHAR_T == 2 \ - && (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__))) -# define U_DECLARE_UTF16(string) L ## string -#else - /* Leave U_DECLARE_UTF16 undefined. See unistr.h. */ -#endif - -/** @} */ - -/*===========================================================================*/ -/** @{ Symbol import-export control */ -/*===========================================================================*/ - -#ifdef U_EXPORT - /* Use the predefined value. */ -#elif defined(U_STATIC_IMPLEMENTATION) -# define U_EXPORT -#elif defined(_MSC_VER) || (__has_declspec_attribute(dllexport) && __has_declspec_attribute(dllimport)) -# define U_EXPORT __declspec(dllexport) -#elif defined(__GNUC__) -# define U_EXPORT __attribute__((visibility("default"))) -#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \ - || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550) -# define U_EXPORT __global -/*#elif defined(__HP_aCC) || defined(__HP_cc) -# define U_EXPORT __declspec(dllexport)*/ -#else -# define U_EXPORT -#endif - -/* U_CALLCONV is releated to U_EXPORT2 */ -#ifdef U_EXPORT2 - /* Use the predefined value. */ -#elif defined(_MSC_VER) -# define U_EXPORT2 __cdecl -#else -# define U_EXPORT2 -#endif - -#ifdef U_IMPORT - /* Use the predefined value. */ -#elif defined(_MSC_VER) || (__has_declspec_attribute(dllexport) && __has_declspec_attribute(dllimport)) - /* Windows needs to export/import data. 
*/ -# define U_IMPORT __declspec(dllimport) -#else -# define U_IMPORT -#endif - -/** - * \def U_CALLCONV - * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary - * in callback function typedefs to make sure that the calling convention - * is compatible. - * - * This is only used for non-ICU-API functions. - * When a function is a public ICU API, - * you must use the U_CAPI and U_EXPORT2 qualifiers. - * - * Please note, you need to use U_CALLCONV after the *. - * - * NO : "static const char U_CALLCONV *func( . . . )" - * YES: "static const char* U_CALLCONV func( . . . )" - * - * @stable ICU 2.0 - */ -#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus) -# define U_CALLCONV __cdecl -#else -# define U_CALLCONV U_EXPORT2 -#endif - -/** - * \def U_CALLCONV_FPTR - * Similar to U_CALLCONV, but only used on function pointers. - * @internal - */ -#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus) -# define U_CALLCONV_FPTR U_CALLCONV -#else -# define U_CALLCONV_FPTR -#endif -/* @} */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ptypes.h b/deps/node/deps/icu-small/source/common/unicode/ptypes.h deleted file mode 100644 index 70324ffe..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ptypes.h +++ /dev/null @@ -1,130 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* FILE NAME : ptypes.h -* -* Date Name Description -* 05/13/98 nos Creation (content moved here from ptypes.h). -* 03/02/99 stephen Added AS400 support. -* 03/30/99 stephen Added Linux support. -* 04/13/99 stephen Reworked for autoconf. 
-* 09/18/08 srl Moved basic types back to ptypes.h from platform.h -****************************************************************************** -*/ - -/** - * \file - * \brief C API: Definitions of integer types of various widths - */ - -#ifndef _PTYPES_H -#define _PTYPES_H - -/** - * \def __STDC_LIMIT_MACROS - * According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations - * macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested. - * We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code - * that uses such limit macros. - * @internal - */ -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS -#endif - -/* NULL, size_t, wchar_t */ -#include - -/* - * If all compilers provided all of the C99 headers and types, - * we would just unconditionally #include here - * and not need any of the stuff after including platform.h. - */ - -/* Find out if we have stdint.h etc. */ -#include "unicode/platform.h" - -/*===========================================================================*/ -/* Generic data types */ -/*===========================================================================*/ - -/* If your platform does not have the header, you may - need to edit the typedefs in the #else section below. - Use #if...#else...#endif with predefined compiler macros if possible. */ -#if U_HAVE_STDINT_H - -/* - * We mostly need (which defines the standard integer types) but not . - * includes and adds the printf/scanf helpers PRId32, SCNx16 etc. - * which we almost never use, plus stuff like imaxabs() which we never use. - */ -#include - -#if U_PLATFORM == U_PF_OS390 -/* The features header is needed to get (u)int64_t sometimes. */ -#include -/* z/OS has , but some versions are missing uint8_t (APAR PK62248). 
*/ -#if !defined(__uint8_t) -#define __uint8_t 1 -typedef unsigned char uint8_t; -#endif -#endif /* U_PLATFORM == U_PF_OS390 */ - -#elif U_HAVE_INTTYPES_H - -# include - -#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */ - -/// \cond -#if ! U_HAVE_INT8_T -typedef signed char int8_t; -#endif - -#if ! U_HAVE_UINT8_T -typedef unsigned char uint8_t; -#endif - -#if ! U_HAVE_INT16_T -typedef signed short int16_t; -#endif - -#if ! U_HAVE_UINT16_T -typedef unsigned short uint16_t; -#endif - -#if ! U_HAVE_INT32_T -typedef signed int int32_t; -#endif - -#if ! U_HAVE_UINT32_T -typedef unsigned int uint32_t; -#endif - -#if ! U_HAVE_INT64_T -#ifdef _MSC_VER - typedef signed __int64 int64_t; -#else - typedef signed long long int64_t; -#endif -#endif - -#if ! U_HAVE_UINT64_T -#ifdef _MSC_VER - typedef unsigned __int64 uint64_t; -#else - typedef unsigned long long uint64_t; -#endif -#endif -/// \endcond - -#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */ - -#endif /* _PTYPES_H */ diff --git a/deps/node/deps/icu-small/source/common/unicode/putil.h b/deps/node/deps/icu-small/source/common/unicode/putil.h deleted file mode 100644 index 14bb99cc..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/putil.h +++ /dev/null @@ -1,183 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* FILE NAME : putil.h -* -* Date Name Description -* 05/14/98 nos Creation (content moved here from utypes.h). -* 06/17/99 erm Added IEEE_754 -* 07/22/98 stephen Added IEEEremainder, max, min, trunc -* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity -* 08/24/98 stephen Added longBitsFromDouble -* 03/02/99 stephen Removed openFile(). 
Added AS400 support. -* 04/15/99 stephen Converted to C -* 11/15/99 helena Integrated S/390 changes for IEEE support. -* 01/11/00 helena Added u_getVersion. -****************************************************************************** -*/ - -#ifndef PUTIL_H -#define PUTIL_H - -#include "unicode/utypes.h" - /** - * \file - * \brief C API: Platform Utilities - */ - -/*==========================================================================*/ -/* Platform utilities */ -/*==========================================================================*/ - -/** - * Platform utilities isolates the platform dependencies of the - * library. For each platform which this code is ported to, these - * functions may have to be re-implemented. - */ - -/** - * Return the ICU data directory. - * The data directory is where common format ICU data files (.dat files) - * are loaded from. Note that normal use of the built-in ICU - * facilities does not require loading of an external data file; - * unless you are adding custom data to ICU, the data directory - * does not need to be set. - * - * The data directory is determined as follows: - * If u_setDataDirectory() has been called, that is it, otherwise - * if the ICU_DATA environment variable is set, use that, otherwise - * If a data directory was specified at ICU build time - * - * \code - * #define ICU_DATA_DIR "path" - * \endcode - * use that, - * otherwise no data directory is available. - * - * @return the data directory, or an empty string ("") if no data directory has - * been specified. - * - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 u_getDataDirectory(void); - - -/** - * Set the ICU data directory. - * The data directory is where common format ICU data files (.dat files) - * are loaded from. Note that normal use of the built-in ICU - * facilities does not require loading of an external data file; - * unless you are adding custom data to ICU, the data directory - * does not need to be set. 
- * - * This function should be called at most once in a process, before the - * first ICU operation (e.g., u_init()) that will require the loading of an - * ICU data file. - * This function is not thread-safe. Use it before calling ICU APIs from - * multiple threads. - * - * @param directory The directory to be set. - * - * @see u_init - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory); - -#ifndef U_HIDE_INTERNAL_API -/** - * Return the time zone files override directory, or an empty string if - * no directory was specified. Certain time zone resources will be preferentially - * loaded from individual files in this directory. - * - * @return the time zone data override directory. - * @internal - */ -U_INTERNAL const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status); - -/** - * Set the time zone files override directory. - * This function is not thread safe; it must not be called concurrently with - * u_getTimeZoneFilesDirectory() or any other use of ICU time zone functions. - * This function should only be called before using any ICU service that - * will access the time zone data. - * @internal - */ -U_INTERNAL void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status); -#endif /* U_HIDE_INTERNAL_API */ - - -/** - * @{ - * Filesystem file and path separator characters. - * Example: '/' and ':' on Unix, '\\' and ';' on Windows. - * @stable ICU 2.0 - */ -#if U_PLATFORM_USES_ONLY_WIN32_API -# define U_FILE_SEP_CHAR '\\' -# define U_FILE_ALT_SEP_CHAR '/' -# define U_PATH_SEP_CHAR ';' -# define U_FILE_SEP_STRING "\\" -# define U_FILE_ALT_SEP_STRING "/" -# define U_PATH_SEP_STRING ";" -#else -# define U_FILE_SEP_CHAR '/' -# define U_FILE_ALT_SEP_CHAR '/' -# define U_PATH_SEP_CHAR ':' -# define U_FILE_SEP_STRING "/" -# define U_FILE_ALT_SEP_STRING "/" -# define U_PATH_SEP_STRING ":" -#endif - -/** @} */ - -/** - * Convert char characters to UChar characters. 
- * This utility function is useful only for "invariant characters" - * that are encoded in the platform default encoding. - * They are a small, constant subset of the encoding and include - * just the latin letters, digits, and some punctuation. - * For details, see U_CHARSET_FAMILY. - * - * @param cs Input string, points to length - * character bytes from a subset of the platform encoding. - * @param us Output string, points to memory for length - * Unicode characters. - * @param length The number of characters to convert; this may - * include the terminating NUL. - * - * @see U_CHARSET_FAMILY - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -u_charsToUChars(const char *cs, UChar *us, int32_t length); - -/** - * Convert UChar characters to char characters. - * This utility function is useful only for "invariant characters" - * that can be encoded in the platform default encoding. - * They are a small, constant subset of the encoding and include - * just the latin letters, digits, and some punctuation. - * For details, see U_CHARSET_FAMILY. - * - * @param us Input string, points to length - * Unicode characters that can be encoded with the - * codepage-invariant subset of the platform encoding. - * @param cs Output string, points to memory for length - * character bytes. - * @param length The number of characters to convert; this may - * include the terminating NUL. - * - * @see U_CHARSET_FAMILY - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -u_UCharsToChars(const UChar *us, char *cs, int32_t length); - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/rbbi.h b/deps/node/deps/icu-small/source/common/unicode/rbbi.h deleted file mode 100644 index 47abd554..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/rbbi.h +++ /dev/null @@ -1,699 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -*************************************************************************** -* Copyright (C) 1999-2016 International Business Machines Corporation * -* and others. All rights reserved. * -*************************************************************************** - -********************************************************************** -* Date Name Description -* 10/22/99 alan Creation. -* 11/11/99 rgillam Complete port from Java. -********************************************************************** -*/ - -#ifndef RBBI_H -#define RBBI_H - -#include "unicode/utypes.h" - -/** - * \file - * \brief C++ API: Rule Based Break Iterator - */ - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/brkiter.h" -#include "unicode/udata.h" -#include "unicode/parseerr.h" -#include "unicode/schriter.h" - -U_NAMESPACE_BEGIN - -/** @internal */ -class LanguageBreakEngine; -struct RBBIDataHeader; -class RBBIDataWrapper; -class UnhandledEngine; -class UStack; - -/** - * - * A subclass of BreakIterator whose behavior is specified using a list of rules. - *

Instances of this class are most commonly created by the factory methods of - * BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc., - * and then used via the abstract API in class BreakIterator

- * - *

See the ICU User Guide for information on Break Iterator Rules.

- * - *

This class is not intended to be subclassed.

- */ -class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { - -private: - /** - * The UText through which this BreakIterator accesses the text - * @internal (private) - */ - UText fText; - -#ifndef U_HIDE_INTERNAL_API -public: -#endif /* U_HIDE_INTERNAL_API */ - /** - * The rule data for this BreakIterator instance. - * Not for general use; Public only for testing purposes. - * @internal - */ - RBBIDataWrapper *fData; -private: - - /** - * The current position of the iterator. Pinned, 0 < fPosition <= text.length. - * Never has the value UBRK_DONE (-1). - */ - int32_t fPosition; - - /** - * TODO: - */ - int32_t fRuleStatusIndex; - - /** - * Cache of previously determined boundary positions. - */ - class BreakCache; - BreakCache *fBreakCache; - - /** - * Cache of boundary positions within a region of text that has been - * sub-divided by dictionary based breaking. - */ - class DictionaryCache; - DictionaryCache *fDictionaryCache; - - /** - * - * If present, UStack of LanguageBreakEngine objects that might handle - * dictionary characters. Searched from top to bottom to find an object to - * handle a given character. - * @internal (private) - */ - UStack *fLanguageBreakEngines; - - /** - * - * If present, the special LanguageBreakEngine used for handling - * characters that are in the dictionary set, but not handled by any - * LangugageBreakEngine. - * @internal (private) - */ - UnhandledEngine *fUnhandledBreakEngine; - - /** - * Counter for the number of characters encountered with the "dictionary" - * flag set. - * @internal (private) - */ - uint32_t fDictionaryCharCount; - - /** - * A character iterator that refers to the same text as the UText, above. - * Only included for compatibility with old API, which was based on CharacterIterators. - * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below. 
- */ - CharacterIterator *fCharIter; - - /** - * When the input text is provided by a UnicodeString, this will point to - * a characterIterator that wraps that data. Needed only for the - * implementation of getText(), a backwards compatibility issue. - */ - StringCharacterIterator fSCharIter; - - /** - * True when iteration has run off the end, and iterator functions should return UBRK_DONE. - */ - UBool fDone; - - //======================================================================= - // constructors - //======================================================================= - - /** - * Constructor from a flattened set of RBBI data in malloced memory. - * RulesBasedBreakIterators built from a custom set of rules - * are created via this constructor; the rules are compiled - * into memory, then the break iterator is constructed here. - * - * The break iterator adopts the memory, and will - * free it when done. - * @internal (private) - */ - RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); - - /** @internal */ - friend class RBBIRuleBuilder; - /** @internal */ - friend class BreakIterator; - -public: - - /** Default constructor. Creates an empty shell of an iterator, with no - * rules or text to iterate over. Object can subsequently be assigned to. - * @stable ICU 2.2 - */ - RuleBasedBreakIterator(); - - /** - * Copy constructor. Will produce a break iterator with the same behavior, - * and which iterates over the same text, as the one passed in. - * @param that The RuleBasedBreakIterator passed to be copied - * @stable ICU 2.0 - */ - RuleBasedBreakIterator(const RuleBasedBreakIterator& that); - - /** - * Construct a RuleBasedBreakIterator from a set of rules supplied as a string. - * @param rules The break rules to be used. - * @param parseError In the event of a syntax error in the rules, provides the location - * within the rules of the problem. - * @param status Information on any errors encountered. 
- * @stable ICU 2.2 - */ - RuleBasedBreakIterator( const UnicodeString &rules, - UParseError &parseError, - UErrorCode &status); - - /** - * Construct a RuleBasedBreakIterator from a set of precompiled binary rules. - * Binary rules are obtained from RulesBasedBreakIterator::getBinaryRules(). - * Construction of a break iterator in this way is substantially faster than - * construction from source rules. - * - * Ownership of the storage containing the compiled rules remains with the - * caller of this function. The compiled rules must not be modified or - * deleted during the life of the break iterator. - * - * The compiled rules are not compatible across different major versions of ICU. - * The compiled rules are compatible only between machines with the same - * byte ordering (little or big endian) and the same base character set family - * (ASCII or EBCDIC). - * - * @see #getBinaryRules - * @param compiledRules A pointer to the compiled break rules to be used. - * @param ruleLength The length of the compiled break rules, in bytes. This - * corresponds to the length value produced by getBinaryRules(). - * @param status Information on any errors encountered, including invalid - * binary rules. - * @stable ICU 4.8 - */ - RuleBasedBreakIterator(const uint8_t *compiledRules, - uint32_t ruleLength, - UErrorCode &status); - - /** - * This constructor uses the udata interface to create a BreakIterator - * whose internal tables live in a memory-mapped file. "image" is an - * ICU UDataMemory handle for the pre-compiled break iterator tables. - * @param image handle to the memory image for the break iterator data. - * Ownership of the UDataMemory handle passes to the Break Iterator, - * which will be responsible for closing it when it is no longer needed. - * @param status Information on any errors encountered. 
- * @see udata_open - * @see #getBinaryRules - * @stable ICU 2.8 - */ - RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status); - - /** - * Destructor - * @stable ICU 2.0 - */ - virtual ~RuleBasedBreakIterator(); - - /** - * Assignment operator. Sets this iterator to have the same behavior, - * and iterate over the same text, as the one passed in. - * @param that The RuleBasedBreakItertor passed in - * @return the newly created RuleBasedBreakIterator - * @stable ICU 2.0 - */ - RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that); - - /** - * Equality operator. Returns TRUE if both BreakIterators are of the - * same class, have the same behavior, and iterate over the same text. - * @param that The BreakIterator to be compared for equality - * @return TRUE if both BreakIterators are of the - * same class, have the same behavior, and iterate over the same text. - * @stable ICU 2.0 - */ - virtual UBool operator==(const BreakIterator& that) const; - - /** - * Not-equal operator. If operator== returns TRUE, this returns FALSE, - * and vice versa. - * @param that The BreakIterator to be compared for inequality - * @return TRUE if both BreakIterators are not same. - * @stable ICU 2.0 - */ - UBool operator!=(const BreakIterator& that) const; - - /** - * Returns a newly-constructed RuleBasedBreakIterator with the same - * behavior, and iterating over the same text, as this one. - * Differs from the copy constructor in that it is polymorphic, and - * will correctly clone (copy) a derived class. - * clone() is thread safe. Multiple threads may simultaneously - * clone the same source break iterator. 
- * @return a newly-constructed RuleBasedBreakIterator - * @stable ICU 2.0 - */ - virtual BreakIterator* clone() const; - - /** - * Compute a hash code for this BreakIterator - * @return A hash code - * @stable ICU 2.0 - */ - virtual int32_t hashCode(void) const; - - /** - * Returns the description used to create this iterator - * @return the description used to create this iterator - * @stable ICU 2.0 - */ - virtual const UnicodeString& getRules(void) const; - - //======================================================================= - // BreakIterator overrides - //======================================================================= - - /** - *

- * Return a CharacterIterator over the text being analyzed. - * The returned character iterator is owned by the break iterator, and must - * not be deleted by the caller. Repeated calls to this function may - * return the same CharacterIterator. - *

- *

- * The returned character iterator must not be used concurrently with - * the break iterator. If concurrent operation is needed, clone the - * returned character iterator first and operate on the clone. - *

- *

- * When the break iterator is operating on text supplied via a UText, - * this function will fail. Lacking any way to signal failures, it - * returns an CharacterIterator containing no text. - * The function getUText() provides similar functionality, - * is reliable, and is more efficient. - *

- * - * TODO: deprecate this function? - * - * @return An iterator over the text being analyzed. - * @stable ICU 2.0 - */ - virtual CharacterIterator& getText(void) const; - - - /** - * Get a UText for the text being analyzed. - * The returned UText is a shallow clone of the UText used internally - * by the break iterator implementation. It can safely be used to - * access the text without impacting any break iterator operations, - * but the underlying text itself must not be altered. - * - * @param fillIn A UText to be filled in. If NULL, a new UText will be - * allocated to hold the result. - * @param status receives any error codes. - * @return The current UText for this break iterator. If an input - * UText was provided, it will always be returned. - * @stable ICU 3.4 - */ - virtual UText *getUText(UText *fillIn, UErrorCode &status) const; - - /** - * Set the iterator to analyze a new piece of text. This function resets - * the current iteration position to the beginning of the text. - * @param newText An iterator over the text to analyze. The BreakIterator - * takes ownership of the character iterator. The caller MUST NOT delete it! - * @stable ICU 2.0 - */ - virtual void adoptText(CharacterIterator* newText); - - /** - * Set the iterator to analyze a new piece of text. This function resets - * the current iteration position to the beginning of the text. - * - * The BreakIterator will retain a reference to the supplied string. - * The caller must not modify or delete the text while the BreakIterator - * retains the reference. - * - * @param newText The text to analyze. - * @stable ICU 2.0 - */ - virtual void setText(const UnicodeString& newText); - - /** - * Reset the break iterator to operate over the text represented by - * the UText. The iterator position is reset to the start. - * - * This function makes a shallow clone of the supplied UText. 
This means - * that the caller is free to immediately close or otherwise reuse the - * Utext that was passed as a parameter, but that the underlying text itself - * must not be altered while being referenced by the break iterator. - * - * @param text The UText used to change the text. - * @param status Receives any error codes. - * @stable ICU 3.4 - */ - virtual void setText(UText *text, UErrorCode &status); - - /** - * Sets the current iteration position to the beginning of the text, position zero. - * @return The offset of the beginning of the text, zero. - * @stable ICU 2.0 - */ - virtual int32_t first(void); - - /** - * Sets the current iteration position to the end of the text. - * @return The text's past-the-end offset. - * @stable ICU 2.0 - */ - virtual int32_t last(void); - - /** - * Advances the iterator either forward or backward the specified number of steps. - * Negative values move backward, and positive values move forward. This is - * equivalent to repeatedly calling next() or previous(). - * @param n The number of steps to move. The sign indicates the direction - * (negative is backwards, and positive is forwards). - * @return The character offset of the boundary position n boundaries away from - * the current one. - * @stable ICU 2.0 - */ - virtual int32_t next(int32_t n); - - /** - * Advances the iterator to the next boundary position. - * @return The position of the first boundary after this one. - * @stable ICU 2.0 - */ - virtual int32_t next(void); - - /** - * Moves the iterator backwards, to the last boundary preceding this one. - * @return The position of the last boundary position preceding this one. - * @stable ICU 2.0 - */ - virtual int32_t previous(void); - - /** - * Sets the iterator to refer to the first boundary position following - * the specified position. - * @param offset The position from which to begin searching for a break position. - * @return The position of the first break after the current position. 
- * @stable ICU 2.0 - */ - virtual int32_t following(int32_t offset); - - /** - * Sets the iterator to refer to the last boundary position before the - * specified position. - * @param offset The position to begin searching for a break from. - * @return The position of the last boundary before the starting position. - * @stable ICU 2.0 - */ - virtual int32_t preceding(int32_t offset); - - /** - * Returns true if the specified position is a boundary position. As a side - * effect, leaves the iterator pointing to the first boundary position at - * or after "offset". - * @param offset the offset to check. - * @return True if "offset" is a boundary position. - * @stable ICU 2.0 - */ - virtual UBool isBoundary(int32_t offset); - - /** - * Returns the current iteration position. Note that UBRK_DONE is never - * returned from this function; if iteration has run to the end of a - * string, current() will return the length of the string while - * next() will return UBRK_DONE). - * @return The current iteration position. - * @stable ICU 2.0 - */ - virtual int32_t current(void) const; - - - /** - * Return the status tag from the break rule that determined the boundary at - * the current iteration position. For break rules that do not specify a - * status, a default value of 0 is returned. If more than one break rule - * would cause a boundary to be located at some position in the text, - * the numerically largest of the applicable status values is returned. - *

- * Of the standard types of ICU break iterators, only word break and - * line break provide status values. The values are defined in - * the header file ubrk.h. For Word breaks, the status allows distinguishing between words - * that contain alphabetic letters, "words" that appear to be numbers, - * punctuation and spaces, words containing ideographic characters, and - * more. For Line Break, the status distinguishes between hard (mandatory) breaks - * and soft (potential) break positions. - *

- * getRuleStatus() can be called after obtaining a boundary - * position from next(), previous(), or - * any other break iterator functions that returns a boundary position. - *

- * Note that getRuleStatus() returns the value corresponding to - * current() index even after next() has returned DONE. - *

- * When creating custom break rules, one is free to define whatever - * status values may be convenient for the application. - *

- * @return the status from the break rule that determined the boundary - * at the current iteration position. - * - * @see UWordBreak - * @stable ICU 2.2 - */ - virtual int32_t getRuleStatus() const; - - /** - * Get the status (tag) values from the break rule(s) that determined the boundary - * at the current iteration position. - *

- * The returned status value(s) are stored into an array provided by the caller. - * The values are stored in sorted (ascending) order. - * If the capacity of the output array is insufficient to hold the data, - * the output will be truncated to the available length, and a - * U_BUFFER_OVERFLOW_ERROR will be signaled. - * - * @param fillInVec an array to be filled in with the status values. - * @param capacity the length of the supplied vector. A length of zero causes - * the function to return the number of status values, in the - * normal way, without attempting to store any values. - * @param status receives error codes. - * @return The number of rule status values from the rules that determined - * the boundary at the current iteration position. - * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value - * is the total number of status values that were available, - * not the reduced number that were actually returned. - * @see getRuleStatus - * @stable ICU 3.0 - */ - virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); - - /** - * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. - * This method is to implement a simple version of RTTI, since not all - * C++ compilers support genuine RTTI. Polymorphic operator==() and - * clone() methods call this method. - * - * @return The class ID for this object. All objects of a - * given class have the same class ID. Objects of - * other classes have different class IDs. - * @stable ICU 2.0 - */ - virtual UClassID getDynamicClassID(void) const; - - /** - * Returns the class ID for this class. This is useful only for - * comparing to a return value from getDynamicClassID(). For example: - * - * Base* polymorphic_pointer = createPolymorphicObject(); - * if (polymorphic_pointer->getDynamicClassID() == - * Derived::getStaticClassID()) ... - * - * @return The class ID for all objects of this class. 
- * @stable ICU 2.0 - */ - static UClassID U_EXPORT2 getStaticClassID(void); - - /** - * Deprecated functionality. Use clone() instead. - * - * Create a clone (copy) of this break iterator in memory provided - * by the caller. The idea is to increase performance by avoiding - * a storage allocation. Use of this function is NOT RECOMMENDED. - * Performance gains are minimal, and correct buffer management is - * tricky. Use clone() instead. - * - * @param stackBuffer The pointer to the memory into which the cloned object - * should be placed. If NULL, allocate heap memory - * for the cloned object. - * @param BufferSize The size of the buffer. If zero, return the required - * buffer size, but do not clone the object. If the - * size was too small (but not zero), allocate heap - * storage for the cloned object. - * - * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be - * returned if the provided buffer was too small, and - * the clone was therefore put on the heap. - * - * @return Pointer to the clone object. This may differ from the stackBuffer - * address if the byte alignment of the stack buffer was not suitable - * or if the stackBuffer was too small to hold the clone. - * @deprecated ICU 52. Use clone() instead. - */ - virtual BreakIterator * createBufferClone(void *stackBuffer, - int32_t &BufferSize, - UErrorCode &status); - - - /** - * Return the binary form of compiled break rules, - * which can then be used to create a new break iterator at some - * time in the future. Creating a break iterator from pre-compiled rules - * is much faster than building one from the source form of the - * break rules. - * - * The binary data can only be used with the same version of ICU - * and on the same platform type (processor endian-ness) - * - * @param length Returns the length of the binary data. (Out parameter.) - * - * @return A pointer to the binary (compiled) rule data. 
The storage - * belongs to the RulesBasedBreakIterator object, not the - * caller, and must not be modified or deleted. - * @stable ICU 4.8 - */ - virtual const uint8_t *getBinaryRules(uint32_t &length); - - /** - * Set the subject text string upon which the break iterator is operating - * without changing any other aspect of the matching state. - * The new and previous text strings must have the same content. - * - * This function is intended for use in environments where ICU is operating on - * strings that may move around in memory. It provides a mechanism for notifying - * ICU that the string has been relocated, and providing a new UText to access the - * string in its new position. - * - * Note that the break iterator implementation never copies the underlying text - * of a string being processed, but always operates directly on the original text - * provided by the user. Refreshing simply drops the references to the old text - * and replaces them with references to the new. - * - * Caution: this function is normally used only by very specialized, - * system-level code. One example use case is with garbage collection that moves - * the text in memory. - * - * @param input The new (moved) text string. - * @param status Receives errors detected by this function. - * @return *this - * - * @stable ICU 49 - */ - virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status); - - -private: - //======================================================================= - // implementation - //======================================================================= - /** - * Dumps caches and performs other actions associated with a complete change - * in text or iteration position. - * @internal (private) - */ - void reset(void); - - /** - * Common initialization function, used by constructors and bufferClone. 
- * @internal (private) - */ - void init(UErrorCode &status); - - /** - * Iterate backwards from an arbitrary position in the input text using the - * synthesized Safe Reverse rules. - * This locates a "Safe Position" from which the forward break rules - * will operate correctly. A Safe Position is not necessarily a boundary itself. - * - * @param fromPosition the position in the input text to begin the iteration. - * @internal (private) - */ - int32_t handleSafePrevious(int32_t fromPosition); - - /** - * Find a rule-based boundary by running the state machine. - * Input - * fPosition, the position in the text to begin from. - * Output - * fPosition: the boundary following the starting position. - * fDictionaryCharCount the number of dictionary characters encountered. - * If > 0, the segment will be further subdivided - * fRuleStatusIndex Info from the state table indicating which rules caused the boundary. - * - * @internal (private) - */ - int32_t handleNext(); - - - /** - * This function returns the appropriate LanguageBreakEngine for a - * given character c. - * @param c A character in the dictionary set - * @internal (private) - */ - const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); - - public: -#ifndef U_HIDE_INTERNAL_API - /** - * Debugging function only. - * @internal - */ - void dumpCache(); - - /** - * Debugging function only. - * @internal - */ - void dumpTables(); - -#endif /* U_HIDE_INTERNAL_API */ -}; - -//------------------------------------------------------------------------------ -// -// Inline Functions Definitions ... 
-// -//------------------------------------------------------------------------------ - -inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const { - return !operator==(that); -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/rep.h b/deps/node/deps/icu-small/source/common/unicode/rep.h deleted file mode 100644 index b1023a37..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/rep.h +++ /dev/null @@ -1,263 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -************************************************************************** -* Copyright (C) 1999-2012, International Business Machines Corporation and -* others. All Rights Reserved. -************************************************************************** -* Date Name Description -* 11/17/99 aliu Creation. Ported from java. Modified to -* match current UnicodeString API. Forced -* to use name "handleReplaceBetween" because -* of existing methods in UnicodeString. -************************************************************************** -*/ - -#ifndef REP_H -#define REP_H - -#include "unicode/uobject.h" - -/** - * \file - * \brief C++ API: Replaceable String - */ - -U_NAMESPACE_BEGIN - -class UnicodeString; - -/** - * Replaceable is an abstract base class representing a - * string of characters that supports the replacement of a range of - * itself with a new string of characters. It is used by APIs that - * change a piece of text while retaining metadata. Metadata is data - * other than the Unicode characters returned by char32At(). One - * example of metadata is style attributes; another is an edit - * history, marking each character with an author and revision number. - * - *

An implicit aspect of the Replaceable API is that - * during a replace operation, new characters take on the metadata of - * the old characters. For example, if the string "the bold - * font" has range (4, 8) replaced with "strong", then it becomes "the - * strong font". - * - *

Replaceable specifies ranges using a start - * offset and a limit offset. The range of characters thus specified - * includes the characters at offset start..limit-1. That is, the - * start offset is inclusive, and the limit offset is exclusive. - * - *

Replaceable also includes API to access characters - * in the string: length(), charAt(), - * char32At(), and extractBetween(). - * - *

For a subclass to support metadata, typical behavior of - * replace() is the following: - *

    - *
  • Set the metadata of the new text to the metadata of the first - * character replaced
  • - *
  • If no characters are replaced, use the metadata of the - * previous character
  • - *
  • If there is no previous character (i.e. start == 0), use the - * following character
  • - *
  • If there is no following character (i.e. the replaceable was - * empty), use default metadata.
    - *
  • If the code point U+FFFF is seen, it should be interpreted as - * a special marker having no metadata
  • - *
  • - *
- * If this is not the behavior, the subclass should document any differences. - * @author Alan Liu - * @stable ICU 2.0 - */ -class U_COMMON_API Replaceable : public UObject { - -public: - /** - * Destructor. - * @stable ICU 2.0 - */ - virtual ~Replaceable(); - - /** - * Returns the number of 16-bit code units in the text. - * @return number of 16-bit code units in text - * @stable ICU 1.8 - */ - inline int32_t length() const; - - /** - * Returns the 16-bit code unit at the given offset into the text. - * @param offset an integer between 0 and length()-1 - * inclusive - * @return 16-bit code unit of text at given offset - * @stable ICU 1.8 - */ - inline char16_t charAt(int32_t offset) const; - - /** - * Returns the 32-bit code point at the given 16-bit offset into - * the text. This assumes the text is stored as 16-bit code units - * with surrogate pairs intermixed. If the offset of a leading or - * trailing code unit of a surrogate pair is given, return the - * code point of the surrogate pair. - * - * @param offset an integer between 0 and length()-1 - * inclusive - * @return 32-bit code point of text at given offset - * @stable ICU 1.8 - */ - inline UChar32 char32At(int32_t offset) const; - - /** - * Copies characters in the range [start, limit) - * into the UnicodeString target. - * @param start offset of first character which will be copied - * @param limit offset immediately following the last character to - * be copied - * @param target UnicodeString into which to copy characters. - * @return A reference to target - * @stable ICU 2.1 - */ - virtual void extractBetween(int32_t start, - int32_t limit, - UnicodeString& target) const = 0; - - /** - * Replaces a substring of this object with the given text. If the - * characters being replaced have metadata, the new characters - * that replace them should be given the same metadata. - * - *

Subclasses must ensure that if the text between start and - * limit is equal to the replacement text, that replace has no - * effect. That is, any metadata - * should be unaffected. In addition, subclasses are encouraged to - * check for initial and trailing identical characters, and make a - * smaller replacement if possible. This will preserve as much - * metadata as possible. - * @param start the beginning index, inclusive; 0 <= start - * <= limit. - * @param limit the ending index, exclusive; start <= limit - * <= length(). - * @param text the text to replace characters start - * to limit - 1 - * @stable ICU 2.0 - */ - virtual void handleReplaceBetween(int32_t start, - int32_t limit, - const UnicodeString& text) = 0; - // Note: All other methods in this class take the names of - // existing UnicodeString methods. This method is the exception. - // It is named differently because all replace methods of - // UnicodeString return a UnicodeString&. The 'between' is - // required in order to conform to the UnicodeString naming - // convention; API taking start/length are named , and - // those taking start/limit are named . The - // 'handle' is added because 'replaceBetween' and - // 'doReplaceBetween' are already taken. - - /** - * Copies a substring of this object, retaining metadata. - * This method is used to duplicate or reorder substrings. - * The destination index must not overlap the source range. - * - * @param start the beginning index, inclusive; 0 <= start <= - * limit. - * @param limit the ending index, exclusive; start <= limit <= - * length(). - * @param dest the destination index. The characters from - * start..limit-1 will be copied to dest. - * Implementations of this method may assume that dest <= start || - * dest >= limit. - * @stable ICU 2.0 - */ - virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0; - - /** - * Returns true if this object contains metadata. 
If a - * Replaceable object has metadata, calls to the Replaceable API - * must be made so as to preserve metadata. If it does not, calls - * to the Replaceable API may be optimized to improve performance. - * The default implementation returns true. - * @return true if this object contains metadata - * @stable ICU 2.2 - */ - virtual UBool hasMetaData() const; - - /** - * Clone this object, an instance of a subclass of Replaceable. - * Clones can be used concurrently in multiple threads. - * If a subclass does not implement clone(), or if an error occurs, - * then NULL is returned. - * The clone functions in all subclasses return a pointer to a Replaceable - * because some compilers do not support covariant (same-as-this) - * return types; cast to the appropriate subclass if necessary. - * The caller must delete the clone. - * - * @return a clone of this object - * - * @see getDynamicClassID - * @stable ICU 2.6 - */ - virtual Replaceable *clone() const; - -protected: - - /** - * Default constructor. - * @stable ICU 2.4 - */ - inline Replaceable(); - - /* - * Assignment operator not declared. The compiler will provide one - * which does nothing since this class does not contain any data members. - * API/code coverage may show the assignment operator as present and - * untested - ignore. - * Subclasses need this assignment operator if they use compiler-provided - * assignment operators of their own. An alternative to not declaring one - * here would be to declare and empty-implement a protected or public one. - Replaceable &Replaceable::operator=(const Replaceable &); - */ - - /** - * Virtual version of length(). - * @stable ICU 2.4 - */ - virtual int32_t getLength() const = 0; - - /** - * Virtual version of charAt(). - * @stable ICU 2.4 - */ - virtual char16_t getCharAt(int32_t offset) const = 0; - - /** - * Virtual version of char32At(). 
- * @stable ICU 2.4 - */ - virtual UChar32 getChar32At(int32_t offset) const = 0; -}; - -inline Replaceable::Replaceable() {} - -inline int32_t -Replaceable::length() const { - return getLength(); -} - -inline char16_t -Replaceable::charAt(int32_t offset) const { - return getCharAt(offset); -} - -inline UChar32 -Replaceable::char32At(int32_t offset) const { - return getChar32At(offset); -} - -// There is no rep.cpp, see unistr.cpp for Replaceable function implementations. - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/resbund.h b/deps/node/deps/icu-small/source/common/unicode/resbund.h deleted file mode 100644 index ab0b60bb..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/resbund.h +++ /dev/null @@ -1,492 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1996-2013, International Business Machines Corporation -* and others. All Rights Reserved. -* -****************************************************************************** -* -* File resbund.h -* -* CREATED BY -* Richard Gillam -* -* Modification History: -* -* Date Name Description -* 2/5/97 aliu Added scanForLocaleInFile. Added -* constructor which attempts to read resource bundle -* from a specific file, without searching other files. -* 2/11/97 aliu Added UErrorCode return values to constructors. Fixed -* infinite loops in scanForFile and scanForLocale. -* Modified getRawResourceData to not delete storage -* in localeData and resourceData which it doesn't own. -* Added Mac compatibility #ifdefs for tellp() and -* ios::nocreate. -* 2/18/97 helena Updated with 100% documentation coverage. -* 3/13/97 aliu Rewrote to load in entire resource bundle and store -* it as a Hashtable of ResourceBundleData objects. -* Added state table to govern parsing of files. 
-* Modified to load locale index out of new file -* distinct from default.txt. -* 3/25/97 aliu Modified to support 2-d arrays, needed for timezone -* data. Added support for custom file suffixes. Again, -* needed to support timezone data. -* 4/7/97 aliu Cleaned up. -* 03/02/99 stephen Removed dependency on FILE*. -* 03/29/99 helena Merged Bertrand and Stephen's changes. -* 06/11/99 stephen Removed parsing of .txt files. -* Reworked to use new binary format. -* Cleaned up. -* 06/14/99 stephen Removed methods taking a filename suffix. -* 11/09/99 weiv Added getLocale(), fRealLocale, removed fRealLocaleID -****************************************************************************** -*/ - -#ifndef RESBUND_H -#define RESBUND_H - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/ures.h" -#include "unicode/unistr.h" -#include "unicode/locid.h" - -/** - * \file - * \brief C++ API: Resource Bundle - */ - -U_NAMESPACE_BEGIN - -/** - * A class representing a collection of resource information pertaining to a given - * locale. A resource bundle provides a way of accessing locale- specfic information in - * a data file. You create a resource bundle that manages the resources for a given - * locale and then ask it for individual resources. - *

- * Resource bundles in ICU4C are currently defined using text files which conform to the following - * BNF definition. - * More on resource bundle concepts and syntax can be found in the - * Users Guide. - *

- * - * The ResourceBundle class is not suitable for subclassing. - * - * @stable ICU 2.0 - */ -class U_COMMON_API ResourceBundle : public UObject { -public: - /** - * Constructor - * - * @param packageName The packageName and locale together point to an ICU udata object, - * as defined by udata_open( packageName, "res", locale, err) - * or equivalent. Typically, packageName will refer to a (.dat) file, or to - * a package registered with udata_setAppData(). Using a full file or directory - * pathname for packageName is deprecated. - * @param locale This is the locale this resource bundle is for. To get resources - * for the French locale, for example, you would create a - * ResourceBundle passing Locale::FRENCH for the "locale" parameter, - * and all subsequent calls to that resource bundle will return - * resources that pertain to the French locale. If the caller doesn't - * pass a locale parameter, the default locale for the system (as - * returned by Locale::getDefault()) will be used. - * @param err The Error Code. - * The UErrorCode& err parameter is used to return status information to the user. To - * check whether the construction succeeded or not, you should check the value of - * U_SUCCESS(err). If you wish more detailed information, you can check for - * informational error results which still indicate success. U_USING_FALLBACK_WARNING - * indicates that a fall back locale was used. For example, 'de_CH' was requested, - * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that - * the default locale data was used; neither the requested locale nor any of its - * fall back locales could be found. - * @stable ICU 2.0 - */ - ResourceBundle(const UnicodeString& packageName, - const Locale& locale, - UErrorCode& err); - - /** - * Construct a resource bundle for the default bundle in the specified package. 
- * - * @param packageName The packageName and locale together point to an ICU udata object, - * as defined by udata_open( packageName, "res", locale, err) - * or equivalent. Typically, packageName will refer to a (.dat) file, or to - * a package registered with udata_setAppData(). Using a full file or directory - * pathname for packageName is deprecated. - * @param err A UErrorCode value - * @stable ICU 2.0 - */ - ResourceBundle(const UnicodeString& packageName, - UErrorCode& err); - - /** - * Construct a resource bundle for the ICU default bundle. - * - * @param err A UErrorCode value - * @stable ICU 2.0 - */ - ResourceBundle(UErrorCode &err); - - /** - * Standard constructor, constructs a resource bundle for the locale-specific - * bundle in the specified package. - * - * @param packageName The packageName and locale together point to an ICU udata object, - * as defined by udata_open( packageName, "res", locale, err) - * or equivalent. Typically, packageName will refer to a (.dat) file, or to - * a package registered with udata_setAppData(). Using a full file or directory - * pathname for packageName is deprecated. - * NULL is used to refer to ICU data. - * @param locale The locale for which to open a resource bundle. - * @param err A UErrorCode value - * @stable ICU 2.0 - */ - ResourceBundle(const char* packageName, - const Locale& locale, - UErrorCode& err); - - /** - * Copy constructor. - * - * @param original The resource bundle to copy. - * @stable ICU 2.0 - */ - ResourceBundle(const ResourceBundle &original); - - /** - * Constructor from a C UResourceBundle. The resource bundle is - * copied and not adopted. ures_close will still need to be used on the - * original resource bundle. - * - * @param res A pointer to the C resource bundle. - * @param status A UErrorCode value. - * @stable ICU 2.0 - */ - ResourceBundle(UResourceBundle *res, - UErrorCode &status); - - /** - * Assignment operator. - * - * @param other The resource bundle to copy. 
- * @stable ICU 2.0 - */ - ResourceBundle& - operator=(const ResourceBundle& other); - - /** Destructor. - * @stable ICU 2.0 - */ - virtual ~ResourceBundle(); - - /** - * Clone this object. - * Clones can be used concurrently in multiple threads. - * If an error occurs, then NULL is returned. - * The caller must delete the clone. - * - * @return a clone of this object - * - * @see getDynamicClassID - * @stable ICU 2.8 - */ - ResourceBundle *clone() const; - - /** - * Returns the size of a resource. Size for scalar types is always 1, and for vector/table types is - * the number of child resources. - * @warning Integer array is treated as a scalar type. There are no - * APIs to access individual members of an integer array. It - * is always returned as a whole. - * - * @return number of resources in a given resource. - * @stable ICU 2.0 - */ - int32_t - getSize(void) const; - - /** - * returns a string from a string resource type - * - * @param status fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * could be a warning - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return a pointer to a zero-terminated char16_t array which lives in a memory mapped/DLL file. - * @stable ICU 2.0 - */ - UnicodeString - getString(UErrorCode& status) const; - - /** - * returns a binary data from a resource. Can be used at most primitive resource types (binaries, - * strings, ints) - * - * @param len fills in the length of resulting byte chunk - * @param status fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * could be a warning - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file. - * @stable ICU 2.0 - */ - const uint8_t* - getBinary(int32_t& len, UErrorCode& status) const; - - - /** - * returns an integer vector from a resource. 
- * - * @param len fills in the length of resulting integer vector - * @param status fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * could be a warning - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return a pointer to a vector of integers that lives in a memory mapped/DLL file. - * @stable ICU 2.0 - */ - const int32_t* - getIntVector(int32_t& len, UErrorCode& status) const; - - /** - * returns an unsigned integer from a resource. - * This integer is originally 28 bits. - * - * @param status fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * could be a warning - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return an unsigned integer value - * @stable ICU 2.0 - */ - uint32_t - getUInt(UErrorCode& status) const; - - /** - * returns a signed integer from a resource. - * This integer is originally 28 bit and the sign gets propagated. - * - * @param status fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * could be a warning - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return a signed integer value - * @stable ICU 2.0 - */ - int32_t - getInt(UErrorCode& status) const; - - /** - * Checks whether the resource has another element to iterate over. - * - * @return TRUE if there are more elements, FALSE if there is no more elements - * @stable ICU 2.0 - */ - UBool - hasNext(void) const; - - /** - * Resets the internal context of a resource so that iteration starts from the first element. - * - * @stable ICU 2.0 - */ - void - resetIterator(void); - - /** - * Returns the key associated with this resource. Not all the resources have a key - only - * those that are members of a table. 
- * - * @return a key associated to this resource, or NULL if it doesn't have a key - * @stable ICU 2.0 - */ - const char* - getKey(void) const; - - /** - * Gets the locale ID of the resource bundle as a string. - * Same as getLocale().getName() . - * - * @return the locale ID of the resource bundle as a string - * @stable ICU 2.0 - */ - const char* - getName(void) const; - - - /** - * Returns the type of a resource. Available types are defined in enum UResType - * - * @return type of the given resource. - * @stable ICU 2.0 - */ - UResType - getType(void) const; - - /** - * Returns the next resource in a given resource or NULL if there are no more resources - * - * @param status fills in the outgoing error code - * @return ResourceBundle object. - * @stable ICU 2.0 - */ - ResourceBundle - getNext(UErrorCode& status); - - /** - * Returns the next string in a resource or NULL if there are no more resources - * to iterate over. - * - * @param status fills in the outgoing error code - * @return an UnicodeString object. - * @stable ICU 2.0 - */ - UnicodeString - getNextString(UErrorCode& status); - - /** - * Returns the next string in a resource or NULL if there are no more resources - * to iterate over. - * - * @param key fill in for key associated with this string - * @param status fills in the outgoing error code - * @return an UnicodeString object. - * @stable ICU 2.0 - */ - UnicodeString - getNextString(const char ** key, - UErrorCode& status); - - /** - * Returns the resource in a resource at the specified index. - * - * @param index an index to the wanted resource. - * @param status fills in the outgoing error code - * @return ResourceBundle object. If there is an error, resource is invalid. - * @stable ICU 2.0 - */ - ResourceBundle - get(int32_t index, - UErrorCode& status) const; - - /** - * Returns the string in a given resource at the specified index. - * - * @param index an index to the wanted string. 
- * @param status fills in the outgoing error code - * @return an UnicodeString object. If there is an error, string is bogus - * @stable ICU 2.0 - */ - UnicodeString - getStringEx(int32_t index, - UErrorCode& status) const; - - /** - * Returns a resource in a resource that has a given key. This procedure works only with table - * resources. - * - * @param key a key associated with the wanted resource - * @param status fills in the outgoing error code. - * @return ResourceBundle object. If there is an error, resource is invalid. - * @stable ICU 2.0 - */ - ResourceBundle - get(const char* key, - UErrorCode& status) const; - - /** - * Returns a string in a resource that has a given key. This procedure works only with table - * resources. - * - * @param key a key associated with the wanted string - * @param status fills in the outgoing error code - * @return an UnicodeString object. If there is an error, string is bogus - * @stable ICU 2.0 - */ - UnicodeString - getStringEx(const char* key, - UErrorCode& status) const; - -#ifndef U_HIDE_DEPRECATED_API - /** - * Return the version number associated with this ResourceBundle as a string. Please - * use getVersion, as this method is going to be deprecated. - * - * @return A version number string as specified in the resource bundle or its parent. - * The caller does not own this string. - * @see getVersion - * @deprecated ICU 2.8 Use getVersion instead. - */ - const char* - getVersionNumber(void) const; -#endif /* U_HIDE_DEPRECATED_API */ - - /** - * Return the version number associated with this ResourceBundle as a UVersionInfo array. - * - * @param versionInfo A UVersionInfo array that is filled with the version number - * as specified in the resource bundle or its parent. - * @stable ICU 2.0 - */ - void - getVersion(UVersionInfo versionInfo) const; - -#ifndef U_HIDE_DEPRECATED_API - /** - * Return the Locale associated with this ResourceBundle. 
- * - * @return a Locale object - * @deprecated ICU 2.8 Use getLocale(ULocDataLocaleType type, UErrorCode &status) overload instead. - */ - const Locale& - getLocale(void) const; -#endif /* U_HIDE_DEPRECATED_API */ - - /** - * Return the Locale associated with this ResourceBundle. - * @param type You can choose between requested, valid and actual - * locale. For description see the definition of - * ULocDataLocaleType in uloc.h - * @param status just for catching illegal arguments - * - * @return a Locale object - * @stable ICU 2.8 - */ - const Locale - getLocale(ULocDataLocaleType type, UErrorCode &status) const; -#ifndef U_HIDE_INTERNAL_API - /** - * This API implements multilevel fallback - * @internal - */ - ResourceBundle - getWithFallback(const char* key, UErrorCode& status); -#endif /* U_HIDE_INTERNAL_API */ - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - * - * @stable ICU 2.2 - */ - virtual UClassID getDynamicClassID() const; - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - * - * @stable ICU 2.2 - */ - static UClassID U_EXPORT2 getStaticClassID(); - -private: - ResourceBundle(); // default constructor not implemented - - UResourceBundle *fResource; - void constructForLocale(const UnicodeString& path, const Locale& locale, UErrorCode& error); - Locale *fLocale; -}; - -U_NAMESPACE_END -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/schriter.h b/deps/node/deps/icu-small/source/common/unicode/schriter.h deleted file mode 100644 index 1a12769e..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/schriter.h +++ /dev/null @@ -1,189 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1998-2005, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -****************************************************************************** -* -* File schriter.h -* -* Modification History: -* -* Date Name Description -* 05/05/99 stephen Cleaned up. -****************************************************************************** -*/ - -#ifndef SCHRITER_H -#define SCHRITER_H - -#include "unicode/utypes.h" -#include "unicode/chariter.h" -#include "unicode/uchriter.h" - -/** - * \file - * \brief C++ API: String Character Iterator - */ - -U_NAMESPACE_BEGIN -/** - * A concrete subclass of CharacterIterator that iterates over the - * characters (code units or code points) in a UnicodeString. - * It's possible not only to create an - * iterator that iterates over an entire UnicodeString, but also to - * create one that iterates over only a subrange of a UnicodeString - * (iterators over different subranges of the same UnicodeString don't - * compare equal). - * @see CharacterIterator - * @see ForwardCharacterIterator - * @stable ICU 2.0 - */ -class U_COMMON_API StringCharacterIterator : public UCharCharacterIterator { -public: - /** - * Create an iterator over the UnicodeString referred to by "textStr". - * The UnicodeString object is copied. - * The iteration range is the whole string, and the starting position is 0. - * @param textStr The unicode string used to create an iterator - * @stable ICU 2.0 - */ - StringCharacterIterator(const UnicodeString& textStr); - - /** - * Create an iterator over the UnicodeString referred to by "textStr". - * The iteration range is the whole string, and the starting - * position is specified by "textPos". If "textPos" is outside the valid - * iteration range, the behavior of this object is undefined. 
- * @param textStr The unicode string used to create an iterator - * @param textPos The starting position of the iteration - * @stable ICU 2.0 - */ - StringCharacterIterator(const UnicodeString& textStr, - int32_t textPos); - - /** - * Create an iterator over the UnicodeString referred to by "textStr". - * The UnicodeString object is copied. - * The iteration range begins with the code unit specified by - * "textBegin" and ends with the code unit BEFORE the code unit specified - * by "textEnd". The starting position is specified by "textPos". If - * "textBegin" and "textEnd" don't form a valid range on "text" (i.e., - * textBegin >= textEnd or either is negative or greater than text.size()), - * or "textPos" is outside the range defined by "textBegin" and "textEnd", - * the behavior of this iterator is undefined. - * @param textStr The unicode string used to create the StringCharacterIterator - * @param textBegin The begin position of the iteration range - * @param textEnd The end position of the iteration range - * @param textPos The starting position of the iteration - * @stable ICU 2.0 - */ - StringCharacterIterator(const UnicodeString& textStr, - int32_t textBegin, - int32_t textEnd, - int32_t textPos); - - /** - * Copy constructor. The new iterator iterates over the same range - * of the same string as "that", and its initial position is the - * same as "that"'s current position. - * The UnicodeString object in "that" is copied. - * @param that The StringCharacterIterator to be copied - * @stable ICU 2.0 - */ - StringCharacterIterator(const StringCharacterIterator& that); - - /** - * Destructor. - * @stable ICU 2.0 - */ - virtual ~StringCharacterIterator(); - - /** - * Assignment operator. *this is altered to iterate over the same - * range of the same string as "that", and refers to the same - * character within that string as "that" does. - * @param that The object to be copied. - * @return the newly created object. 
- * @stable ICU 2.0 - */ - StringCharacterIterator& - operator=(const StringCharacterIterator& that); - - /** - * Returns true if the iterators iterate over the same range of the - * same string and are pointing at the same character. - * @param that The ForwardCharacterIterator to be compared for equality - * @return true if the iterators iterate over the same range of the - * same string and are pointing at the same character. - * @stable ICU 2.0 - */ - virtual UBool operator==(const ForwardCharacterIterator& that) const; - - /** - * Returns a new StringCharacterIterator referring to the same - * character in the same range of the same string as this one. The - * caller must delete the new iterator. - * @return the newly cloned object. - * @stable ICU 2.0 - */ - virtual CharacterIterator* clone(void) const; - - /** - * Sets the iterator to iterate over the provided string. - * @param newText The string to be iterated over - * @stable ICU 2.0 - */ - void setText(const UnicodeString& newText); - - /** - * Copies the UnicodeString under iteration into the UnicodeString - * referred to by "result". Even if this iterator iterates across - * only a part of this string, the whole string is copied. - * @param result Receives a copy of the text under iteration. - * @stable ICU 2.0 - */ - virtual void getText(UnicodeString& result); - - /** - * Return a class ID for this object (not really public) - * @return a class ID for this object. - * @stable ICU 2.0 - */ - virtual UClassID getDynamicClassID(void) const; - - /** - * Return a class ID for this class (not really public) - * @return a class ID for this class - * @stable ICU 2.0 - */ - static UClassID U_EXPORT2 getStaticClassID(void); - -protected: - /** - * Default constructor, iteration over empty string. - * @stable ICU 2.0 - */ - StringCharacterIterator(); - - /** - * Sets the iterator to iterate over the provided string. 
- * @param newText The string to be iterated over - * @param newTextLength The length of the String - * @stable ICU 2.0 - */ - void setText(const char16_t* newText, int32_t newTextLength); - - /** - * Copy of the iterated string object. - * @stable ICU 2.0 - */ - UnicodeString text; - -}; - -U_NAMESPACE_END -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/simpleformatter.h b/deps/node/deps/icu-small/source/common/unicode/simpleformatter.h deleted file mode 100644 index 850949ca..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/simpleformatter.h +++ /dev/null @@ -1,303 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2014-2016, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* simpleformatter.h -*/ - -#ifndef __SIMPLEFORMATTER_H__ -#define __SIMPLEFORMATTER_H__ - -/** - * \file - * \brief C++ API: Simple formatter, minimal subset of MessageFormat. - */ - -#include "unicode/utypes.h" -#include "unicode/unistr.h" - -U_NAMESPACE_BEGIN - -// Forward declaration: -namespace number { -namespace impl { -class SimpleModifier; -} -} - -/** - * Formats simple patterns like "{1} was born in {0}". - * Minimal subset of MessageFormat; fast, simple, minimal dependencies. - * Supports only numbered arguments with no type nor style parameters, - * and formats only string values. - * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior. - * - * Factory methods set error codes for syntax errors - * and for too few or too many arguments/placeholders. - * - * SimpleFormatter objects are thread-safe except for assignment and applying new patterns. - * - * Example: - *

- * UErrorCode errorCode = U_ZERO_ERROR;
- * SimpleFormatter fmt("{1} '{born}' in {0}", errorCode);
- * UnicodeString result;
- *
- * // Output: "paul {born} in england"
- * fmt.format("england", "paul", result, errorCode);
- * 
- * - * This class is not intended for public subclassing. - * - * @see MessageFormat - * @see UMessagePatternApostropheMode - * @stable ICU 57 - */ -class U_COMMON_API SimpleFormatter U_FINAL : public UMemory { -public: - /** - * Default constructor. - * @stable ICU 57 - */ - SimpleFormatter() : compiledPattern((char16_t)0) {} - - /** - * Constructs a formatter from the pattern string. - * - * @param pattern The pattern string. - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax. - * @stable ICU 57 - */ - SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) { - applyPattern(pattern, errorCode); - } - - /** - * Constructs a formatter from the pattern string. - * The number of arguments checked against the given limits is the - * highest argument number plus one, not the number of occurrences of arguments. - * - * @param pattern The pattern string. - * @param min The pattern must have at least this many arguments. - * @param max The pattern must have at most this many arguments. - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and - * too few or too many arguments. - * @stable ICU 57 - */ - SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max, - UErrorCode &errorCode) { - applyPatternMinMaxArguments(pattern, min, max, errorCode); - } - - /** - * Copy constructor. - * @stable ICU 57 - */ - SimpleFormatter(const SimpleFormatter& other) - : compiledPattern(other.compiledPattern) {} - - /** - * Assignment operator. - * @stable ICU 57 - */ - SimpleFormatter &operator=(const SimpleFormatter& other); - - /** - * Destructor. - * @stable ICU 57 - */ - ~SimpleFormatter(); - - /** - * Changes this object according to the new pattern. - * - * @param pattern The pattern string. 
- * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax. - * @return TRUE if U_SUCCESS(errorCode). - * @stable ICU 57 - */ - UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) { - return applyPatternMinMaxArguments(pattern, 0, INT32_MAX, errorCode); - } - - /** - * Changes this object according to the new pattern. - * The number of arguments checked against the given limits is the - * highest argument number plus one, not the number of occurrences of arguments. - * - * @param pattern The pattern string. - * @param min The pattern must have at least this many arguments. - * @param max The pattern must have at most this many arguments. - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and - * too few or too many arguments. - * @return TRUE if U_SUCCESS(errorCode). - * @stable ICU 57 - */ - UBool applyPatternMinMaxArguments(const UnicodeString &pattern, - int32_t min, int32_t max, UErrorCode &errorCode); - - /** - * @return The max argument number + 1. - * @stable ICU 57 - */ - int32_t getArgumentLimit() const { - return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length()); - } - - /** - * Formats the given value, appending to the appendTo builder. - * The argument value must not be the same object as appendTo. - * getArgumentLimit() must be at most 1. - * - * @param value0 Value for argument {0}. - * @param appendTo Gets the formatted pattern and value appended. - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return appendTo - * @stable ICU 57 - */ - UnicodeString &format( - const UnicodeString &value0, - UnicodeString &appendTo, UErrorCode &errorCode) const; - - /** - * Formats the given values, appending to the appendTo builder. 
- * An argument value must not be the same object as appendTo. - * getArgumentLimit() must be at most 2. - * - * @param value0 Value for argument {0}. - * @param value1 Value for argument {1}. - * @param appendTo Gets the formatted pattern and values appended. - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return appendTo - * @stable ICU 57 - */ - UnicodeString &format( - const UnicodeString &value0, - const UnicodeString &value1, - UnicodeString &appendTo, UErrorCode &errorCode) const; - - /** - * Formats the given values, appending to the appendTo builder. - * An argument value must not be the same object as appendTo. - * getArgumentLimit() must be at most 3. - * - * @param value0 Value for argument {0}. - * @param value1 Value for argument {1}. - * @param value2 Value for argument {2}. - * @param appendTo Gets the formatted pattern and values appended. - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return appendTo - * @stable ICU 57 - */ - UnicodeString &format( - const UnicodeString &value0, - const UnicodeString &value1, - const UnicodeString &value2, - UnicodeString &appendTo, UErrorCode &errorCode) const; - - /** - * Formats the given values, appending to the appendTo string. - * - * @param values The argument values. - * An argument value must not be the same object as appendTo. - * Can be NULL if valuesLength==getArgumentLimit()==0. - * @param valuesLength The length of the values array. - * Must be at least getArgumentLimit(). - * @param appendTo Gets the formatted pattern and values appended. - * @param offsets offsets[i] receives the offset of where - * values[i] replaced pattern argument {i}. - * Can be shorter or longer than values. Can be NULL if offsetsLength==0. - * If there is no {i} in the pattern, then offsets[i] is set to -1. - * @param offsetsLength The length of the offsets array. 
- * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return appendTo - * @stable ICU 57 - */ - UnicodeString &formatAndAppend( - const UnicodeString *const *values, int32_t valuesLength, - UnicodeString &appendTo, - int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const; - - /** - * Formats the given values, replacing the contents of the result string. - * May optimize by actually appending to the result if it is the same object - * as the value corresponding to the initial argument in the pattern. - * - * @param values The argument values. - * An argument value may be the same object as result. - * Can be NULL if valuesLength==getArgumentLimit()==0. - * @param valuesLength The length of the values array. - * Must be at least getArgumentLimit(). - * @param result Gets its contents replaced by the formatted pattern and values. - * @param offsets offsets[i] receives the offset of where - * values[i] replaced pattern argument {i}. - * Can be shorter or longer than values. Can be NULL if offsetsLength==0. - * If there is no {i} in the pattern, then offsets[i] is set to -1. - * @param offsetsLength The length of the offsets array. - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return result - * @stable ICU 57 - */ - UnicodeString &formatAndReplace( - const UnicodeString *const *values, int32_t valuesLength, - UnicodeString &result, - int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const; - - /** - * Returns the pattern text with none of the arguments. - * Like formatting with all-empty string values. - * @stable ICU 57 - */ - UnicodeString getTextWithNoArguments() const { - return getTextWithNoArguments(compiledPattern.getBuffer(), compiledPattern.length()); - } - -private: - /** - * Binary representation of the compiled pattern. - * Index 0: One more than the highest argument number. 
- * Followed by zero or more arguments or literal-text segments. - * - * An argument is stored as its number, less than ARG_NUM_LIMIT. - * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT, - * followed by that many chars. - */ - UnicodeString compiledPattern; - - static inline int32_t getArgumentLimit(const char16_t *compiledPattern, - int32_t compiledPatternLength) { - return compiledPatternLength == 0 ? 0 : compiledPattern[0]; - } - - static UnicodeString getTextWithNoArguments(const char16_t *compiledPattern, int32_t compiledPatternLength); - - static UnicodeString &format( - const char16_t *compiledPattern, int32_t compiledPatternLength, - const UnicodeString *const *values, - UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue, - int32_t *offsets, int32_t offsetsLength, - UErrorCode &errorCode); - - // Give access to internals to SimpleModifier for number formatting - friend class number::impl::SimpleModifier; -}; - -U_NAMESPACE_END - -#endif // __SIMPLEFORMATTER_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/std_string.h b/deps/node/deps/icu-small/source/common/unicode/std_string.h deleted file mode 100644 index 729c5639..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/std_string.h +++ /dev/null @@ -1,37 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2009-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: std_string.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009feb19 -* created by: Markus W. 
Scherer -*/ - -#ifndef __STD_STRING_H__ -#define __STD_STRING_H__ - -/** - * \file - * \brief C++ API: Central ICU header for including the C++ standard <string> - * header and for related definitions. - */ - -#include "unicode/utypes.h" - -// Workaround for a libstdc++ bug before libstdc++4.6 (2011). -// https://bugs.llvm.org/show_bug.cgi?id=13364 -#if defined(__GLIBCXX__) -namespace std { class type_info; } -#endif -#include - -#endif // __STD_STRING_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/strenum.h b/deps/node/deps/icu-small/source/common/unicode/strenum.h deleted file mode 100644 index fa525d4f..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/strenum.h +++ /dev/null @@ -1,278 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -*/ - -#ifndef STRENUM_H -#define STRENUM_H - -#include "unicode/uobject.h" -#include "unicode/unistr.h" - -/** - * \file - * \brief C++ API: String Enumeration - */ - -U_NAMESPACE_BEGIN - -/** - * Base class for 'pure' C++ implementations of uenum api. Adds a - * method that returns the next UnicodeString since in C++ this can - * be a common storage format for strings. - * - *

The model is that the enumeration is over strings maintained by - * a 'service.' At any point, the service might change, invalidating - * the enumerator (though this is expected to be rare). The iterator - * returns an error if this has occurred. Lack of the error is no - * guarantee that the service didn't change immediately after the - * call, so the returned string still might not be 'valid' on - * subsequent use.

- * - *

Strings may take the form of const char*, const char16_t*, or const - * UnicodeString*. The type you get is determine by the variant of - * 'next' that you call. In general the StringEnumeration is - * optimized for one of these types, but all StringEnumerations can - * return all types. Returned strings are each terminated with a NUL. - * Depending on the service data, they might also include embedded NUL - * characters, so API is provided to optionally return the true - * length, counting the embedded NULs but not counting the terminating - * NUL.

- * - *

The pointers returned by next, unext, and snext become invalid - * upon any subsequent call to the enumeration's destructor, next, - * unext, snext, or reset.

- * - * ICU 2.8 adds some default implementations and helper functions - * for subclasses. - * - * @stable ICU 2.4 - */ -class U_COMMON_API StringEnumeration : public UObject { -public: - /** - * Destructor. - * @stable ICU 2.4 - */ - virtual ~StringEnumeration(); - - /** - * Clone this object, an instance of a subclass of StringEnumeration. - * Clones can be used concurrently in multiple threads. - * If a subclass does not implement clone(), or if an error occurs, - * then NULL is returned. - * The clone functions in all subclasses return a base class pointer - * because some compilers do not support covariant (same-as-this) - * return types; cast to the appropriate subclass if necessary. - * The caller must delete the clone. - * - * @return a clone of this object - * - * @see getDynamicClassID - * @stable ICU 2.8 - */ - virtual StringEnumeration *clone() const; - - /** - *

Return the number of elements that the iterator traverses. If - * the iterator is out of sync with its service, status is set to - * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.

- * - *

The return value will not change except possibly as a result of - * a subsequent call to reset, or if the iterator becomes out of sync.

- * - *

This is a convenience function. It can end up being very - * expensive as all the items might have to be pre-fetched - * (depending on the storage format of the data being - * traversed).

- * - * @param status the error code. - * @return number of elements in the iterator. - * - * @stable ICU 2.4 */ - virtual int32_t count(UErrorCode& status) const = 0; - - /** - *

Returns the next element as a NUL-terminated char*. If there - * are no more elements, returns NULL. If the resultLength pointer - * is not NULL, the length of the string (not counting the - * terminating NUL) is returned at that address. If an error - * status is returned, the value at resultLength is undefined.

- * - *

The returned pointer is owned by this iterator and must not be - * deleted by the caller. The pointer is valid until the next call - * to next, unext, snext, reset, or the enumerator's destructor.

- * - *

If the iterator is out of sync with its service, status is set - * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.

- * - *

If the native service string is a char16_t* string, it is - * converted to char* with the invariant converter. If the - * conversion fails (because a character cannot be converted) then - * status is set to U_INVARIANT_CONVERSION_ERROR and the return - * value is undefined (though not NULL).

- * - * Starting with ICU 2.8, the default implementation calls snext() - * and handles the conversion. - * Either next() or snext() must be implemented differently by a subclass. - * - * @param status the error code. - * @param resultLength a pointer to receive the length, can be NULL. - * @return a pointer to the string, or NULL. - * - * @stable ICU 2.4 - */ - virtual const char* next(int32_t *resultLength, UErrorCode& status); - - /** - *

Returns the next element as a NUL-terminated char16_t*. If there - * are no more elements, returns NULL. If the resultLength pointer - * is not NULL, the length of the string (not counting the - * terminating NUL) is returned at that address. If an error - * status is returned, the value at resultLength is undefined.

- * - *

The returned pointer is owned by this iterator and must not be - * deleted by the caller. The pointer is valid until the next call - * to next, unext, snext, reset, or the enumerator's destructor.

- * - *

If the iterator is out of sync with its service, status is set - * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.

- * - * Starting with ICU 2.8, the default implementation calls snext() - * and handles the conversion. - * - * @param status the error code. - * @param resultLength a ponter to receive the length, can be NULL. - * @return a pointer to the string, or NULL. - * - * @stable ICU 2.4 - */ - virtual const char16_t* unext(int32_t *resultLength, UErrorCode& status); - - /** - *

Returns the next element a UnicodeString*. If there are no - * more elements, returns NULL.

- * - *

The returned pointer is owned by this iterator and must not be - * deleted by the caller. The pointer is valid until the next call - * to next, unext, snext, reset, or the enumerator's destructor.

- * - *

If the iterator is out of sync with its service, status is set - * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.

- * - * Starting with ICU 2.8, the default implementation calls next() - * and handles the conversion. - * Either next() or snext() must be implemented differently by a subclass. - * - * @param status the error code. - * @return a pointer to the string, or NULL. - * - * @stable ICU 2.4 - */ - virtual const UnicodeString* snext(UErrorCode& status); - - /** - *

Resets the iterator. This re-establishes sync with the - * service and rewinds the iterator to start at the first - * element.

- * - *

Previous pointers returned by next, unext, or snext become - * invalid, and the value returned by count might change.

- * - * @param status the error code. - * - * @stable ICU 2.4 - */ - virtual void reset(UErrorCode& status) = 0; - - /** - * Compares this enumeration to other to check if both are equal - * - * @param that The other string enumeration to compare this object to - * @return TRUE if the enumerations are equal. FALSE if not. - * @stable ICU 3.6 - */ - virtual UBool operator==(const StringEnumeration& that)const; - /** - * Compares this enumeration to other to check if both are not equal - * - * @param that The other string enumeration to compare this object to - * @return TRUE if the enumerations are equal. FALSE if not. - * @stable ICU 3.6 - */ - virtual UBool operator!=(const StringEnumeration& that)const; - -protected: - /** - * UnicodeString field for use with default implementations and subclasses. - * @stable ICU 2.8 - */ - UnicodeString unistr; - /** - * char * default buffer for use with default implementations and subclasses. - * @stable ICU 2.8 - */ - char charsBuffer[32]; - /** - * char * buffer for use with default implementations and subclasses. - * Allocated in constructor and in ensureCharsCapacity(). - * @stable ICU 2.8 - */ - char *chars; - /** - * Capacity of chars, for use with default implementations and subclasses. - * @stable ICU 2.8 - */ - int32_t charsCapacity; - - /** - * Default constructor for use with default implementations and subclasses. - * @stable ICU 2.8 - */ - StringEnumeration(); - - /** - * Ensures that chars is at least as large as the requested capacity. - * For use with default implementations and subclasses. - * - * @param capacity Requested capacity. - * @param status ICU in/out error code. - * @stable ICU 2.8 - */ - void ensureCharsCapacity(int32_t capacity, UErrorCode &status); - - /** - * Converts s to Unicode and sets unistr to the result. - * For use with default implementations and subclasses, - * especially for implementations of snext() in terms of next(). 
- * This is provided with a helper function instead of a default implementation - * of snext() to avoid potential infinite loops between next() and snext(). - * - * For example: - * \code - * const UnicodeString* snext(UErrorCode& status) { - * int32_t resultLength=0; - * const char *s=next(&resultLength, status); - * return setChars(s, resultLength, status); - * } - * \endcode - * - * @param s String to be converted to Unicode. - * @param length Length of the string. - * @param status ICU in/out error code. - * @return A pointer to unistr. - * @stable ICU 2.8 - */ - UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status); -}; - -U_NAMESPACE_END - -/* STRENUM_H */ -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/stringoptions.h b/deps/node/deps/icu-small/source/common/unicode/stringoptions.h deleted file mode 100644 index 7b9f7094..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/stringoptions.h +++ /dev/null @@ -1,190 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// stringoptions.h -// created: 2017jun08 Markus W. Scherer - -#ifndef __STRINGOPTIONS_H__ -#define __STRINGOPTIONS_H__ - -#include "unicode/utypes.h" - -/** - * \file - * \brief C API: Bit set option bit constants for various string and character processing functions. - */ - -/** - * Option value for case folding: Use default mappings defined in CaseFolding.txt. - * - * @stable ICU 2.0 - */ -#define U_FOLD_CASE_DEFAULT 0 - -/** - * Option value for case folding: - * - * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I - * and dotless i appropriately for Turkic languages (tr, az). - * - * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that - * are to be included for default mappings and - * excluded for the Turkic-specific mappings. 
- * - * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that - * are to be excluded for default mappings and - * included for the Turkic-specific mappings. - * - * @stable ICU 2.0 - */ -#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 - -/** - * Titlecase the string as a whole rather than each word. - * (Titlecase only the character at index 0, possibly adjusted.) - * Option bits value for titlecasing APIs that take an options bit set. - * - * It is an error to specify multiple titlecasing iterator options together, - * including both an options bit and an explicit BreakIterator. - * - * @see U_TITLECASE_ADJUST_TO_CASED - * @stable ICU 60 - */ -#define U_TITLECASE_WHOLE_STRING 0x20 - -/** - * Titlecase sentences rather than words. - * (Titlecase only the first character of each sentence, possibly adjusted.) - * Option bits value for titlecasing APIs that take an options bit set. - * - * It is an error to specify multiple titlecasing iterator options together, - * including both an options bit and an explicit BreakIterator. - * - * @see U_TITLECASE_ADJUST_TO_CASED - * @stable ICU 60 - */ -#define U_TITLECASE_SENTENCES 0x40 - -/** - * Do not lowercase non-initial parts of words when titlecasing. - * Option bit for titlecasing APIs that take an options bit set. - * - * By default, titlecasing will titlecase the character at each - * (possibly adjusted) BreakIterator index and - * lowercase all other characters up to the next iterator index. - * With this option, the other characters will not be modified. - * - * @see U_TITLECASE_ADJUST_TO_CASED - * @see UnicodeString::toTitle - * @see CaseMap::toTitle - * @see ucasemap_setOptions - * @see ucasemap_toTitle - * @see ucasemap_utf8ToTitle - * @stable ICU 3.8 - */ -#define U_TITLECASE_NO_LOWERCASE 0x100 - -/** - * Do not adjust the titlecasing BreakIterator indexes; - * titlecase exactly the characters at breaks from the iterator. - * Option bit for titlecasing APIs that take an options bit set. 
- * - * By default, titlecasing will take each break iterator index, - * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED), - * and titlecase that one. - * - * Other characters are lowercased. - * - * It is an error to specify multiple titlecasing adjustment options together. - * - * @see U_TITLECASE_ADJUST_TO_CASED - * @see U_TITLECASE_NO_LOWERCASE - * @see UnicodeString::toTitle - * @see CaseMap::toTitle - * @see ucasemap_setOptions - * @see ucasemap_toTitle - * @see ucasemap_utf8ToTitle - * @stable ICU 3.8 - */ -#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 - -/** - * Adjust each titlecasing BreakIterator index to the next cased character. - * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).) - * Option bit for titlecasing APIs that take an options bit set. - * - * This used to be the default index adjustment in ICU. - * Since ICU 60, the default index adjustment is to the next character that is - * a letter, number, symbol, or private use code point. - * (Uncased modifier letters are skipped.) - * The difference in behavior is small for word titlecasing, - * but the new adjustment is much better for whole-string and sentence titlecasing: - * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»". - * - * It is an error to specify multiple titlecasing adjustment options together. - * - * @see U_TITLECASE_NO_BREAK_ADJUSTMENT - * @stable ICU 60 - */ -#define U_TITLECASE_ADJUST_TO_CASED 0x400 - -/** - * Option for string transformation functions to not first reset the Edits object. - * Used for example in some case-mapping and normalization functions. - * - * @see CaseMap - * @see Edits - * @see Normalizer2 - * @stable ICU 60 - */ -#define U_EDITS_NO_RESET 0x2000 - -/** - * Omit unchanged text when recording how source substrings - * relate to changed and unchanged result substrings. - * Used for example in some case-mapping and normalization functions. 
- * - * @see CaseMap - * @see Edits - * @see Normalizer2 - * @stable ICU 60 - */ -#define U_OMIT_UNCHANGED_TEXT 0x4000 - -/** - * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: - * Compare strings in code point order instead of code unit order. - * @stable ICU 2.2 - */ -#define U_COMPARE_CODE_POINT_ORDER 0x8000 - -/** - * Option bit for unorm_compare: - * Perform case-insensitive comparison. - * @stable ICU 2.2 - */ -#define U_COMPARE_IGNORE_CASE 0x10000 - -/** - * Option bit for unorm_compare: - * Both input strings are assumed to fulfill FCD conditions. - * @stable ICU 2.2 - */ -#define UNORM_INPUT_IS_FCD 0x20000 - -// Related definitions elsewhere. -// Options that are not meaningful in the same functions -// can share the same bits. -// -// Public: -// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 -// -// Internal: (may change or be removed) -// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff -// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7 -// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0 -// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600 -// ustr_imp.h #define _STRNCMP_STYLE 0x1000 -// unormcmp.cpp #define _COMPARE_EQUIV 0x80000 - -#endif // __STRINGOPTIONS_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/stringpiece.h b/deps/node/deps/icu-small/source/common/unicode/stringpiece.h deleted file mode 100644 index 640fbac5..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/stringpiece.h +++ /dev/null @@ -1,224 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -// Copyright (C) 2009-2013, International Business Machines -// Corporation and others. All Rights Reserved. -// -// Copyright 2001 and onwards Google Inc. -// Author: Sanjay Ghemawat - -// This code is a contribution of Google code, and the style used here is -// a compromise between the original Google code and the ICU coding guidelines. 
-// For example, data types are ICU-ified (size_t,int->int32_t), -// and API comments doxygen-ified, but function names and behavior are -// as in the original, if possible. -// Assertion-style error handling, not available in ICU, was changed to -// parameter "pinning" similar to UnicodeString. -// -// In addition, this is only a partial port of the original Google code, -// limited to what was needed so far. The (nearly) complete original code -// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib -// (see ICU ticket 6765, r25517). - -#ifndef __STRINGPIECE_H__ -#define __STRINGPIECE_H__ - -/** - * \file - * \brief C++ API: StringPiece: Read-only byte string wrapper class. - */ - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/std_string.h" - -// Arghh! I wish C++ literals were "string". - -U_NAMESPACE_BEGIN - -/** - * A string-like object that points to a sized piece of memory. - * - * We provide non-explicit singleton constructors so users can pass - * in a "const char*" or a "string" wherever a "StringPiece" is - * expected. - * - * Functions or methods may use StringPiece parameters to accept either a - * "const char*" or a "string" value that will be implicitly converted to a - * StringPiece. - * - * Systematic usage of StringPiece is encouraged as it will reduce unnecessary - * conversions from "const char*" to "string" and back again. - * - * @stable ICU 4.2 - */ -class U_COMMON_API StringPiece : public UMemory { - private: - const char* ptr_; - int32_t length_; - - public: - /** - * Default constructor, creates an empty StringPiece. - * @stable ICU 4.2 - */ - StringPiece() : ptr_(NULL), length_(0) { } - /** - * Constructs from a NUL-terminated const char * pointer. - * @param str a NUL-terminated const char * pointer - * @stable ICU 4.2 - */ - StringPiece(const char* str); - /** - * Constructs from a std::string. 
- * @stable ICU 4.2 - */ - StringPiece(const std::string& str) - : ptr_(str.data()), length_(static_cast(str.size())) { } - /** - * Constructs from a const char * pointer and a specified length. - * @param offset a const char * pointer (need not be terminated) - * @param len the length of the string; must be non-negative - * @stable ICU 4.2 - */ - StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { } - /** - * Substring of another StringPiece. - * @param x the other StringPiece - * @param pos start position in x; must be non-negative and <= x.length(). - * @stable ICU 4.2 - */ - StringPiece(const StringPiece& x, int32_t pos); - /** - * Substring of another StringPiece. - * @param x the other StringPiece - * @param pos start position in x; must be non-negative and <= x.length(). - * @param len length of the substring; - * must be non-negative and will be pinned to at most x.length() - pos. - * @stable ICU 4.2 - */ - StringPiece(const StringPiece& x, int32_t pos, int32_t len); - - /** - * Returns the string pointer. May be NULL if it is empty. - * - * data() may return a pointer to a buffer with embedded NULs, and the - * returned buffer may or may not be null terminated. Therefore it is - * typically a mistake to pass data() to a routine that expects a NUL - * terminated string. - * @return the string pointer - * @stable ICU 4.2 - */ - const char* data() const { return ptr_; } - /** - * Returns the string length. Same as length(). - * @return the string length - * @stable ICU 4.2 - */ - int32_t size() const { return length_; } - /** - * Returns the string length. Same as size(). - * @return the string length - * @stable ICU 4.2 - */ - int32_t length() const { return length_; } - /** - * Returns whether the string is empty. - * @return TRUE if the string is empty - * @stable ICU 4.2 - */ - UBool empty() const { return length_ == 0; } - - /** - * Sets to an empty string. 
- * @stable ICU 4.2 - */ - void clear() { ptr_ = NULL; length_ = 0; } - - /** - * Reset the stringpiece to refer to new data. - * @param xdata pointer the new string data. Need not be nul terminated. - * @param len the length of the new data - * @stable ICU 4.8 - */ - void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; } - - /** - * Reset the stringpiece to refer to new data. - * @param str a pointer to a NUL-terminated string. - * @stable ICU 4.8 - */ - void set(const char* str); - - /** - * Removes the first n string units. - * @param n prefix length, must be non-negative and <=length() - * @stable ICU 4.2 - */ - void remove_prefix(int32_t n) { - if (n >= 0) { - if (n > length_) { - n = length_; - } - ptr_ += n; - length_ -= n; - } - } - - /** - * Removes the last n string units. - * @param n suffix length, must be non-negative and <=length() - * @stable ICU 4.2 - */ - void remove_suffix(int32_t n) { - if (n >= 0) { - if (n <= length_) { - length_ -= n; - } else { - length_ = 0; - } - } - } - - /** - * Maximum integer, used as a default value for substring methods. - * @stable ICU 4.2 - */ - static const int32_t npos; // = 0x7fffffff; - - /** - * Returns a substring of this StringPiece. - * @param pos start position; must be non-negative and <= length(). - * @param len length of the substring; - * must be non-negative and will be pinned to at most length() - pos. - * @return the substring StringPiece - * @stable ICU 4.2 - */ - StringPiece substr(int32_t pos, int32_t len = npos) const { - return StringPiece(*this, pos, len); - } -}; - -/** - * Global operator == for StringPiece - * @param x The first StringPiece to compare. - * @param y The second StringPiece to compare. - * @return TRUE if the string data is equal - * @stable ICU 4.8 - */ -U_EXPORT UBool U_EXPORT2 -operator==(const StringPiece& x, const StringPiece& y); - -/** - * Global operator != for StringPiece - * @param x The first StringPiece to compare. 
- * @param y The second StringPiece to compare. - * @return TRUE if the string data is not equal - * @stable ICU 4.8 - */ -inline UBool operator!=(const StringPiece& x, const StringPiece& y) { - return !(x == y); -} - -U_NAMESPACE_END - -#endif // __STRINGPIECE_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/stringtriebuilder.h b/deps/node/deps/icu-small/source/common/unicode/stringtriebuilder.h deleted file mode 100644 index c27fbd67..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/stringtriebuilder.h +++ /dev/null @@ -1,421 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2012,2014, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: stringtriebuilder.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010dec24 -* created by: Markus W. Scherer -*/ - -#ifndef __STRINGTRIEBUILDER_H__ -#define __STRINGTRIEBUILDER_H__ - -#include "unicode/utypes.h" -#include "unicode/uobject.h" - -/** - * \file - * \brief C++ API: Builder API for trie builders - */ - -// Forward declaration. -/// \cond -struct UHashtable; -typedef struct UHashtable UHashtable; -/// \endcond - -/** - * Build options for BytesTrieBuilder and CharsTrieBuilder. - * @stable ICU 4.8 - */ -enum UStringTrieBuildOption { - /** - * Builds a trie quickly. - * @stable ICU 4.8 - */ - USTRINGTRIE_BUILD_FAST, - /** - * Builds a trie more slowly, attempting to generate - * a shorter but equivalent serialization. - * This build option also uses more memory. - * - * This option can be effective when many integer values are the same - * and string/byte sequence suffixes can be shared. - * Runtime speed is not expected to improve. 
- * @stable ICU 4.8 - */ - USTRINGTRIE_BUILD_SMALL -}; - -U_NAMESPACE_BEGIN - -/** - * Base class for string trie builder classes. - * - * This class is not intended for public subclassing. - * @stable ICU 4.8 - */ -class U_COMMON_API StringTrieBuilder : public UObject { -public: -#ifndef U_HIDE_INTERNAL_API - /** @internal */ - static int32_t hashNode(const void *node); - /** @internal */ - static UBool equalNodes(const void *left, const void *right); -#endif /* U_HIDE_INTERNAL_API */ - -protected: - // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API - // or else the compiler will create a public default constructor. - /** @internal */ - StringTrieBuilder(); - /** @internal */ - virtual ~StringTrieBuilder(); - -#ifndef U_HIDE_INTERNAL_API - /** @internal */ - void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode); - /** @internal */ - void deleteCompactBuilder(); - - /** @internal */ - void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode); - - /** @internal */ - int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex); - /** @internal */ - int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length); -#endif /* U_HIDE_INTERNAL_API */ - - class Node; - -#ifndef U_HIDE_INTERNAL_API - /** @internal */ - Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode); - /** @internal */ - Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, - int32_t length, UErrorCode &errorCode); -#endif /* U_HIDE_INTERNAL_API */ - - /** @internal */ - virtual int32_t getElementStringLength(int32_t i) const = 0; - /** @internal */ - virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0; - /** @internal */ - virtual int32_t getElementValue(int32_t i) const = 0; - - // Finds the first unit index after this one where - // the first and last element have different units again. 
- /** @internal */ - virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0; - - // Number of different units at unitIndex. - /** @internal */ - virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0; - /** @internal */ - virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0; - /** @internal */ - virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0; - - /** @internal */ - virtual UBool matchNodesCanHaveValues() const = 0; - - /** @internal */ - virtual int32_t getMaxBranchLinearSubNodeLength() const = 0; - /** @internal */ - virtual int32_t getMinLinearMatch() const = 0; - /** @internal */ - virtual int32_t getMaxLinearMatchLength() const = 0; - -#ifndef U_HIDE_INTERNAL_API - // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength). - /** @internal */ - static const int32_t kMaxBranchLinearSubNodeLength=5; - - // Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units. - // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up. - /** @internal */ - static const int32_t kMaxSplitBranchLevels=14; - - /** - * Makes sure that there is only one unique node registered that is - * equivalent to newNode. - * @param newNode Input node. The builder takes ownership. - * @param errorCode ICU in/out UErrorCode. - Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL. - * @return newNode if it is the first of its kind, or - * an equivalent node if newNode is a duplicate. - * @internal - */ - Node *registerNode(Node *newNode, UErrorCode &errorCode); - /** - * Makes sure that there is only one unique FinalValueNode registered - * with this value. - * Avoids creating a node if the value is a duplicate. - * @param value A final value. - * @param errorCode ICU in/out UErrorCode. 
- Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL. - * @return A FinalValueNode with the given value. - * @internal - */ - Node *registerFinalValue(int32_t value, UErrorCode &errorCode); -#endif /* U_HIDE_INTERNAL_API */ - - /* - * C++ note: - * registerNode() and registerFinalValue() take ownership of their input nodes, - * and only return owned nodes. - * If they see a failure UErrorCode, they will delete the input node. - * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR. - * If there is a failure, they return NULL. - * - * NULL Node pointers can be safely passed into other Nodes because - * they call the static Node::hashCode() which checks for a NULL pointer first. - * - * Therefore, as long as builder functions register a new node, - * they need to check for failures only before explicitly dereferencing - * a Node pointer, or before setting a new UErrorCode. - */ - - // Hash set of nodes, maps from nodes to integer 1. - /** @internal */ - UHashtable *nodes; - - // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, - // it is needed for layout of other objects. - /** - * @internal - * \cond - */ - class Node : public UObject { - public: - Node(int32_t initialHash) : hash(initialHash), offset(0) {} - inline int32_t hashCode() const { return hash; } - // Handles node==NULL. - static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); } - // Base class operator==() compares the actual class types. - virtual UBool operator==(const Node &other) const; - inline UBool operator!=(const Node &other) const { return !operator==(other); } - /** - * Traverses the Node graph and numbers branch edges, with rightmost edges first. - * This is to avoid writing a duplicate node twice. - * - * Branch nodes in this trie data structure are not symmetric. - * Most branch edges "jump" to other nodes but the rightmost branch edges - * just continue without a jump. 
- * Therefore, write() must write the rightmost branch edge last - * (trie units are written backwards), and must write it at that point even if - * it is a duplicate of a node previously written elsewhere. - * - * This function visits and marks right branch edges first. - * Edges are numbered with increasingly negative values because we share the - * offset field which gets positive values when nodes are written. - * A branch edge also remembers the first number for any of its edges. - * - * When a further-left branch edge has a number in the range of the rightmost - * edge's numbers, then it will be written as part of the required right edge - * and we can avoid writing it first. - * - * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative - * edge numbers. - * - * @param edgeNumber The first edge number for this node and its sub-nodes. - * @return An edge number that is at least the maximum-negative - * of the input edge number and the numbers of this node and all of its sub-nodes. - */ - virtual int32_t markRightEdgesFirst(int32_t edgeNumber); - // write() must set the offset to a positive value. - virtual void write(StringTrieBuilder &builder) = 0; - // See markRightEdgesFirst. - inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight, - StringTrieBuilder &builder) { - // Note: Edge numbers are negative, lastRight<=firstRight. - // If offset>0 then this node and its sub-nodes have been written already - // and we need not write them again. - // If this node is part of the unwritten right branch edge, - // then we wait until that is written. - if(offset<0 && (offsetA symbol table maintains two kinds of mappings. The first is - * between symbolic names and their values. 
For example, if the - * variable with the name "start" is set to the value "alpha" - * (perhaps, though not necessarily, through an expression such as - * "$start=alpha"), then the call lookup("start") will return the - * char[] array ['a', 'l', 'p', 'h', 'a']. - * - *

The second kind of mapping is between character values and - * UnicodeMatcher objects. This is used by RuleBasedTransliterator, - * which uses characters in the private use area to represent objects - * such as UnicodeSets. If U+E015 is mapped to the UnicodeSet [a-z], - * then lookupMatcher(0xE015) will return the UnicodeSet [a-z]. - * - *

Finally, a symbol table defines parsing behavior for symbolic - * names. All symbolic names start with the SYMBOL_REF character. - * When a parser encounters this character, it calls parseReference() - * with the position immediately following the SYMBOL_REF. The symbol - * table parses the name, if there is one, and returns it. - * - * @stable ICU 2.8 - */ -class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ { -public: - - /** - * The character preceding a symbol reference name. - * @stable ICU 2.8 - */ - enum { SYMBOL_REF = 0x0024 /*$*/ }; - - /** - * Destructor. - * @stable ICU 2.8 - */ - virtual ~SymbolTable(); - - /** - * Lookup the characters associated with this string and return it. - * Return NULL if no such name exists. The resultant - * string may have length zero. - * @param s the symbolic name to lookup - * @return a string containing the name's value, or NULL if - * there is no mapping for s. - * @stable ICU 2.8 - */ - virtual const UnicodeString* lookup(const UnicodeString& s) const = 0; - - /** - * Lookup the UnicodeMatcher associated with the given character, and - * return it. Return NULL if not found. - * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive. - * @return the UnicodeMatcher object represented by the given - * character, or NULL if there is no mapping for ch. - * @stable ICU 2.8 - */ - virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0; - - /** - * Parse a symbol reference name from the given string, starting - * at the given position. If no valid symbol reference name is - * found, return the empty string and leave pos unchanged. That is, if the - * character at pos cannot start a name, or if pos is at or after - * text.length(), then return an empty string. This indicates an - * isolated SYMBOL_REF character. - * @param text the text to parse for the name - * @param pos on entry, the index of the first character to parse. 
- * This is the character following the SYMBOL_REF character. On - * exit, the index after the last parsed character. If the parse - * failed, pos is unchanged on exit. - * @param limit the index after the last character to be parsed. - * @return the parsed name, or an empty string if there is no - * valid symbolic name at the given position. - * @stable ICU 2.8 - */ - virtual UnicodeString parseReference(const UnicodeString& text, - ParsePosition& pos, int32_t limit) const = 0; -}; -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ubidi.h b/deps/node/deps/icu-small/source/common/unicode/ubidi.h deleted file mode 100644 index f4875c88..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ubidi.h +++ /dev/null @@ -1,2207 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: ubidi.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999jul27 -* created by: Markus W. Scherer, updated by Matitiahu Allouche -*/ - -#ifndef UBIDI_H -#define UBIDI_H - -#include "unicode/utypes.h" -#include "unicode/uchar.h" -#include "unicode/localpointer.h" - -/** - *\file - * \brief C API: Bidi algorithm - * - *

Bidi algorithm for ICU

- * - * This is an implementation of the Unicode Bidirectional Algorithm. - * The algorithm is defined in the - * Unicode Standard Annex #9.

- * - * Note: Libraries that perform a bidirectional algorithm and - * reorder strings accordingly are sometimes called "Storage Layout Engines". - * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such - * "Storage Layout Engines". - * - *

General remarks about the API:

- * - * In functions with an error code parameter, - * the pErrorCode pointer must be valid - * and the value that it points to must not indicate a failure before - * the function call. Otherwise, the function returns immediately. - * After the function call, the value indicates success or failure.

- * - * The "limit" of a sequence of characters is the position just after their - * last character, i.e., one more than that position.

- * - * Some of the API functions provide access to "runs". - * Such a "run" is defined as a sequence of characters - * that are at the same embedding level - * after performing the Bidi algorithm.

- * - * @author Markus W. Scherer - * @version 1.0 - * - * - *

Sample code for the ICU Bidi API

- * - *
Rendering a paragraph with the ICU Bidi API
- * - * This is (hypothetical) sample code that illustrates - * how the ICU Bidi API could be used to render a paragraph of text. - * Rendering code depends highly on the graphics system, - * therefore this sample code must make a lot of assumptions, - * which may or may not match any existing graphics system's properties. - * - *

The basic assumptions are:

- *
    - *
  • Rendering is done from left to right on a horizontal line.
  • - *
  • A run of single-style, unidirectional text can be rendered at once.
  • - *
  • Such a run of text is passed to the graphics system with - * characters (code units) in logical order.
  • - *
  • The line-breaking algorithm is very complicated - * and Locale-dependent - - * and therefore its implementation omitted from this sample code.
  • - *
- * - *
- * \code
- *#include "unicode/ubidi.h"
- *
- *typedef enum {
- *     styleNormal=0, styleSelected=1,
- *     styleBold=2, styleItalics=4,
- *     styleSuper=8, styleSub=16
- *} Style;
- *
- *typedef struct { int32_t limit; Style style; } StyleRun;
- *
- *int getTextWidth(const UChar *text, int32_t start, int32_t limit,
- *                  const StyleRun *styleRuns, int styleRunCount);
- *
- * // set *pLimit and *pStyleRunLimit for a line
- * // from text[start] and from styleRuns[styleRunStart]
- * // using ubidi_getLogicalRun(para, ...)
- *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
- *                  UBiDi *para,
- *                  const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
- *                  int *pLineWidth);
- *
- * // render runs on a line sequentially, always from left to right
- *
- * // prepare rendering a new line
- * void startLine(UBiDiDirection textDirection, int lineWidth);
- *
- * // render a run of text and advance to the right by the run width
- * // the text[start..limit-1] is always in logical order
- * void renderRun(const UChar *text, int32_t start, int32_t limit,
- *               UBiDiDirection textDirection, Style style);
- *
- * // We could compute a cross-product
- * // from the style runs with the directional runs
- * // and then reorder it.
- * // Instead, here we iterate over each run type
- * // and render the intersections -
- * // with shortcuts in simple (and common) cases.
- * // renderParagraph() is the main function.
- *
- * // render a directional run with
- * // (possibly) multiple style runs intersecting with it
- * void renderDirectionalRun(const UChar *text,
- *                           int32_t start, int32_t limit,
- *                           UBiDiDirection direction,
- *                           const StyleRun *styleRuns, int styleRunCount) {
- *     int i;
- *
- *     // iterate over style runs
- *     if(direction==UBIDI_LTR) {
- *         int styleLimit;
- *
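- *         for(i=0; i<styleRunCount; ++i) {
- *             styleLimit=styleRun[i].limit;
- *             if(start<styleLimit) {
- *                 if(styleLimit>limit) { styleLimit=limit; }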
- *                 renderRun(text, start, styleLimit,
- *                           direction, styleRun[i].style);
- *                 if(styleLimit==limit) { break; }
- *                 start=styleLimit;
- *             }
- *         }
- *     } else {
- *         int styleStart;
- *
- *         for(i=styleRunCount-1; i>=0; --i) {
- *             if(i>0) {
- *                 styleStart=styleRun[i-1].limit;
- *             } else {
- *                 styleStart=0;
- *             }
- *             if(limit>=styleStart) {
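- *                 if(styleStart<start) { styleStart=start; }
- *                 renderRun(text, styleStart, limit,
- *                           direction, styleRun[i].style);
- *                 if(styleStart==start) { break; }
- *                 limit=styleStart;
- *             }
- *         }
- *     }
- * }
- *
- * // the line object represents text[start..limit-1]
- * void renderLine(UBiDi *line, const UChar *text,
- *                 int32_t start, int32_t limit,
- *                 const StyleRun *styleRuns, int styleRunCount) {
- *     UBiDiDirection direction=ubidi_getDirection(line);
- *     if(direction!=UBIDI_MIXED) {
- *         // unidirectional
- *         if(styleRunCount<=1) {
- *             renderRun(text, start, limit, direction, styleRuns[0].style);
- *         } else {
- *             renderDirectionalRun(text, start, limit,
- *                                  direction, styleRuns, styleRunCount);
- *         }
- *     } else {
- *         // mixed-directional
- *         int32_t count, i, length;
- *         UBiDiLevel level;
- *
- *         count=ubidi_countRuns(para, pErrorCode);
- *         if(U_SUCCESS(*pErrorCode)) {
- *             if(styleRunCount<=1) {
- *                 Style style=styleRuns[0].style;
- *
- *                 // iterate over directional runs
- *                for(i=0; i<count; ++i) {
- *                    direction=ubidi_getVisualRun(para, i, &start, &length);
- *                     renderRun(text, start, start+length, direction, style);
- *                }
- *             } else {
- *                 int32_t j;
- *
- *                 // iterate over both directional and style runs
- *                 for(i=0; i<count; ++i) {
- *                     direction=ubidi_getVisualRun(line, i, &start, &length);
- *                     renderDirectionalRun(text, start, start+length,
- *                                          direction, styleRuns, styleRunCount);
- *                 }
- *             }
- *         }
- *     }
- * }
- *
- *void renderParagraph(const UChar *text, int32_t length,
- *                     UBiDiDirection textDirection,
- *                      const StyleRun *styleRuns, int styleRunCount,
- *                      int lineWidth,
- *                      UErrorCode *pErrorCode) {
- *     UBiDi *para;
- *
- *     if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
- *         return;
- *     }
- *
- *     para=ubidi_openSized(length, 0, pErrorCode);
- *     if(para==NULL) { return; }
- *
- *     ubidi_setPara(para, text, length,
- *                   textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
- *                   NULL, pErrorCode);
- *     if(U_SUCCESS(*pErrorCode)) {
- *         UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
- *         StyleRun styleRun={ length, styleNormal };
- *         int width;
- *
- *         if(styleRuns==NULL || styleRunCount<=0) {
- *            styleRunCount=1;
- *             styleRuns=&styleRun;
- *         }
- *
- *        // assume styleRuns[styleRunCount-1].limit>=length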
- *
- *         width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
- *         if(width<=lineWidth) {
- *             // everything fits onto one line
- *
- *            // prepare rendering a new line from either left or right
- *             startLine(paraLevel, width);
- *
- *             renderLine(para, text, 0, length,
- *                        styleRuns, styleRunCount);
- *         } else {
- *             UBiDi *line;
- *
- *             // we need to render several lines
- *             line=ubidi_openSized(length, 0, pErrorCode);
- *             if(line!=NULL) {
- *                 int32_t start=0, limit;
- *                 int styleRunStart=0, styleRunLimit;
- *
- *                 for(;;) {
- *                     limit=length;
- *                     styleRunLimit=styleRunCount;
- *                     getLineBreak(text, start, &limit, para,
- *                                  styleRuns, styleRunStart, &styleRunLimit,
- *                                 &width);
- *                     ubidi_setLine(para, start, limit, line, pErrorCode);
- *                     if(U_SUCCESS(*pErrorCode)) {
- *                         // prepare rendering a new line
- *                         // from either left or right
- *                         startLine(paraLevel, width);
- *
- *                         renderLine(line, text, start, limit,
- *                                    styleRuns+styleRunStart,
- *                                    styleRunLimit-styleRunStart);
- *                     }
- *                     if(limit==length) { break; }
- *                     start=limit;
- *                     styleRunStart=styleRunLimit-1;
- *                     if(start>=styleRuns[styleRunStart].limit) {
- *                         ++styleRunStart;
- *                     }
- *                 }
- *
- *                 ubidi_close(line);
- *             }
- *        }
- *    }
- *
- *     ubidi_close(para);
- *}
- *\endcode
- * 
- */ - -/*DOCXX_TAG*/ -/*@{*/ - -/** - * UBiDiLevel is the type of the level values in this - * Bidi implementation. - * It holds an embedding level and indicates the visual direction - * by its bit 0 (even/odd value).

- * - * It can also hold non-level values for the - * paraLevel and embeddingLevels - * arguments of ubidi_setPara(); there: - *

    - *
  • bit 7 of an embeddingLevels[] - * value indicates whether the using application is - * specifying the level of a character to override whatever the - * Bidi implementation would resolve it to.
  • - *
  • paraLevel can be set to the - * pseudo-level values UBIDI_DEFAULT_LTR - * and UBIDI_DEFAULT_RTL.
  • - *
- * - * @see ubidi_setPara - * - *

The related constants are not real, valid level values. - * UBIDI_DEFAULT_XXX can be used to specify - * a default for the paragraph level for - * when the ubidi_setPara() function - * shall determine it but there is no - * strongly typed character in the input.

- * - * Note that the value for UBIDI_DEFAULT_LTR is even - * and the one for UBIDI_DEFAULT_RTL is odd, - * just like with normal LTR and RTL level values - - * these special values are designed that way. Also, the implementation - * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. - * - * Note: The numeric values of the related constants will not change: - * They are tied to the use of 7-bit byte values (plus the override bit) - * and of the UBiDiLevel=uint8_t data type in this API. - * - * @see UBIDI_DEFAULT_LTR - * @see UBIDI_DEFAULT_RTL - * @see UBIDI_LEVEL_OVERRIDE - * @see UBIDI_MAX_EXPLICIT_LEVEL - * @stable ICU 2.0 - */ -typedef uint8_t UBiDiLevel; - -/** Paragraph level setting.

- * - * Constant indicating that the base direction depends on the first strong - * directional character in the text according to the Unicode Bidirectional - * Algorithm. If no strong directional character is present, - * then set the paragraph level to 0 (left-to-right).

- * - * If this value is used in conjunction with reordering modes - * UBIDI_REORDER_INVERSE_LIKE_DIRECT or - * UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the text to reorder - * is assumed to be visual LTR, and the text after reordering is required - * to be the corresponding logical string with appropriate contextual - * direction. The direction of the result string will be RTL if either - * the righmost or leftmost strong character of the source text is RTL - * or Arabic Letter, the direction will be LTR otherwise.

- * - * If reordering option UBIDI_OPTION_INSERT_MARKS is set, an RLM may - * be added at the beginning of the result string to ensure round trip - * (that the result string, when reordered back to visual, will produce - * the original source text). - * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT - * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL - * @stable ICU 2.0 - */ -#define UBIDI_DEFAULT_LTR 0xfe - -/** Paragraph level setting.

- * - * Constant indicating that the base direction depends on the first strong - * directional character in the text according to the Unicode Bidirectional - * Algorithm. If no strong directional character is present, - * then set the paragraph level to 1 (right-to-left).

- * - * If this value is used in conjunction with reordering modes - * UBIDI_REORDER_INVERSE_LIKE_DIRECT or - * UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the text to reorder - * is assumed to be visual LTR, and the text after reordering is required - * to be the corresponding logical string with appropriate contextual - * direction. The direction of the result string will be RTL if either - * the righmost or leftmost strong character of the source text is RTL - * or Arabic Letter, or if the text contains no strong character; - * the direction will be LTR otherwise.

- * - * If reordering option UBIDI_OPTION_INSERT_MARKS is set, an RLM may - * be added at the beginning of the result string to ensure round trip - * (that the result string, when reordered back to visual, will produce - * the original source text). - * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT - * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL - * @stable ICU 2.0 - */ -#define UBIDI_DEFAULT_RTL 0xff - -/** - * Maximum explicit embedding level. - * Same as the max_depth value in the - * Unicode Bidirectional Algorithm. - * (The maximum resolved level can be up to UBIDI_MAX_EXPLICIT_LEVEL+1). - * @stable ICU 2.0 - */ -#define UBIDI_MAX_EXPLICIT_LEVEL 125 - -/** Bit flag for level input. - * Overrides directional properties. - * @stable ICU 2.0 - */ -#define UBIDI_LEVEL_OVERRIDE 0x80 - -/** - * Special value which can be returned by the mapping functions when a logical - * index has no corresponding visual index or vice-versa. This may happen - * for the logical-to-visual mapping of a Bidi control when option - * #UBIDI_OPTION_REMOVE_CONTROLS is specified. This can also happen - * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted - * by option #UBIDI_OPTION_INSERT_MARKS. - * @see ubidi_getVisualIndex - * @see ubidi_getVisualMap - * @see ubidi_getLogicalIndex - * @see ubidi_getLogicalMap - * @stable ICU 3.6 - */ -#define UBIDI_MAP_NOWHERE (-1) - -/** - * UBiDiDirection values indicate the text direction. - * @stable ICU 2.0 - */ -enum UBiDiDirection { - /** Left-to-right text. This is a 0 value. - *

    - *
  • As return value for ubidi_getDirection(), it means - * that the source string contains no right-to-left characters, or - * that the source string is empty and the paragraph level is even. - *
  • As return value for ubidi_getBaseDirection(), it - * means that the first strong character of the source string has - * a left-to-right direction. - *
- * @stable ICU 2.0 - */ - UBIDI_LTR, - /** Right-to-left text. This is a 1 value. - *
    - *
  • As return value for ubidi_getDirection(), it means - * that the source string contains no left-to-right characters, or - * that the source string is empty and the paragraph level is odd. - *
  • As return value for ubidi_getBaseDirection(), it - * means that the first strong character of the source string has - * a right-to-left direction. - *
- * @stable ICU 2.0 - */ - UBIDI_RTL, - /** Mixed-directional text. - *

As return value for ubidi_getDirection(), it means - * that the source string contains both left-to-right and - * right-to-left characters. - * @stable ICU 2.0 - */ - UBIDI_MIXED, - /** No strongly directional text. - *

As return value for ubidi_getBaseDirection(), it means - * that the source string is missing or empty, or contains neither left-to-right - * nor right-to-left characters. - * @stable ICU 4.6 - */ - UBIDI_NEUTRAL -}; - -/** @stable ICU 2.0 */ -typedef enum UBiDiDirection UBiDiDirection; - -/** - * Forward declaration of the UBiDi structure for the declaration of - * the API functions. Its fields are implementation-specific.

- * This structure holds information about a paragraph (or multiple paragraphs) - * of text with Bidi-algorithm-related details, or about one line of - * such a paragraph.

- * Reordering can be done on a line, or on one or more paragraphs which are - * then interpreted each as one single line. - * @stable ICU 2.0 - */ -struct UBiDi; - -/** @stable ICU 2.0 */ -typedef struct UBiDi UBiDi; - -/** - * Allocate a UBiDi structure. - * Such an object is initially empty. It is assigned - * the Bidi properties of a piece of text containing one or more paragraphs - * by ubidi_setPara() - * or the Bidi properties of a line within a paragraph by - * ubidi_setLine().

- * This object can be reused for as long as it is not deallocated - * by calling ubidi_close().

- * ubidi_setPara() and ubidi_setLine() will allocate - * additional memory for internal structures as necessary. - * - * @return An empty UBiDi object. - * @stable ICU 2.0 - */ -U_STABLE UBiDi * U_EXPORT2 -ubidi_open(void); - -/** - * Allocate a UBiDi structure with preallocated memory - * for internal structures. - * This function provides a UBiDi object like ubidi_open() - * with no arguments, but it also preallocates memory for internal structures - * according to the sizings supplied by the caller.

- * Subsequent functions will not allocate any more memory, and are thus - * guaranteed not to fail because of lack of memory.

- * The preallocation can be limited to some of the internal memory - * by setting some values to 0 here. That means that if, e.g., - * maxRunCount cannot be reasonably predetermined and should not - * be set to maxLength (the only failproof value) to avoid - * wasting memory, then maxRunCount could be set to 0 here - * and the internal structures that are associated with it will be allocated - * on demand, just like with ubidi_open(). - * - * @param maxLength is the maximum text or line length that internal memory - * will be preallocated for. An attempt to associate this object with a - * longer text will fail, unless this value is 0, which leaves the allocation - * up to the implementation. - * - * @param maxRunCount is the maximum anticipated number of same-level runs - * that internal memory will be preallocated for. An attempt to access - * visual runs on an object that was not preallocated for as many runs - * as the text was actually resolved to will fail, - * unless this value is 0, which leaves the allocation up to the implementation.

- * The number of runs depends on the actual text and maybe anywhere between - * 1 and maxLength. It is typically small. - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @return An empty UBiDi object with preallocated memory. - * @stable ICU 2.0 - */ -U_STABLE UBiDi * U_EXPORT2 -ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode); - -/** - * ubidi_close() must be called to free the memory - * associated with a UBiDi object.

- * - * Important: - * A parent UBiDi object must not be destroyed or reused if - * it still has children. - * If a UBiDi object has become the child - * of another one (its parent) by calling - * ubidi_setLine(), then the child object must - * be destroyed (closed) or reused (by calling - * ubidi_setPara() or ubidi_setLine()) - * before the parent object. - * - * @param pBiDi is a UBiDi object. - * - * @see ubidi_setPara - * @see ubidi_setLine - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ubidi_close(UBiDi *pBiDi); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUBiDiPointer - * "Smart pointer" class, closes a UBiDi via ubidi_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close); - -U_NAMESPACE_END - -#endif - -/** - * Modify the operation of the Bidi algorithm such that it - * approximates an "inverse Bidi" algorithm. This function - * must be called before ubidi_setPara(). - * - *

The normal operation of the Bidi algorithm as described - * in the Unicode Technical Report is to take text stored in logical - * (keyboard, typing) order and to determine the reordering of it for visual - * rendering. - * Some legacy systems store text in visual order, and for operations - * with standard, Unicode-based algorithms, the text needs to be transformed - * to logical order. This is effectively the inverse algorithm of the - * described Bidi algorithm. Note that there is no standard algorithm for - * this "inverse Bidi" and that the current implementation provides only an - * approximation of "inverse Bidi".

- * - *

With isInverse set to TRUE, - * this function changes the behavior of some of the subsequent functions - * in a way that they can be used for the inverse Bidi algorithm. - * Specifically, runs of text with numeric characters will be treated in a - * special way and may need to be surrounded with LRM characters when they are - * written in reordered sequence.

- * - *

Output runs should be retrieved using ubidi_getVisualRun(). - * Since the actual input for "inverse Bidi" is visually ordered text and - * ubidi_getVisualRun() gets the reordered runs, these are actually - * the runs of the logically ordered output.

- * - *

Calling this function with argument isInverse set to - * TRUE is equivalent to calling - * ubidi_setReorderingMode with argument - * reorderingMode - * set to #UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
- * Calling this function with argument isInverse set to - * FALSE is equivalent to calling - * ubidi_setReorderingMode with argument - * reorderingMode - * set to #UBIDI_REORDER_DEFAULT. - * - * @param pBiDi is a UBiDi object. - * - * @param isInverse specifies "forward" or "inverse" Bidi operation. - * - * @see ubidi_setPara - * @see ubidi_writeReordered - * @see ubidi_setReorderingMode - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ubidi_setInverse(UBiDi *pBiDi, UBool isInverse); - -/** - * Is this Bidi object set to perform the inverse Bidi algorithm? - *

Note: calling this function after setting the reordering mode with - * ubidi_setReorderingMode will return TRUE if the - * reordering mode was set to #UBIDI_REORDER_INVERSE_NUMBERS_AS_L, - * FALSE for all other values.

- * - * @param pBiDi is a UBiDi object. - * @return TRUE if the Bidi object is set to perform the inverse Bidi algorithm - * by handling numbers as L. - * - * @see ubidi_setInverse - * @see ubidi_setReorderingMode - * @stable ICU 2.0 - */ - -U_STABLE UBool U_EXPORT2 -ubidi_isInverse(UBiDi *pBiDi); - -/** - * Specify whether block separators must be allocated level zero, - * so that successive paragraphs will progress from left to right. - * This function must be called before ubidi_setPara(). - * Paragraph separators (B) may appear in the text. Setting them to level zero - * means that all paragraph separators (including one possibly appearing - * in the last text position) are kept in the reordered text after the text - * that they follow in the source text. - * When this feature is not enabled, a paragraph separator at the last - * position of the text before reordering will go to the first position - * of the reordered text when the paragraph level is odd. - * - * @param pBiDi is a UBiDi object. - * - * @param orderParagraphsLTR specifies whether paragraph separators (B) must - * receive level 0, so that successive paragraphs progress from left to right. - * - * @see ubidi_setPara - * @stable ICU 3.4 - */ -U_STABLE void U_EXPORT2 -ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR); - -/** - * Is this Bidi object set to allocate level 0 to block separators so that - * successive paragraphs progress from left to right? - * - * @param pBiDi is a UBiDi object. - * @return TRUE if the Bidi object is set to allocate level 0 to block - * separators. - * - * @see ubidi_orderParagraphsLTR - * @stable ICU 3.4 - */ -U_STABLE UBool U_EXPORT2 -ubidi_isOrderParagraphsLTR(UBiDi *pBiDi); - -/** - * UBiDiReorderingMode values indicate which variant of the Bidi - * algorithm to use. - * - * @see ubidi_setReorderingMode - * @stable ICU 3.6 - */ -typedef enum UBiDiReorderingMode { - /** Regular Logical to Visual Bidi algorithm according to Unicode. 
- * This is a 0 value. - * @stable ICU 3.6 */ - UBIDI_REORDER_DEFAULT = 0, - /** Logical to Visual algorithm which handles numbers in a way which - * mimics the behavior of Windows XP. - * @stable ICU 3.6 */ - UBIDI_REORDER_NUMBERS_SPECIAL, - /** Logical to Visual algorithm grouping numbers with adjacent R characters - * (reversible algorithm). - * @stable ICU 3.6 */ - UBIDI_REORDER_GROUP_NUMBERS_WITH_R, - /** Reorder runs only to transform a Logical LTR string to the Logical RTL - * string with the same display, or vice-versa.
- * If this mode is set together with option - * #UBIDI_OPTION_INSERT_MARKS, some Bidi controls in the source - * text may be removed and other controls may be added to produce the - * minimum combination which has the required display. - * @stable ICU 3.6 */ - UBIDI_REORDER_RUNS_ONLY, - /** Visual to Logical algorithm which handles numbers like L - * (same algorithm as selected by ubidi_setInverse(TRUE). - * @see ubidi_setInverse - * @stable ICU 3.6 */ - UBIDI_REORDER_INVERSE_NUMBERS_AS_L, - /** Visual to Logical algorithm equivalent to the regular Logical to Visual - * algorithm. - * @stable ICU 3.6 */ - UBIDI_REORDER_INVERSE_LIKE_DIRECT, - /** Inverse Bidi (Visual to Logical) algorithm for the - * UBIDI_REORDER_NUMBERS_SPECIAL Bidi algorithm. - * @stable ICU 3.6 */ - UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, -#ifndef U_HIDE_DEPRECATED_API - /** - * Number of values for reordering mode. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UBIDI_REORDER_COUNT -#endif // U_HIDE_DEPRECATED_API -} UBiDiReorderingMode; - -/** - * Modify the operation of the Bidi algorithm such that it implements some - * variant to the basic Bidi algorithm or approximates an "inverse Bidi" - * algorithm, depending on different values of the "reordering mode". - * This function must be called before ubidi_setPara(), and stays - * in effect until called again with a different argument. - * - *

The normal operation of the Bidi algorithm as described - * in the Unicode Standard Annex #9 is to take text stored in logical - * (keyboard, typing) order and to determine how to reorder it for visual - * rendering.

- * - *

With the reordering mode set to a value other than - * #UBIDI_REORDER_DEFAULT, this function changes the behavior of - * some of the subsequent functions in a way such that they implement an - * inverse Bidi algorithm or some other algorithm variants.

- * - *

Some legacy systems store text in visual order, and for operations - * with standard, Unicode-based algorithms, the text needs to be transformed - * into logical order. This is effectively the inverse algorithm of the - * described Bidi algorithm. Note that there is no standard algorithm for - * this "inverse Bidi", so a number of variants are implemented here.

- * - *

In other cases, it may be desirable to emulate some variant of the - * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a - * Logical to Logical transformation.

- * - *
    - *
  • When the reordering mode is set to #UBIDI_REORDER_DEFAULT, - * the standard Bidi Logical to Visual algorithm is applied.
  • - * - *
  • When the reordering mode is set to - * #UBIDI_REORDER_NUMBERS_SPECIAL, - * the algorithm used to perform Bidi transformations when calling - * ubidi_setPara should approximate the algorithm used in - * Microsoft Windows XP rather than strictly conform to the Unicode Bidi - * algorithm. - *
    - * The differences between the basic algorithm and the algorithm addressed - * by this option are as follows: - *
      - *
    • Within text at an even embedding level, the sequence "123AB" - * (where AB represent R or AL letters) is transformed to "123BA" by the - * Unicode algorithm and to "BA123" by the Windows algorithm.
    • - *
    • Arabic-Indic numbers (AN) are handled by the Windows algorithm just - * like regular numbers (EN).
    • - *
  • - * - *
  • When the reordering mode is set to - * #UBIDI_REORDER_GROUP_NUMBERS_WITH_R, - * numbers located between LTR text and RTL text are associated with the RTL - * text. For instance, an LTR paragraph with content "abc 123 DEF" (where - * upper case letters represent RTL characters) will be transformed to - * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed - * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc". - * This makes the algorithm reversible and makes it useful when round trip - * (from visual to logical and back to visual) must be achieved without - * adding LRM characters. However, this is a variation from the standard - * Unicode Bidi algorithm.
    - * The source text should not contain Bidi control characters other than LRM - * or RLM.
  • - * - *
  • When the reordering mode is set to - * #UBIDI_REORDER_RUNS_ONLY, - * a "Logical to Logical" transformation must be performed: - *
      - *
    • If the default text level of the source text (argument paraLevel - * in ubidi_setPara) is even, the source text will be handled as - * LTR logical text and will be transformed to the RTL logical text which has - * the same LTR visual display.
    • - *
    • If the default level of the source text is odd, the source text - * will be handled as RTL logical text and will be transformed to the - * LTR logical text which has the same LTR visual display.
    • - *
    - * This mode may be needed when logical text which is basically Arabic or - * Hebrew, with possible included numbers or phrases in English, has to be - * displayed as if it had an even embedding level (this can happen if the - * displaying application treats all text as if it was basically LTR). - *
    - * This mode may also be needed in the reverse case, when logical text which is - * basically English, with possible included phrases in Arabic or Hebrew, has to - * be displayed as if it had an odd embedding level. - *
    - * Both cases could be handled by adding LRE or RLE at the head of the text, - * if the display subsystem supports these formatting controls. If it does not, - * the problem may be handled by transforming the source text in this mode - * before displaying it, so that it will be displayed properly.
    - * The source text should not contain Bidi control characters other than LRM - * or RLM.
  • - * - *
  • When the reordering mode is set to - * #UBIDI_REORDER_INVERSE_NUMBERS_AS_L, an "inverse Bidi" algorithm - * is applied. - * Runs of text with numeric characters will be treated like LTR letters and - * may need to be surrounded with LRM characters when they are written in - * reordered sequence (the option #UBIDI_INSERT_LRM_FOR_NUMERIC can - * be used with function ubidi_writeReordered to this end). This - * mode is equivalent to calling ubidi_setInverse() with - * argument isInverse set to TRUE.
  • - * - *
  • When the reordering mode is set to - * #UBIDI_REORDER_INVERSE_LIKE_DIRECT, the "direct" Logical to Visual - * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm. - * This mode is similar to mode #UBIDI_REORDER_INVERSE_NUMBERS_AS_L - * but is closer to the regular Bidi algorithm. - *
    - * For example, an LTR paragraph with the content "FED 123 456 CBA" (where - * upper case represents RTL characters) will be transformed to - * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC" - * with mode UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
    - * When used in conjunction with option - * #UBIDI_OPTION_INSERT_MARKS, this mode generally - * adds Bidi marks to the output significantly more sparingly than mode - * #UBIDI_REORDER_INVERSE_NUMBERS_AS_L with option - * #UBIDI_INSERT_LRM_FOR_NUMERIC in calls to - * ubidi_writeReordered.
  • - * - *
  • When the reordering mode is set to - * #UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the Logical to Visual - * Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm. - *
    - * For example, an LTR paragraph with the content "abc FED123" (where - * upper case represents RTL characters) will be transformed to "abc 123DEF".
  • - *
- * - *

In all the reordering modes specifying an "inverse Bidi" algorithm - * (i.e. those with a name starting with UBIDI_REORDER_INVERSE), - * output runs should be retrieved using - * ubidi_getVisualRun(), and the output text with - * ubidi_writeReordered(). The caller should keep in mind that in - * "inverse Bidi" modes the input is actually visually ordered text and - * the reordered output returned by ubidi_getVisualRun() or - * ubidi_writeReordered() actually consists of runs or a character string - * of logically ordered output.
- * For all the "inverse Bidi" modes, the source text should not contain - * Bidi control characters other than LRM or RLM.

- * - *

Note that option #UBIDI_OUTPUT_REVERSE of - * ubidi_writeReordered has no useful meaning and should not be - * used in conjunction with any value of the reordering mode specifying - * "inverse Bidi" or with value UBIDI_REORDER_RUNS_ONLY. - * - * @param pBiDi is a UBiDi object. - * @param reorderingMode specifies the required variant of the Bidi algorithm. - * - * @see UBiDiReorderingMode - * @see ubidi_setInverse - * @see ubidi_setPara - * @see ubidi_writeReordered - * @stable ICU 3.6 - */ -U_STABLE void U_EXPORT2 -ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode); - -/** - * What is the requested reordering mode for a given Bidi object? - * - * @param pBiDi is a UBiDi object. - * @return the current reordering mode of the Bidi object - * @see ubidi_setReorderingMode - * @stable ICU 3.6 - */ -U_STABLE UBiDiReorderingMode U_EXPORT2 -ubidi_getReorderingMode(UBiDi *pBiDi); - -/** - * UBiDiReorderingOption values indicate which options are - * specified to affect the Bidi algorithm. - * - * @see ubidi_setReorderingOptions - * @stable ICU 3.6 - */ -typedef enum UBiDiReorderingOption { - /** - * option value for ubidi_setReorderingOptions: - * disable all the options which can be set with this function - * @see ubidi_setReorderingOptions - * @stable ICU 3.6 - */ - UBIDI_OPTION_DEFAULT = 0, - - /** - * option bit for ubidi_setReorderingOptions: - * insert Bidi marks (LRM or RLM) when needed to ensure correct result of - * a reordering to a Logical order - * - *

This option must be set or reset before calling - * ubidi_setPara.

- * - *

This option is significant only with reordering modes which generate - * a result with Logical order, specifically:

- *
    - *
  • #UBIDI_REORDER_RUNS_ONLY
  • - *
  • #UBIDI_REORDER_INVERSE_NUMBERS_AS_L
  • - *
  • #UBIDI_REORDER_INVERSE_LIKE_DIRECT
  • - *
  • #UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
  • - *
- * - *

If this option is set in conjunction with reordering mode - * #UBIDI_REORDER_INVERSE_NUMBERS_AS_L or with calling - * ubidi_setInverse(TRUE), it implies - * option #UBIDI_INSERT_LRM_FOR_NUMERIC - * in calls to function ubidi_writeReordered().

- * - *

For other reordering modes, a minimum number of LRM or RLM characters - * will be added to the source text after reordering it so as to ensure - * round trip, i.e. when applying the inverse reordering mode on the - * resulting logical text with removal of Bidi marks - * (option #UBIDI_OPTION_REMOVE_CONTROLS set before calling - * ubidi_setPara() or option #UBIDI_REMOVE_BIDI_CONTROLS - * in ubidi_writeReordered), the result will be identical to the - * source text in the first transformation. - * - *

This option will be ignored if specified together with option - * #UBIDI_OPTION_REMOVE_CONTROLS. It inhibits option - * UBIDI_REMOVE_BIDI_CONTROLS in calls to function - * ubidi_writeReordered() and it implies option - * #UBIDI_INSERT_LRM_FOR_NUMERIC in calls to function - * ubidi_writeReordered() if the reordering mode is - * #UBIDI_REORDER_INVERSE_NUMBERS_AS_L.

- * - * @see ubidi_setReorderingMode - * @see ubidi_setReorderingOptions - * @stable ICU 3.6 - */ - UBIDI_OPTION_INSERT_MARKS = 1, - - /** - * option bit for ubidi_setReorderingOptions: - * remove Bidi control characters - * - *

This option must be set or reset before calling - * ubidi_setPara.

- * - *

This option nullifies option #UBIDI_OPTION_INSERT_MARKS. - * It inhibits option #UBIDI_INSERT_LRM_FOR_NUMERIC in calls - * to function ubidi_writeReordered() and it implies option - * #UBIDI_REMOVE_BIDI_CONTROLS in calls to that function.

- * - * @see ubidi_setReorderingMode - * @see ubidi_setReorderingOptions - * @stable ICU 3.6 - */ - UBIDI_OPTION_REMOVE_CONTROLS = 2, - - /** - * option bit for ubidi_setReorderingOptions: - * process the output as part of a stream to be continued - * - *

This option must be set or reset before calling - * ubidi_setPara.

- * - *

This option specifies that the caller is interested in processing a large - * text object in parts. - * The results of the successive calls are expected to be concatenated by the - * caller. Only the call for the last part will have this option bit off.

- * - *

When this option bit is on, ubidi_setPara() may process - * less than the full source text in order to truncate the text at a meaningful - * boundary. The caller should call ubidi_getProcessedLength() - * immediately after calling ubidi_setPara() in order to - * determine how much of the source text has been processed. - * Source text beyond that length should be resubmitted in following calls to - * ubidi_setPara. The processed length may be less than - * the length of the source text if a character preceding the last character of - * the source text constitutes a reasonable boundary (like a block separator) - * for text to be continued.
- * If the last character of the source text constitutes a reasonable - * boundary, the whole text will be processed at once.
- * If no such reasonable boundary exists anywhere - * in the source text, the processed length will be zero.
- * The caller should check for such an occurrence and do one of the following: - *

  • submit a larger amount of text with a better chance to include - * a reasonable boundary.
  • - *
  • resubmit the same text after turning off option - * UBIDI_OPTION_STREAMING.
- * In all cases, this option should be turned off before processing the last - * part of the text.

- * - *

When the UBIDI_OPTION_STREAMING option is used, - * it is recommended to call ubidi_orderParagraphsLTR() with - * argument orderParagraphsLTR set to TRUE before - * calling ubidi_setPara so that later paragraphs may be - * concatenated to previous paragraphs on the right.

- * - * @see ubidi_setReorderingMode - * @see ubidi_setReorderingOptions - * @see ubidi_getProcessedLength - * @see ubidi_orderParagraphsLTR - * @stable ICU 3.6 - */ - UBIDI_OPTION_STREAMING = 4 -} UBiDiReorderingOption; - -/** - * Specify which of the reordering options - * should be applied during Bidi transformations. - * - * @param pBiDi is a UBiDi object. - * @param reorderingOptions is a combination of zero or more of the following - * options: - * #UBIDI_OPTION_DEFAULT, #UBIDI_OPTION_INSERT_MARKS, - * #UBIDI_OPTION_REMOVE_CONTROLS, #UBIDI_OPTION_STREAMING. - * - * @see ubidi_getReorderingOptions - * @stable ICU 3.6 - */ -U_STABLE void U_EXPORT2 -ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions); - -/** - * What are the reordering options applied to a given Bidi object? - * - * @param pBiDi is a UBiDi object. - * @return the current reordering options of the Bidi object - * @see ubidi_setReorderingOptions - * @stable ICU 3.6 - */ -U_STABLE uint32_t U_EXPORT2 -ubidi_getReorderingOptions(UBiDi *pBiDi); - -/** - * Set the context before a call to ubidi_setPara().

- * - * ubidi_setPara() computes the left-right directionality for a given piece - * of text which is supplied as one of its arguments. Sometimes this piece - * of text (the "main text") should be considered in context, because text - * appearing before ("prologue") and/or after ("epilogue") the main text - * may affect the result of this computation.

- * - * This function specifies the prologue and/or the epilogue for the next - * call to ubidi_setPara(). The characters specified as prologue and - * epilogue should not be modified by the calling program until the call - * to ubidi_setPara() has returned. If successive calls to ubidi_setPara() - * all need specification of a context, ubidi_setContext() must be called - * before each call to ubidi_setPara(). In other words, a context is not - * "remembered" after the following successful call to ubidi_setPara().

- * - * If a call to ubidi_setPara() specifies UBIDI_DEFAULT_LTR or - * UBIDI_DEFAULT_RTL as paraLevel and is preceded by a call to - * ubidi_setContext() which specifies a prologue, the paragraph level will - * be computed taking into consideration the text in the prologue.

- * - * When ubidi_setPara() is called without a previous call to - * ubidi_setContext, the main text is handled as if preceded and followed - * by strong directional characters at the current paragraph level. - * Calling ubidi_setContext() with specification of a prologue will change - * this behavior by handling the main text as if preceded by the last - * strong character appearing in the prologue, if any. - * Calling ubidi_setContext() with specification of an epilogue will change - * the behavior of ubidi_setPara() by handling the main text as if followed - * by the first strong character or digit appearing in the epilogue, if any.

- * - * Note 1: if ubidi_setContext is called repeatedly without - * calling ubidi_setPara, the earlier calls have no effect, - * only the last call will be remembered for the next call to - * ubidi_setPara.

- * - * Note 2: calling ubidi_setContext(pBiDi, NULL, 0, NULL, 0, &errorCode) - * cancels any previous setting of non-empty prologue or epilogue. - * The next call to ubidi_setPara() will process no - * prologue or epilogue.

- * - * Note 3: users must be aware that even after setting the context - * before a call to ubidi_setPara() to perform e.g. a logical to visual - * transformation, the resulting string may not be identical to what it - * would have been if all the text, including prologue and epilogue, had - * been processed together.
- * Example (upper case letters represent RTL characters):
- *   prologue = "abc DE"
- *   epilogue = none
- *   main text = "FGH xyz"
- *   paraLevel = UBIDI_LTR
- *   display without prologue = "HGF xyz" - * ("HGF" is adjacent to "xyz")
- *   display with prologue = "abc HGFED xyz" - * ("HGF" is not adjacent to "xyz")
- * - * @param pBiDi is a paragraph UBiDi object. - * - * @param prologue is a pointer to the text which precedes the text that - * will be specified in a coming call to ubidi_setPara(). - * If there is no prologue to consider, then proLength - * must be zero and this pointer can be NULL. - * - * @param proLength is the length of the prologue; if proLength==-1 - * then the prologue must be zero-terminated. - * Otherwise proLength must be >= 0. If proLength==0, it means - * that there is no prologue to consider. - * - * @param epilogue is a pointer to the text which follows the text that - * will be specified in a coming call to ubidi_setPara(). - * If there is no epilogue to consider, then epiLength - * must be zero and this pointer can be NULL. - * - * @param epiLength is the length of the epilogue; if epiLength==-1 - * then the epilogue must be zero-terminated. - * Otherwise epiLength must be >= 0. If epiLength==0, it means - * that there is no epilogue to consider. - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @see ubidi_setPara - * @stable ICU 4.8 - */ -U_STABLE void U_EXPORT2 -ubidi_setContext(UBiDi *pBiDi, - const UChar *prologue, int32_t proLength, - const UChar *epilogue, int32_t epiLength, - UErrorCode *pErrorCode); - -/** - * Perform the Unicode Bidi algorithm. It is defined in the - * Unicode Standard Annex #9, - * version 13, - * also described in The Unicode Standard, Version 4.0 .

- * - * This function takes a piece of plain text containing one or more paragraphs, - * with or without externally specified embedding levels from styled - * text and computes the left-right-directionality of each character.

- * - * If the entire text is all of the same directionality, then - * the function may not perform all the steps described by the algorithm, - * i.e., some levels may not be the same as if all steps were performed. - * This is not relevant for unidirectional text.
- * For example, in pure LTR text with numbers the numbers would get - * a resolved level of 2 higher than the surrounding text according to - * the algorithm. This implementation may set all resolved levels to - * the same value in such a case.

- * - * The text can be composed of multiple paragraphs. Occurrence of a block - * separator in the text terminates a paragraph, and whatever comes next starts - * a new paragraph. The exception to this rule is when a Carriage Return (CR) - * is followed by a Line Feed (LF). Both CR and LF are block separators, but - * in that case, the pair of characters is considered as terminating the - * preceding paragraph, and a new paragraph will be started by a character - * coming after the LF. - * - * @param pBiDi A UBiDi object allocated with ubidi_open() - * which will be set to contain the reordering information, - * especially the resolved levels for all the characters in text. - * - * @param text is a pointer to the text that the Bidi algorithm will be performed on. - * This pointer is stored in the UBiDi object and can be retrieved - * with ubidi_getText().
- * Note: the text must be (at least) length long. - * - * @param length is the length of the text; if length==-1 then - * the text must be zero-terminated. - * - * @param paraLevel specifies the default level for the text; - * it is typically 0 (LTR) or 1 (RTL). - * If the function shall determine the paragraph level from the text, - * then paraLevel can be set to - * either #UBIDI_DEFAULT_LTR - * or #UBIDI_DEFAULT_RTL; if the text contains multiple - * paragraphs, the paragraph level shall be determined separately for - * each paragraph; if a paragraph does not include any strongly typed - * character, then the desired default is used (0 for LTR or 1 for RTL). - * Any other value between 0 and #UBIDI_MAX_EXPLICIT_LEVEL - * is also valid, with odd levels indicating RTL. - * - * @param embeddingLevels (in) may be used to preset the embedding and override levels, - * ignoring characters like LRE and PDF in the text. - * A level overrides the directional property of its corresponding - * (same index) character if the level has the - * #UBIDI_LEVEL_OVERRIDE bit set.

- * Aside from that bit, it must be - * paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL, - * except that level 0 is always allowed. - * Level 0 for a paragraph separator prevents reordering of paragraphs; - * this only works reliably if #UBIDI_LEVEL_OVERRIDE - * is also set for paragraph separators. - * Level 0 for other characters is treated as a wildcard - * and is lifted up to the resolved level of the surrounding paragraph.

- * Caution: A copy of this pointer, not of the levels, - * will be stored in the UBiDi object; - * the embeddingLevels array must not be - * deallocated before the UBiDi structure is destroyed or reused, - * and the embeddingLevels - * should not be modified to avoid unexpected results on subsequent Bidi operations. - * However, the ubidi_setPara() and - * ubidi_setLine() functions may modify some or all of the levels.

- * After the UBiDi object is reused or destroyed, the caller - * must take care of the deallocation of the embeddingLevels array.

- * Note: the embeddingLevels array must be - * at least length long. - * This pointer can be NULL if this - * value is not necessary. - * - * @param pErrorCode must be a valid pointer to an error code value. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, - UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, - UErrorCode *pErrorCode); - -/** - * ubidi_setLine() sets a UBiDi to - * contain the reordering information, especially the resolved levels, - * for all the characters in a line of text. This line of text is - * specified by referring to a UBiDi object representing - * this information for a piece of text containing one or more paragraphs, - * and by specifying a range of indexes in this text.

- * In the new line object, the indexes will range from 0 to limit-start-1.

- * - * This is used after calling ubidi_setPara() - * for a piece of text, and after line-breaking on that text. - * It is not necessary if each paragraph is treated as a single line.

- * - * After line-breaking, rules (L1) and (L2) for the treatment of - * trailing WS and for reordering are performed on - * a UBiDi object that represents a line.

- * - * Important: pLineBiDi shares data with - * pParaBiDi. - * You must destroy or reuse pLineBiDi before pParaBiDi. - * In other words, you must destroy or reuse the UBiDi object for a line - * before the object for its parent paragraph.

- * - * The text pointer that was stored in pParaBiDi is also copied, - * and start is added to it so that it points to the beginning of the - * line for this object. - * - * @param pParaBiDi is the parent paragraph object. It must have been set - * by a successful call to ubidi_setPara. - * - * @param start is the line's first index into the text. - * - * @param limit is just behind the line's last index into the text - * (its last index +1).
- * It must be 0<=start<limit<=containing paragraph limit. - * If the specified line crosses a paragraph boundary, the function - * will terminate with error code U_ILLEGAL_ARGUMENT_ERROR. - * - * @param pLineBiDi is the object that will now represent a line of the text. - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @see ubidi_setPara - * @see ubidi_getProcessedLength - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ubidi_setLine(const UBiDi *pParaBiDi, - int32_t start, int32_t limit, - UBiDi *pLineBiDi, - UErrorCode *pErrorCode); - -/** - * Get the directionality of the text. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @return a value of UBIDI_LTR, UBIDI_RTL - * or UBIDI_MIXED - * that indicates if the entire text - * represented by this object is unidirectional, - * and which direction, or if it is mixed-directional. - * Note - The value UBIDI_NEUTRAL is never returned from this method. - * - * @see UBiDiDirection - * @stable ICU 2.0 - */ -U_STABLE UBiDiDirection U_EXPORT2 -ubidi_getDirection(const UBiDi *pBiDi); - -/** - * Gets the base direction of the text provided according - * to the Unicode Bidirectional Algorithm. The base direction - * is derived from the first character in the string with bidirectional - * character type L, R, or AL. If the first such character has type L, - * UBIDI_LTR is returned. If the first such character has - * type R or AL, UBIDI_RTL is returned. If the string does - * not contain any character of these types, then - * UBIDI_NEUTRAL is returned. - * - * This is a lightweight function for use when only the base direction - * is needed and no further bidi processing of the text is needed. - * - * @param text is a pointer to the text whose base - * direction is needed. - * Note: the text must be (at least) @c length long. - * - * @param length is the length of the text; - * if length==-1 then the text - * must be zero-terminated. 
- * - * @return UBIDI_LTR, UBIDI_RTL, - * UBIDI_NEUTRAL - * - * @see UBiDiDirection - * @stable ICU 4.6 - */ -U_STABLE UBiDiDirection U_EXPORT2 -ubidi_getBaseDirection(const UChar *text, int32_t length ); - -/** - * Get the pointer to the text. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @return The pointer to the text that the UBiDi object was created for. - * - * @see ubidi_setPara - * @see ubidi_setLine - * @stable ICU 2.0 - */ -U_STABLE const UChar * U_EXPORT2 -ubidi_getText(const UBiDi *pBiDi); - -/** - * Get the length of the text. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @return The length of the text that the UBiDi object was created for. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubidi_getLength(const UBiDi *pBiDi); - -/** - * Get the paragraph level of the text. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @return The paragraph level. If there are multiple paragraphs, their - * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or - * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph - * is returned. - * - * @see UBiDiLevel - * @see ubidi_getParagraph - * @see ubidi_getParagraphByIndex - * @stable ICU 2.0 - */ -U_STABLE UBiDiLevel U_EXPORT2 -ubidi_getParaLevel(const UBiDi *pBiDi); - -/** - * Get the number of paragraphs. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @return The number of paragraphs. - * @stable ICU 3.4 - */ -U_STABLE int32_t U_EXPORT2 -ubidi_countParagraphs(UBiDi *pBiDi); - -/** - * Get a paragraph, given a position within the text. - * This function returns information about a paragraph.
- * Note: if the paragraph index is known, it is more efficient to - * retrieve the paragraph information using ubidi_getParagraphByIndex().

- * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @param charIndex is the index of a character within the text, in the - * range [0..ubidi_getProcessedLength(pBiDi)-1]. - * - * @param pParaStart will receive the index of the first character of the - * paragraph in the text. - * This pointer can be NULL if this - * value is not necessary. - * - * @param pParaLimit will receive the limit of the paragraph. - * The l-value that you point to here may be the - * same expression (variable) as the one for - * charIndex. - * This pointer can be NULL if this - * value is not necessary. - * - * @param pParaLevel will receive the level of the paragraph. - * This pointer can be NULL if this - * value is not necessary. - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @return The index of the paragraph containing the specified position. - * - * @see ubidi_getProcessedLength - * @stable ICU 3.4 - */ -U_STABLE int32_t U_EXPORT2 -ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart, - int32_t *pParaLimit, UBiDiLevel *pParaLevel, - UErrorCode *pErrorCode); - -/** - * Get a paragraph, given the index of this paragraph. - * - * This function returns information about a paragraph.

- * - * @param pBiDi is the paragraph UBiDi object. - * - * @param paraIndex is the number of the paragraph, in the - * range [0..ubidi_countParagraphs(pBiDi)-1]. - * - * @param pParaStart will receive the index of the first character of the - * paragraph in the text. - * This pointer can be NULL if this - * value is not necessary. - * - * @param pParaLimit will receive the limit of the paragraph. - * This pointer can be NULL if this - * value is not necessary. - * - * @param pParaLevel will receive the level of the paragraph. - * This pointer can be NULL if this - * value is not necessary. - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @stable ICU 3.4 - */ -U_STABLE void U_EXPORT2 -ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, - int32_t *pParaStart, int32_t *pParaLimit, - UBiDiLevel *pParaLevel, UErrorCode *pErrorCode); - -/** - * Get the level for one character. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @param charIndex the index of a character. It must be in the range - * [0..ubidi_getProcessedLength(pBiDi)]. - * - * @return The level for the character at charIndex (0 if charIndex is not - * in the valid range). - * - * @see UBiDiLevel - * @see ubidi_getProcessedLength - * @stable ICU 2.0 - */ -U_STABLE UBiDiLevel U_EXPORT2 -ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex); - -/** - * Get an array of levels for each character.

- * - * Note that this function may allocate memory under some - * circumstances, unlike ubidi_getLevelAt(). - * - * @param pBiDi is the paragraph or line UBiDi object, whose - * text length must be strictly positive. - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @return The levels array for the text, - * or NULL if an error occurs. - * - * @see UBiDiLevel - * @see ubidi_getProcessedLength - * @stable ICU 2.0 - */ -U_STABLE const UBiDiLevel * U_EXPORT2 -ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode); - -/** - * Get a logical run. - * This function returns information about a run and is used - * to retrieve runs in logical order.

- * This is especially useful for line-breaking on a paragraph. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @param logicalPosition is a logical position within the source text. - * - * @param pLogicalLimit will receive the limit of the corresponding run. - * The l-value that you point to here may be the - * same expression (variable) as the one for - * logicalPosition. - * This pointer can be NULL if this - * value is not necessary. - * - * @param pLevel will receive the level of the corresponding run. - * This pointer can be NULL if this - * value is not necessary. - * - * @see ubidi_getProcessedLength - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, - int32_t *pLogicalLimit, UBiDiLevel *pLevel); - -/** - * Get the number of runs. - * This function may invoke the actual reordering on the - * UBiDi object, after ubidi_setPara() - * may have resolved only the levels of the text. Therefore, - * ubidi_countRuns() may have to allocate memory, - * and may fail doing so. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @return The number of runs. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode); - -/** - * Get one run's logical start, length, and directionality, - * which can be 0 for LTR or 1 for RTL. - * In an RTL run, the character at the logical start is - * visually on the right of the displayed run. - * The length is the number of characters in the run.

- * ubidi_countRuns() should be called - * before the runs are retrieved. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @param runIndex is the number of the run in visual order, in the - * range [0..ubidi_countRuns(pBiDi)-1]. - * - * @param pLogicalStart is the first logical character index in the text. - * The pointer may be NULL if this index is not needed. - * - * @param pLength is the number of characters (at least one) in the run. - * The pointer may be NULL if this is not needed. - * - * @return the directionality of the run, - * UBIDI_LTR==0 or UBIDI_RTL==1, - * never UBIDI_MIXED, - * never UBIDI_NEUTRAL. - * - * @see ubidi_countRuns - * - * Example: - *

- * \code
- * int32_t i, count=ubidi_countRuns(pBiDi),
- *         logicalStart, visualIndex=0, length;
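- * for(i=0; i<count; ++i) {
- *     if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
- *         do { // LTR
- *             show_char(text[logicalStart++], visualIndex++);
- *         } while(--length>0);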
- *     } else {
- *         logicalStart+=length;  // logicalLimit
- *         do { // RTL
- *             show_char(text[--logicalStart], visualIndex++);
- *         } while(--length>0);
- *     }
- * }
- *\endcode
- * 
- * - * Note that in right-to-left runs, code like this places - * second surrogates before first ones (which is generally a bad idea) - * and combining characters before base characters. - *

- * Use of ubidi_writeReordered(), optionally with the - * #UBIDI_KEEP_BASE_COMBINING option, can be considered in order - * to avoid these issues. - * @stable ICU 2.0 - */ -U_STABLE UBiDiDirection U_EXPORT2 -ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, - int32_t *pLogicalStart, int32_t *pLength); - -/** - * Get the visual position from a logical text position. - * If such a mapping is used many times on the same - * UBiDi object, then calling - * ubidi_getLogicalMap() is more efficient.

- * - * The value returned may be #UBIDI_MAP_NOWHERE if there is no - * visual position because the corresponding text character is a Bidi control - * removed from output by the option #UBIDI_OPTION_REMOVE_CONTROLS. - *

- * When the visual output is altered by using options of - * ubidi_writeReordered() such as UBIDI_INSERT_LRM_FOR_NUMERIC, - * UBIDI_KEEP_BASE_COMBINING, UBIDI_OUTPUT_REVERSE, - * UBIDI_REMOVE_BIDI_CONTROLS, the visual position returned may not - * be correct. It is advised to use, when possible, reordering options - * such as UBIDI_OPTION_INSERT_MARKS and UBIDI_OPTION_REMOVE_CONTROLS. - *

- * Note that in right-to-left runs, this mapping places - * second surrogates before first ones (which is generally a bad idea) - * and combining characters before base characters. - * Use of ubidi_writeReordered(), optionally with the - * #UBIDI_KEEP_BASE_COMBINING option can be considered instead - * of using the mapping, in order to avoid these issues. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @param logicalIndex is the index of a character in the text. - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @return The visual position of this character. - * - * @see ubidi_getLogicalMap - * @see ubidi_getLogicalIndex - * @see ubidi_getProcessedLength - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode); - -/** - * Get the logical text position from a visual position. - * If such a mapping is used many times on the same - * UBiDi object, then calling - * ubidi_getVisualMap() is more efficient.

- * - * The value returned may be #UBIDI_MAP_NOWHERE if there is no - * logical position because the corresponding text character is a Bidi mark - * inserted in the output by option #UBIDI_OPTION_INSERT_MARKS. - *

- * This is the inverse function to ubidi_getVisualIndex(). - *

- * When the visual output is altered by using options of - * ubidi_writeReordered() such as UBIDI_INSERT_LRM_FOR_NUMERIC, - * UBIDI_KEEP_BASE_COMBINING, UBIDI_OUTPUT_REVERSE, - * UBIDI_REMOVE_BIDI_CONTROLS, the logical position returned may not - * be correct. It is advised to use, when possible, reordering options - * such as UBIDI_OPTION_INSERT_MARKS and UBIDI_OPTION_REMOVE_CONTROLS. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @param visualIndex is the visual position of a character. - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @return The index of this character in the text. - * - * @see ubidi_getVisualMap - * @see ubidi_getVisualIndex - * @see ubidi_getResultLength - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode); - -/** - * Get a logical-to-visual index map (array) for the characters in the UBiDi - * (paragraph or line) object. - *

- * Some values in the map may be #UBIDI_MAP_NOWHERE if the - * corresponding text characters are Bidi controls removed from the visual - * output by the option #UBIDI_OPTION_REMOVE_CONTROLS. - *

- * When the visual output is altered by using options of - * ubidi_writeReordered() such as UBIDI_INSERT_LRM_FOR_NUMERIC, - * UBIDI_KEEP_BASE_COMBINING, UBIDI_OUTPUT_REVERSE, - * UBIDI_REMOVE_BIDI_CONTROLS, the visual positions returned may not - * be correct. It is advised to use, when possible, reordering options - * such as UBIDI_OPTION_INSERT_MARKS and UBIDI_OPTION_REMOVE_CONTROLS. - *

- * Note that in right-to-left runs, this mapping places - * second surrogates before first ones (which is generally a bad idea) - * and combining characters before base characters. - * Use of ubidi_writeReordered(), optionally with the - * #UBIDI_KEEP_BASE_COMBINING option can be considered instead - * of using the mapping, in order to avoid these issues. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @param indexMap is a pointer to an array of ubidi_getProcessedLength() - * indexes which will reflect the reordering of the characters. - * If option #UBIDI_OPTION_INSERT_MARKS is set, the number - * of elements allocated in indexMap must be no less than - * ubidi_getResultLength(). - * The array does not need to be initialized.

- * The index map will result in indexMap[logicalIndex]==visualIndex. - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @see ubidi_getVisualMap - * @see ubidi_getVisualIndex - * @see ubidi_getProcessedLength - * @see ubidi_getResultLength - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); - -/** - * Get a visual-to-logical index map (array) for the characters in the UBiDi - * (paragraph or line) object. - *

- * Some values in the map may be #UBIDI_MAP_NOWHERE if the - * corresponding text characters are Bidi marks inserted in the visual output - * by the option #UBIDI_OPTION_INSERT_MARKS. - *

- * When the visual output is altered by using options of - * ubidi_writeReordered() such as UBIDI_INSERT_LRM_FOR_NUMERIC, - * UBIDI_KEEP_BASE_COMBINING, UBIDI_OUTPUT_REVERSE, - * UBIDI_REMOVE_BIDI_CONTROLS, the logical positions returned may not - * be correct. It is advised to use, when possible, reordering options - * such as UBIDI_OPTION_INSERT_MARKS and UBIDI_OPTION_REMOVE_CONTROLS. - * - * @param pBiDi is the paragraph or line UBiDi object. - * - * @param indexMap is a pointer to an array of ubidi_getResultLength() - * indexes which will reflect the reordering of the characters. - * If option #UBIDI_OPTION_REMOVE_CONTROLS is set, the number - * of elements allocated in indexMap must be no less than - * ubidi_getProcessedLength(). - * The array does not need to be initialized.

- * The index map will result in indexMap[visualIndex]==logicalIndex. - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @see ubidi_getLogicalMap - * @see ubidi_getLogicalIndex - * @see ubidi_getProcessedLength - * @see ubidi_getResultLength - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); - -/** - * This is a convenience function that does not use a UBiDi object. - * It is intended to be used for when an application has determined the levels - * of objects (character sequences) and just needs to have them reordered (L2). - * This is equivalent to using ubidi_getLogicalMap() on a - * UBiDi object. - * - * @param levels is an array with length levels that have been determined by - * the application. - * - * @param length is the number of levels in the array, or, semantically, - * the number of objects to be reordered. - * It must be length>0. - * - * @param indexMap is a pointer to an array of length - * indexes which will reflect the reordering of the characters. - * The array does not need to be initialized.

- * The index map will result in indexMap[logicalIndex]==visualIndex. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); - -/** - * This is a convenience function that does not use a UBiDi object. - * It is intended to be used for when an application has determined the levels - * of objects (character sequences) and just needs to have them reordered (L2). - * This is equivalent to using ubidi_getVisualMap() on a - * UBiDi object. - * - * @param levels is an array with length levels that have been determined by - * the application. - * - * @param length is the number of levels in the array, or, semantically, - * the number of objects to be reordered. - * It must be length>0. - * - * @param indexMap is a pointer to an array of length - * indexes which will reflect the reordering of the characters. - * The array does not need to be initialized.

- * The index map will result in indexMap[visualIndex]==logicalIndex. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); - -/** - * Invert an index map. - * The index mapping of the first map is inverted and written to - * the second one. - * - * @param srcMap is an array with length elements - * which defines the original mapping from a source array containing - * length elements to a destination array. - * Some elements of the source array may have no mapping in the - * destination array. In that case, their value will be - * the special value UBIDI_MAP_NOWHERE. - * All elements must be >=0 or equal to UBIDI_MAP_NOWHERE. - * Some elements may have a value >= length, if the - * destination array has more elements than the source array. - * There must be no duplicate indexes (two or more elements with the - * same value except UBIDI_MAP_NOWHERE). - * - * @param destMap is an array with a number of elements equal to 1 + the highest - * value in srcMap. - * destMap will be filled with the inverse mapping. - * If element with index i in srcMap has a value k different - * from UBIDI_MAP_NOWHERE, this means that element i of - * the source array maps to element k in the destination array. - * The inverse map will have value i in its k-th element. - * For all elements of the destination array which do not map to - * an element in the source array, the corresponding element in the - * inverse map will have a value equal to UBIDI_MAP_NOWHERE. - * - * @param length is the length of each array. 
- * @see UBIDI_MAP_NOWHERE - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length); - -/** option flags for ubidi_writeReordered() */ - -/** - * option bit for ubidi_writeReordered(): - * keep combining characters after their base characters in RTL runs - * - * @see ubidi_writeReordered - * @stable ICU 2.0 - */ -#define UBIDI_KEEP_BASE_COMBINING 1 - -/** - * option bit for ubidi_writeReordered(): - * replace characters with the "mirrored" property in RTL runs - * by their mirror-image mappings - * - * @see ubidi_writeReordered - * @stable ICU 2.0 - */ -#define UBIDI_DO_MIRRORING 2 - -/** - * option bit for ubidi_writeReordered(): - * surround the run with LRMs if necessary; - * this is part of the approximate "inverse Bidi" algorithm - * - *

This option does not imply corresponding adjustment of the index - * mappings.

- * - * @see ubidi_setInverse - * @see ubidi_writeReordered - * @stable ICU 2.0 - */ -#define UBIDI_INSERT_LRM_FOR_NUMERIC 4 - -/** - * option bit for ubidi_writeReordered(): - * remove Bidi control characters - * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC) - * - *

This option does not imply corresponding adjustment of the index - * mappings.

- * - * @see ubidi_writeReordered - * @stable ICU 2.0 - */ -#define UBIDI_REMOVE_BIDI_CONTROLS 8 - -/** - * option bit for ubidi_writeReordered(): - * write the output in reverse order - * - *

This has the same effect as calling ubidi_writeReordered() - * first without this option, and then calling - * ubidi_writeReverse() without mirroring. - * Doing this in the same step is faster and avoids a temporary buffer. - * An example for using this option is output to a character terminal that - * is designed for RTL scripts and stores text in reverse order.

- * - * @see ubidi_writeReordered - * @stable ICU 2.0 - */ -#define UBIDI_OUTPUT_REVERSE 16 - -/** - * Get the length of the source text processed by the last call to - * ubidi_setPara(). This length may be different from the length - * of the source text if option #UBIDI_OPTION_STREAMING - * has been set. - *
- * Note that whenever the length of the text affects the execution or the - * result of a function, it is the processed length which must be considered, - * except for ubidi_setPara (which receives unprocessed source - * text) and ubidi_getLength (which returns the original length - * of the source text).
- * In particular, the processed length is the one to consider in the following - * cases: - *
    - *
  • maximum value of the limit argument of - * ubidi_setLine
  • - *
  • maximum value of the charIndex argument of - * ubidi_getParagraph
  • - *
  • maximum value of the charIndex argument of - * ubidi_getLevelAt
  • - *
  • number of elements in the array returned by ubidi_getLevels
  • - *
  • maximum value of the logicalStart argument of - * ubidi_getLogicalRun
  • - *
  • maximum value of the logicalIndex argument of - * ubidi_getVisualIndex
  • - *
  • number of elements filled in the *indexMap argument of - * ubidi_getLogicalMap
  • - *
  • length of text processed by ubidi_writeReordered
  • - *
- * - * @param pBiDi is the paragraph UBiDi object. - * - * @return The length of the part of the source text processed by - * the last call to ubidi_setPara. - * @see ubidi_setPara - * @see UBIDI_OPTION_STREAMING - * @stable ICU 3.6 - */ -U_STABLE int32_t U_EXPORT2 -ubidi_getProcessedLength(const UBiDi *pBiDi); - -/** - * Get the length of the reordered text resulting from the last call to - * ubidi_setPara(). This length may be different from the length - * of the source text if option #UBIDI_OPTION_INSERT_MARKS - * or option #UBIDI_OPTION_REMOVE_CONTROLS has been set. - *
- * This resulting length is the one to consider in the following cases: - *
    - *
  • maximum value of the visualIndex argument of - * ubidi_getLogicalIndex
  • - *
  • number of elements of the *indexMap argument of - * ubidi_getVisualMap
  • - *
- * Note that this length stays identical to the source text length if - * Bidi marks are inserted or removed using option bits of - * ubidi_writeReordered, or if option - * #UBIDI_REORDER_INVERSE_NUMBERS_AS_L has been set. - * - * @param pBiDi is the paragraph UBiDi object. - * - * @return The length of the reordered text resulting from - * the last call to ubidi_setPara. - * @see ubidi_setPara - * @see UBIDI_OPTION_INSERT_MARKS - * @see UBIDI_OPTION_REMOVE_CONTROLS - * @stable ICU 3.6 - */ -U_STABLE int32_t U_EXPORT2 -ubidi_getResultLength(const UBiDi *pBiDi); - -U_CDECL_BEGIN - -#ifndef U_HIDE_DEPRECATED_API -/** - * Value returned by UBiDiClassCallback callbacks when - * there is no need to override the standard Bidi class for a given code point. - * - * This constant is deprecated; use u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1 instead. - * - * @see UBiDiClassCallback - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ -#define U_BIDI_CLASS_DEFAULT U_CHAR_DIRECTION_COUNT -#endif // U_HIDE_DEPRECATED_API - -/** - * Callback type declaration for overriding default Bidi class values with - * custom ones. - *

Usually, the function pointer will be propagated to a UBiDi - * object by calling the ubidi_setClassCallback() function; - * then the callback will be invoked by the UBA implementation any time the - * class of a character is to be determined.

- * - * @param context is a pointer to the callback private data. - * - * @param c is the code point to get a Bidi class for. - * - * @return The directional property / Bidi class for the given code point - * c if the default class has been overridden, or - * u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1 - * if the standard Bidi class value for c is to be used. - * @see ubidi_setClassCallback - * @see ubidi_getClassCallback - * @stable ICU 3.6 - */ -typedef UCharDirection U_CALLCONV -UBiDiClassCallback(const void *context, UChar32 c); - -U_CDECL_END - -/** - * Retrieve the Bidi class for a given code point. - *

If a #UBiDiClassCallback callback is defined and returns a - * value other than u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1, - * that value is used; otherwise the default class determination mechanism is invoked.

- * - * @param pBiDi is the paragraph UBiDi object. - * - * @param c is the code point whose Bidi class must be retrieved. - * - * @return The Bidi class for character c based - * on the given pBiDi instance. - * @see UBiDiClassCallback - * @stable ICU 3.6 - */ -U_STABLE UCharDirection U_EXPORT2 -ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c); - -/** - * Set the callback function and callback data used by the UBA - * implementation for Bidi class determination. - *

This may be useful for assigning Bidi classes to PUA characters, or - * for special application needs. For instance, an application may want to - * handle all spaces like L or R characters (according to the base direction) - * when creating the visual ordering of logical lines which are part of a report - * organized in columns: there should not be interaction between adjacent - * cells.

- * - * @param pBiDi is the paragraph UBiDi object. - * - * @param newFn is the new callback function pointer. - * - * @param newContext is the new callback context pointer. This can be NULL. - * - * @param oldFn fillin: Returns the old callback function pointer. This can be - * NULL. - * - * @param oldContext fillin: Returns the old callback's context. This can be - * NULL. - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @see ubidi_getClassCallback - * @stable ICU 3.6 - */ -U_STABLE void U_EXPORT2 -ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, - const void *newContext, UBiDiClassCallback **oldFn, - const void **oldContext, UErrorCode *pErrorCode); - -/** - * Get the current callback function used for Bidi class determination. - * - * @param pBiDi is the paragraph UBiDi object. - * - * @param fn fillin: Returns the callback function pointer. - * - * @param context fillin: Returns the callback's private context. - * - * @see ubidi_setClassCallback - * @stable ICU 3.6 - */ -U_STABLE void U_EXPORT2 -ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context); - -/** - * Take a UBiDi object containing the reordering - * information for a piece of text (one or more paragraphs) set by - * ubidi_setPara() or for a line of text set by - * ubidi_setLine() and write a reordered string to the - * destination buffer. - * - * This function preserves the integrity of characters with multiple - * code units and (optionally) combining characters. - * Characters in RTL runs can be replaced by mirror-image characters - * in the destination buffer. Note that "real" mirroring has - * to be done in a rendering engine by glyph selection - * and that for many "mirrored" characters there are no - * Unicode characters as mirror-image equivalents. - * There are also options to insert or remove Bidi control - * characters; see the description of the destSize - * and options parameters and of the option bit flags. 
- * - * @param pBiDi A pointer to a UBiDi object that - * is set by ubidi_setPara() or - * ubidi_setLine() and contains the reordering - * information for the text that it was defined for, - * as well as a pointer to that text.

- * The text was aliased (only the pointer was stored - * without copying the contents) and must not have been modified - * since the ubidi_setPara() call. - * - * @param dest A pointer to where the reordered text is to be copied. - * The source text and dest[destSize] - * must not overlap. - * - * @param destSize The size of the dest buffer, - * in number of UChars. - * If the UBIDI_INSERT_LRM_FOR_NUMERIC - * option is set, then the destination length could be - * as large as - * ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi). - * If the UBIDI_REMOVE_BIDI_CONTROLS option - * is set, then the destination length may be less than - * ubidi_getLength(pBiDi). - * If none of these options is set, then the destination length - * will be exactly ubidi_getProcessedLength(pBiDi). - * - * @param options A bit set of options for the reordering that control - * how the reordered text is written. - * The options include mirroring the characters on a code - * point basis and inserting LRM characters, which is used - * especially for transforming visually stored text - * to logically stored text (although this is still an - * imperfect implementation of an "inverse Bidi" algorithm - * because it uses the "forward Bidi" algorithm at its core). - * The available options are: - * #UBIDI_DO_MIRRORING, - * #UBIDI_INSERT_LRM_FOR_NUMERIC, - * #UBIDI_KEEP_BASE_COMBINING, - * #UBIDI_OUTPUT_REVERSE, - * #UBIDI_REMOVE_BIDI_CONTROLS - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @return The length of the output string. - * - * @see ubidi_getProcessedLength - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubidi_writeReordered(UBiDi *pBiDi, - UChar *dest, int32_t destSize, - uint16_t options, - UErrorCode *pErrorCode); - -/** - * Reverse a Right-To-Left run of Unicode text. - * - * This function preserves the integrity of characters with multiple - * code units and (optionally) combining characters. 
- * Characters can be replaced by mirror-image characters - * in the destination buffer. Note that "real" mirroring has - * to be done in a rendering engine by glyph selection - * and that for many "mirrored" characters there are no - * Unicode characters as mirror-image equivalents. - * There are also options to insert or remove Bidi control - * characters. - * - * This function is the implementation for reversing RTL runs as part - * of ubidi_writeReordered(). For detailed descriptions - * of the parameters, see there. - * Since no Bidi controls are inserted here, the output string length - * will never exceed srcLength. - * - * @see ubidi_writeReordered - * - * @param src A pointer to the RTL run text. - * - * @param srcLength The length of the RTL run. - * - * @param dest A pointer to where the reordered text is to be copied. - * src[srcLength] and dest[destSize] - * must not overlap. - * - * @param destSize The size of the dest buffer, - * in number of UChars. - * If the UBIDI_REMOVE_BIDI_CONTROLS option - * is set, then the destination length may be less than - * srcLength. - * If this option is not set, then the destination length - * will be exactly srcLength. - * - * @param options A bit set of options for the reordering that control - * how the reordered text is written. - * See the options parameter in ubidi_writeReordered(). - * - * @param pErrorCode must be a valid pointer to an error code value. - * - * @return The length of the output string. 
- * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubidi_writeReverse(const UChar *src, int32_t srcLength, - UChar *dest, int32_t destSize, - uint16_t options, - UErrorCode *pErrorCode); - -/*#define BIDI_SAMPLE_CODE*/ -/*@}*/ - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ubiditransform.h b/deps/node/deps/icu-small/source/common/unicode/ubiditransform.h deleted file mode 100644 index 5c08ed5d..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ubiditransform.h +++ /dev/null @@ -1,323 +0,0 @@ -/* -****************************************************************************** -* -* © 2016 and later: Unicode, Inc. and others. -* License & terms of use: http://www.unicode.org/copyright.html -* -****************************************************************************** -* file name: ubiditransform.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2016jul24 -* created by: Lina Kemmel -* -*/ - -#ifndef UBIDITRANSFORM_H -#define UBIDITRANSFORM_H - -#include "unicode/utypes.h" -#include "unicode/ubidi.h" -#include "unicode/uchar.h" -#include "unicode/localpointer.h" - -/** - * \file - * \brief Bidi Transformations - */ - -/** - * `UBiDiOrder` indicates the order of text. - * - * This bidi transformation engine supports all possible combinations (4 in - * total) of input and output text order: - * - * - : unless the output direction is RTL, this - * corresponds to a normal operation of the Bidi algorithm as described in the - * Unicode Technical Report and implemented by `UBiDi` when the - * reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL - * mode is not supported by `UBiDi` and is accomplished through - * reversing a visual LTR string, - * - * - : unless the input direction is RTL, this - * corresponds to an "inverse bidi algorithm" in `UBiDi` with the - * reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`. 
- * Visual RTL mode is not not supported by `UBiDi` and is - * accomplished through reversing a visual LTR string, - * - * - : if the input and output base directions - * mismatch, this corresponds to the `UBiDi` implementation with the - * reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the - * input and output base directions are identical, the transformation engine - * will only handle character mirroring and Arabic shaping operations without - * reordering, - * - * - : this reordering mode is not supported by - * the `UBiDi` engine; it implies character mirroring, Arabic - * shaping, and - if the input/output base directions mismatch - string - * reverse operations. - * @see ubidi_setInverse - * @see ubidi_setReorderingMode - * @see UBIDI_REORDER_DEFAULT - * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT - * @see UBIDI_REORDER_RUNS_ONLY - * @stable ICU 58 - */ -typedef enum { - /** 0: Constant indicating a logical order. - * This is the default for input text. - * @stable ICU 58 - */ - UBIDI_LOGICAL = 0, - /** 1: Constant indicating a visual order. - * This is a default for output text. - * @stable ICU 58 - */ - UBIDI_VISUAL -} UBiDiOrder; - -/** - * UBiDiMirroring indicates whether or not characters with the - * "mirrored" property in RTL runs should be replaced with their mirror-image - * counterparts. - * @see UBIDI_DO_MIRRORING - * @see ubidi_setReorderingOptions - * @see ubidi_writeReordered - * @see ubidi_writeReverse - * @stable ICU 58 - */ -typedef enum { - /** 0: Constant indicating that character mirroring should not be - * performed. - * This is the default. - * @stable ICU 58 - */ - UBIDI_MIRRORING_OFF = 0, - /** 1: Constant indicating that character mirroring should be performed. - * This corresponds to calling ubidi_writeReordered or - * ubidi_writeReverse with the - * UBIDI_DO_MIRRORING option bit set. 
- * @stable ICU 58 - */ - UBIDI_MIRRORING_ON -} UBiDiMirroring; - -/** - * Forward declaration of the UBiDiTransform structure that stores - * information used by the layout transformation engine. - * @stable ICU 58 - */ -typedef struct UBiDiTransform UBiDiTransform; - -/** - * Performs transformation of text from the bidi layout defined by the input - * ordering scheme to the bidi layout defined by the output ordering scheme, - * and applies character mirroring and Arabic shaping operations.

- * In terms of UBiDi, such a transformation implies: - *

    - *
  • calling ubidi_setReorderingMode as needed (when the - * reordering mode is other than normal),
  • - *
  • calling ubidi_setInverse as needed (when text should be - * transformed from a visual to a logical form),
  • - *
  • resolving embedding levels of each character in the input text by - * calling ubidi_setPara,
  • - *
  • reordering the characters based on the computed embedding levels, also - * performing character mirroring as needed, and streaming the result to the - * output, by calling ubidi_writeReordered,
  • - *
  • performing Arabic digit and letter shaping on the output text by calling - * u_shapeArabic.
  • - *
- * An "ordering scheme" encompasses the base direction and the order of text, - * and these characteristics must be defined by the caller for both input and - * output explicitly.

- * There are 36 possible combinations of <input, output> ordering schemes, - * which are partially supported by UBiDi already. Examples of the - * currently supported combinations: - *

    - *
  • <Logical LTR, Visual LTR>: this is equivalent to calling - * ubidi_setPara with paraLevel == UBIDI_LTR,
  • - *
  • <Logical RTL, Visual LTR>: this is equivalent to calling - * ubidi_setPara with paraLevel == UBIDI_RTL,
  • - *
  • <Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to - * calling ubidi_setPara with - * paraLevel == UBIDI_DEFAULT_LTR,
  • - *
  • <Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to - * calling ubidi_setPara with - * paraLevel == UBIDI_DEFAULT_RTL,
  • - *
  • <Visual LTR, Logical LTR>: this is equivalent to - * calling ubidi_setInverse(UBiDi*, TRUE) and then - * ubidi_setPara with paraLevel == UBIDI_LTR,
  • - *
  • <Visual LTR, Logical RTL>: this is equivalent to - * calling ubidi_setInverse(UBiDi*, TRUE) and then - * ubidi_setPara with paraLevel == UBIDI_RTL.
  • - *
- * All combinations that involve the Visual RTL scheme are unsupported by - * UBiDi, for instance: - *
    - *
  • <Logical LTR, Visual RTL>,
  • - *
  • <Visual RTL, Logical RTL>.
  • - *
- *

Example of usage of the transformation engine:
- *

- * \code
- * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
- * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
- * UErrorCode errorCode = U_ZERO_ERROR;
- * // Run a transformation.
- * ubiditransform_transform(pBidiTransform,
- *          text1, -1, text2, -1,
- *          UBIDI_LTR, UBIDI_VISUAL,
- *          UBIDI_RTL, UBIDI_LOGICAL,
- *          UBIDI_MIRRORING_OFF,
- *          U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
- *          &errorCode);
- * // Do something with text2.
- *  text2[4] = '2';
- * // Run a reverse transformation.
- * ubiditransform_transform(pBidiTransform,
- *          text2, -1, text1, -1,
- *          UBIDI_RTL, UBIDI_LOGICAL,
- *          UBIDI_LTR, UBIDI_VISUAL,
- *          UBIDI_MIRRORING_OFF,
- *          U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
- *          &errorCode);
- *\endcode
- * 
- *

- * - * @param pBiDiTransform A pointer to a UBiDiTransform object - * allocated with ubiditransform_open() or - * NULL.

- * This object serves for one-time setup to amortize initialization - * overheads. Use of this object is not thread-safe. All other threads - * should allocate a new UBiDiTransform object by calling - * ubiditransform_open() before using it. Alternatively, - * a caller can set this parameter to NULL, in which case - * the object will be allocated by the engine on the fly.

- * @param src A pointer to the text that the Bidi layout transformations will - * be performed on. - *

Note: the text must be (at least) - * srcLength long.

- * @param srcLength The length of the text, in number of UChars. If - * length == -1 then the text must be zero-terminated. - * @param dest A pointer to where the processed text is to be copied. - * @param destSize The size of the dest buffer, in number of - * UChars. If the U_SHAPE_LETTERS_UNSHAPE option is set, - * then the destination length could be as large as - * srcLength * 2. Otherwise, the destination length will - * not exceed srcLength. If the caller reserves the last - * position for zero-termination, it should be excluded from - * destSize. - *

destSize == -1 is allowed and makes sense when - * dest holds some meaningful value, e.g. that of - * src. In this case dest must be - * zero-terminated.

- * @param inParaLevel A base embedding level of the input as defined in - * ubidi_setPara documentation for the - * paraLevel parameter. - * @param inOrder An order of the input, which can be one of the - * UBiDiOrder values. - * @param outParaLevel A base embedding level of the output as defined in - * ubidi_setPara documentation for the - * paraLevel parameter. - * @param outOrder An order of the output, which can be one of the - * UBiDiOrder values. - * @param doMirroring Indicates whether or not to perform character mirroring, - * and can accept one of the UBiDiMirroring values. - * @param shapingOptions Arabic digit and letter shaping options defined in the - * ushape.h documentation. - *

Note: Direction indicator options are computed by - * the transformation engine based on the effective ordering schemes, so - * user-defined direction indicators will be ignored.

- * @param pErrorCode A pointer to an error code value. - * - * @return The destination length, i.e. the number of UChars written to - * dest. If the transformation fails, the return value - * will be 0 (and the error code will be written to - * pErrorCode). - * - * @see UBiDiLevel - * @see UBiDiOrder - * @see UBiDiMirroring - * @see ubidi_setPara - * @see u_shapeArabic - * @stable ICU 58 - */ -U_STABLE uint32_t U_EXPORT2 -ubiditransform_transform(UBiDiTransform *pBiDiTransform, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destSize, - UBiDiLevel inParaLevel, UBiDiOrder inOrder, - UBiDiLevel outParaLevel, UBiDiOrder outOrder, - UBiDiMirroring doMirroring, uint32_t shapingOptions, - UErrorCode *pErrorCode); - -/** - * Allocates a UBiDiTransform object. This object can be reused, - * e.g. with different ordering schemes, mirroring or shaping options.

- * Note: The object can only be reused in the same thread. - * All other threads should allocate a new UBiDiTransform object - * before using it.

- * Example of usage:

- *

- * \code
- * UErrorCode errorCode = U_ZERO_ERROR;
- * // Open a new UBiDiTransform.
- * UBiDiTransform* transform = ubiditransform_open(&errorCode);
- * // Run a transformation.
- * ubiditransform_transform(transform,
- *          text1, -1, text2, -1,
- *          UBIDI_RTL, UBIDI_LOGICAL,
- *          UBIDI_LTR, UBIDI_VISUAL,
- *          UBIDI_MIRRORING_ON,
- *          U_SHAPE_DIGITS_EN2AN,
- *          &errorCode);
- * // Do something with the output text and invoke another transformation using
- * //   that text as input.
- * ubiditransform_transform(transform,
- *          text2, -1, text3, -1,
- *          UBIDI_LTR, UBIDI_VISUAL,
- *          UBIDI_RTL, UBIDI_VISUAL,
- *          UBIDI_MIRRORING_ON,
- *          0, &errorCode);
- *\endcode
- * 
- *

- * The UBiDiTransform object must be deallocated by calling - * ubiditransform_close(). - * - * @return An empty UBiDiTransform object. - * @stable ICU 58 - */ -U_STABLE UBiDiTransform* U_EXPORT2 -ubiditransform_open(UErrorCode *pErrorCode); - -/** - * Deallocates the given UBiDiTransform object. - * @stable ICU 58 - */ -U_STABLE void U_EXPORT2 -ubiditransform_close(UBiDiTransform *pBidiTransform); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUBiDiTransformPointer - * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 58 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close); - -U_NAMESPACE_END - -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ubrk.h b/deps/node/deps/icu-small/source/common/unicode/ubrk.h deleted file mode 100644 index 73c1553b..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ubrk.h +++ /dev/null @@ -1,628 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1996-2015, International Business Machines Corporation and others. -* All Rights Reserved. -****************************************************************************** -*/ - -#ifndef UBRK_H -#define UBRK_H - -#include "unicode/utypes.h" -#include "unicode/uloc.h" -#include "unicode/utext.h" -#include "unicode/localpointer.h" - -/** - * A text-break iterator. - * For usage in C programs. - */ -#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR -# define UBRK_TYPEDEF_UBREAK_ITERATOR - /** - * Opaque type representing an ICU Break iterator object. 
- * @stable ICU 2.0 - */ - typedef struct UBreakIterator UBreakIterator; -#endif - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/parseerr.h" - -/** - * \file - * \brief C API: BreakIterator - * - *

BreakIterator C API

- * - * The BreakIterator C API defines methods for finding the location - * of boundaries in text. Pointer to a UBreakIterator maintain a - * current position and scan over text returning the index of characters - * where boundaries occur. - *

- * Line boundary analysis determines where a text string can be broken - * when line-wrapping. The mechanism correctly handles punctuation and - * hyphenated words. - *

- * Note: The locale keyword "lb" can be used to modify line break - * behavior according to the CSS level 3 line-break options, see - * . For example: - * "ja@lb=strict", "zh@lb=loose". - *

- * Sentence boundary analysis allows selection with correct - * interpretation of periods within numbers and abbreviations, and - * trailing punctuation marks such as quotation marks and parentheses. - *

- * Note: The locale keyword "ss" can be used to enable use of - * segmentation suppression data (preventing breaks in English after - * abbreviations such as "Mr." or "Est.", for example), as follows: - * "en@ss=standard". - *

- * Word boundary analysis is used by search and replace functions, as - * well as within text editing applications that allow the user to - * select words with a double click. Word selection provides correct - * interpretation of punctuation marks within and following - * words. Characters that are not part of a word, such as symbols or - * punctuation marks, have word-breaks on both sides. - *

- * Character boundary analysis identifies the boundaries of - * "Extended Grapheme Clusters", which are groupings of codepoints - * that should be treated as character-like units for many text operations. - * Please see Unicode Standard Annex #29, Unicode Text Segmentation, - * http://www.unicode.org/reports/tr29/ for additional information - * on grapheme clusters and guidelines on their use. - *

- * Title boundary analysis locates all positions, - * typically starts of words, that should be set to Title Case - * when title casing the text. - *

- * The text boundary positions are found according to the rules - * described in Unicode Standard Annex #29, Text Boundaries, and - * Unicode Standard Annex #14, Line Breaking Properties. These - * are available at http://www.unicode.org/reports/tr14/ and - * http://www.unicode.org/reports/tr29/. - *

- * In addition to the plain C API defined in this header file, an - * object oriented C++ API with equivalent functionality is defined in the - * file brkiter.h. - *

- * Code snippets illustrating the use of the Break Iterator APIs - * are available in the ICU User Guide, - * http://icu-project.org/userguide/boundaryAnalysis.html - * and in the sample program icu/source/samples/break/break.cpp - */ - -/** The possible types of text boundaries. @stable ICU 2.0 */ -typedef enum UBreakIteratorType { - /** Character breaks @stable ICU 2.0 */ - UBRK_CHARACTER = 0, - /** Word breaks @stable ICU 2.0 */ - UBRK_WORD = 1, - /** Line breaks @stable ICU 2.0 */ - UBRK_LINE = 2, - /** Sentence breaks @stable ICU 2.0 */ - UBRK_SENTENCE = 3, - -#ifndef U_HIDE_DEPRECATED_API - /** - * Title Case breaks - * The iterator created using this type locates title boundaries as described for - * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, - * please use Word Boundary iterator. - * - * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later. - */ - UBRK_TITLE = 4, - /** - * One more than the highest normal UBreakIteratorType value. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UBRK_COUNT = 5 -#endif // U_HIDE_DEPRECATED_API -} UBreakIteratorType; - -/** Value indicating all text boundaries have been returned. - * @stable ICU 2.0 - */ -#define UBRK_DONE ((int32_t) -1) - - -/** - * Enum constants for the word break tags returned by - * getRuleStatus(). A range of values is defined for each category of - * word, to allow for further subdivisions of a category in future releases. - * Applications should check for tag values falling within the range, rather - * than for single individual values. - * - * The numeric values of all of these constants are stable (will not change). - * - * @stable ICU 2.2 -*/ -typedef enum UWordBreak { - /** Tag value for "words" that do not fit into any of other categories. - * Includes spaces and most punctuation. */ - UBRK_WORD_NONE = 0, - /** Upper bound for tags for uncategorized words. 
*/ - UBRK_WORD_NONE_LIMIT = 100, - /** Tag value for words that appear to be numbers, lower limit. */ - UBRK_WORD_NUMBER = 100, - /** Tag value for words that appear to be numbers, upper limit. */ - UBRK_WORD_NUMBER_LIMIT = 200, - /** Tag value for words that contain letters, excluding - * hiragana, katakana or ideographic characters, lower limit. */ - UBRK_WORD_LETTER = 200, - /** Tag value for words containing letters, upper limit */ - UBRK_WORD_LETTER_LIMIT = 300, - /** Tag value for words containing kana characters, lower limit */ - UBRK_WORD_KANA = 300, - /** Tag value for words containing kana characters, upper limit */ - UBRK_WORD_KANA_LIMIT = 400, - /** Tag value for words containing ideographic characters, lower limit */ - UBRK_WORD_IDEO = 400, - /** Tag value for words containing ideographic characters, upper limit */ - UBRK_WORD_IDEO_LIMIT = 500 -} UWordBreak; - -/** - * Enum constants for the line break tags returned by getRuleStatus(). - * A range of values is defined for each category of - * word, to allow for further subdivisions of a category in future releases. - * Applications should check for tag values falling within the range, rather - * than for single individual values. - * - * The numeric values of all of these constants are stable (will not change). - * - * @stable ICU 2.8 -*/ -typedef enum ULineBreakTag { - /** Tag value for soft line breaks, positions at which a line break - * is acceptable but not required */ - UBRK_LINE_SOFT = 0, - /** Upper bound for soft line breaks. */ - UBRK_LINE_SOFT_LIMIT = 100, - /** Tag value for a hard, or mandatory line break */ - UBRK_LINE_HARD = 100, - /** Upper bound for hard line breaks. */ - UBRK_LINE_HARD_LIMIT = 200 -} ULineBreakTag; - - - -/** - * Enum constants for the sentence break tags returned by getRuleStatus(). - * A range of values is defined for each category of - * sentence, to allow for further subdivisions of a category in future releases. 
- * Applications should check for tag values falling within the range, rather - * than for single individual values. - * - * The numeric values of all of these constants are stable (will not change). - * - * @stable ICU 2.8 -*/ -typedef enum USentenceBreakTag { - /** Tag value for for sentences ending with a sentence terminator - * ('.', '?', '!', etc.) character, possibly followed by a - * hard separator (CR, LF, PS, etc.) - */ - UBRK_SENTENCE_TERM = 0, - /** Upper bound for tags for sentences ended by sentence terminators. */ - UBRK_SENTENCE_TERM_LIMIT = 100, - /** Tag value for for sentences that do not contain an ending - * sentence terminator ('.', '?', '!', etc.) character, but - * are ended only by a hard separator (CR, LF, PS, etc.) or end of input. - */ - UBRK_SENTENCE_SEP = 100, - /** Upper bound for tags for sentences ended by a separator. */ - UBRK_SENTENCE_SEP_LIMIT = 200 - /** Tag value for a hard, or mandatory line break */ -} USentenceBreakTag; - - -/** - * Open a new UBreakIterator for locating text boundaries for a specified locale. - * A UBreakIterator may be used for detecting character, line, word, - * and sentence breaks in text. - * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD, - * UBRK_LINE, UBRK_SENTENCE - * @param locale The locale specifying the text-breaking conventions. Note that - * locale keys such as "lb" and "ss" may be used to modify text break behavior, - * see general discussion of BreakIterator C API. - * @param text The text to be iterated over. May be null, in which case ubrk_setText() is - * used to specify the text to be iterated. - * @param textLength The number of characters in text, or -1 if null-terminated. - * @param status A UErrorCode to receive any errors. - * @return A UBreakIterator for the specified locale. 
- * @see ubrk_openRules - * @stable ICU 2.0 - */ -U_STABLE UBreakIterator* U_EXPORT2 -ubrk_open(UBreakIteratorType type, - const char *locale, - const UChar *text, - int32_t textLength, - UErrorCode *status); - -/** - * Open a new UBreakIterator for locating text boundaries using specified breaking rules. - * The rule syntax is ... (TBD) - * @param rules A set of rules specifying the text breaking conventions. - * @param rulesLength The number of characters in rules, or -1 if null-terminated. - * @param text The text to be iterated over. May be null, in which case ubrk_setText() is - * used to specify the text to be iterated. - * @param textLength The number of characters in text, or -1 if null-terminated. - * @param parseErr Receives position and context information for any syntax errors - * detected while parsing the rules. - * @param status A UErrorCode to receive any errors. - * @return A UBreakIterator for the specified rules. - * @see ubrk_open - * @stable ICU 2.2 - */ -U_STABLE UBreakIterator* U_EXPORT2 -ubrk_openRules(const UChar *rules, - int32_t rulesLength, - const UChar *text, - int32_t textLength, - UParseError *parseErr, - UErrorCode *status); - -/** - * Open a new UBreakIterator for locating text boundaries using precompiled binary rules. - * Opening a UBreakIterator this way is substantially faster than using ubrk_openRules. - * Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not - * compatible across different major versions of ICU, nor across platforms of different - * endianness or different base character set family (ASCII vs EBCDIC). - * @param binaryRules A set of compiled binary rules specifying the text breaking - * conventions. Ownership of the storage containing the compiled - * rules remains with the caller of this function. The compiled - * rules must not be modified or deleted during the life of the - * break iterator. - * @param rulesLength The length of binaryRules in bytes; must be >= 0. 
- * @param text The text to be iterated over. May be null, in which case - * ubrk_setText() is used to specify the text to be iterated. - * @param textLength The number of characters in text, or -1 if null-terminated. - * @param status Pointer to UErrorCode to receive any errors. - * @return UBreakIterator for the specified rules. - * @see ubrk_getBinaryRules - * @stable ICU 59 - */ -U_STABLE UBreakIterator* U_EXPORT2 -ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, - const UChar * text, int32_t textLength, - UErrorCode * status); - -/** - * Thread safe cloning operation - * @param bi iterator to be cloned - * @param stackBuffer Deprecated functionality as of ICU 52, use NULL.
- * user allocated space for the new clone. If NULL new memory will be allocated. - * If buffer is not large enough, new memory will be allocated. - * Clients can use the U_BRK_SAFECLONE_BUFFERSIZE. - * @param pBufferSize Deprecated functionality as of ICU 52, use NULL or 1.
- * pointer to size of allocated space. - * If *pBufferSize == 0, a sufficient size for use in cloning will - * be returned ('pre-flighting') - * If *pBufferSize is not enough for a stack-based safe clone, - * new memory will be allocated. - * @param status to indicate whether the operation went on smoothly or there were errors - * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary. - * @return pointer to the new clone - * @stable ICU 2.0 - */ -U_STABLE UBreakIterator * U_EXPORT2 -ubrk_safeClone( - const UBreakIterator *bi, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status); - -#ifndef U_HIDE_DEPRECATED_API - -/** - * A recommended size (in bytes) for the memory buffer to be passed to ubrk_saveClone(). - * @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer. - */ -#define U_BRK_SAFECLONE_BUFFERSIZE 1 - -#endif /* U_HIDE_DEPRECATED_API */ - -/** -* Close a UBreakIterator. -* Once closed, a UBreakIterator may no longer be used. -* @param bi The break iterator to close. - * @stable ICU 2.0 -*/ -U_STABLE void U_EXPORT2 -ubrk_close(UBreakIterator *bi); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUBreakIteratorPointer - * "Smart pointer" class, closes a UBreakIterator via ubrk_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close); - -U_NAMESPACE_END - -#endif - -/** - * Sets an existing iterator to point to a new piece of text. - * The break iterator retains a pointer to the supplied text. - * The caller must not modify or delete the text while the BreakIterator - * retains the reference. 
- * - * @param bi The iterator to use - * @param text The text to be set - * @param textLength The length of the text - * @param status The error code - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ubrk_setText(UBreakIterator* bi, - const UChar* text, - int32_t textLength, - UErrorCode* status); - - -/** - * Sets an existing iterator to point to a new piece of text. - * - * All index positions returned by break iterator functions are - * native indices from the UText. For example, when breaking UTF-8 - * encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc. - * will be UTF-8 string indices, not UTF-16 positions. - * - * @param bi The iterator to use - * @param text The text to be set. - * This function makes a shallow clone of the supplied UText. This means - * that the caller is free to immediately close or otherwise reuse the - * UText that was passed as a parameter, but that the underlying text itself - * must not be altered while being referenced by the break iterator. - * @param status The error code - * @stable ICU 3.4 - */ -U_STABLE void U_EXPORT2 -ubrk_setUText(UBreakIterator* bi, - UText* text, - UErrorCode* status); - - - -/** - * Determine the most recently-returned text boundary. - * - * @param bi The break iterator to use. - * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous, - * \ref ubrk_first, or \ref ubrk_last. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubrk_current(const UBreakIterator *bi); - -/** - * Advance the iterator to the boundary following the current boundary. - * - * @param bi The break iterator to use. - * @return The character index of the next text boundary, or UBRK_DONE - * if all text boundaries have been returned. - * @see ubrk_previous - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubrk_next(UBreakIterator *bi); - -/** - * Set the iterator position to the boundary preceding the current boundary. 
- * - * @param bi The break iterator to use. - * @return The character index of the preceding text boundary, or UBRK_DONE - * if all text boundaries have been returned. - * @see ubrk_next - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubrk_previous(UBreakIterator *bi); - -/** - * Set the iterator position to zero, the start of the text being scanned. - * @param bi The break iterator to use. - * @return The new iterator position (zero). - * @see ubrk_last - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubrk_first(UBreakIterator *bi); - -/** - * Set the iterator position to the index immediately beyond the last character in the text being scanned. - * This is not the same as the last character. - * @param bi The break iterator to use. - * @return The character offset immediately beyond the last character in the - * text being scanned. - * @see ubrk_first - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubrk_last(UBreakIterator *bi); - -/** - * Set the iterator position to the first boundary preceding the specified offset. - * The new position is always smaller than offset, or UBRK_DONE. - * @param bi The break iterator to use. - * @param offset The offset to begin scanning. - * @return The text boundary preceding offset, or UBRK_DONE. - * @see ubrk_following - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubrk_preceding(UBreakIterator *bi, - int32_t offset); - -/** - * Advance the iterator to the first boundary following the specified offset. - * The value returned is always greater than offset, or UBRK_DONE. - * @param bi The break iterator to use. - * @param offset The offset to begin scanning. - * @return The text boundary following offset, or UBRK_DONE. - * @see ubrk_preceding - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ubrk_following(UBreakIterator *bi, - int32_t offset); - -/** -* Get a locale for which text breaking information is available. 
-* A UBreakIterator in a locale returned by this function will perform the correct -* text breaking for the locale. -* @param index The index of the desired locale. -* @return A locale for which number text breaking information is available, or 0 if none. -* @see ubrk_countAvailable -* @stable ICU 2.0 -*/ -U_STABLE const char* U_EXPORT2 -ubrk_getAvailable(int32_t index); - -/** -* Determine how many locales have text breaking information available. -* This function is most useful as determining the loop ending condition for -* calls to \ref ubrk_getAvailable. -* @return The number of locales for which text breaking information is available. -* @see ubrk_getAvailable -* @stable ICU 2.0 -*/ -U_STABLE int32_t U_EXPORT2 -ubrk_countAvailable(void); - - -/** -* Returns true if the specified position is a boundary position. As a side -* effect, leaves the iterator pointing to the first boundary position at -* or after "offset". -* @param bi The break iterator to use. -* @param offset the offset to check. -* @return True if "offset" is a boundary position. -* @stable ICU 2.0 -*/ -U_STABLE UBool U_EXPORT2 -ubrk_isBoundary(UBreakIterator *bi, int32_t offset); - -/** - * Return the status from the break rule that determined the most recently - * returned break position. The values appear in the rule source - * within brackets, {123}, for example. For rules that do not specify a - * status, a default value of 0 is returned. - *

- * For word break iterators, the possible values are defined in enum UWordBreak. - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -ubrk_getRuleStatus(UBreakIterator *bi); - -/** - * Get the statuses from the break rules that determined the most recently - * returned break position. The values appear in the rule source - * within brackets, {123}, for example. The default status value for rules - * that do not explicitly provide one is zero. - *

- * For word break iterators, the possible values are defined in enum UWordBreak. - * @param bi The break iterator to use - * @param fillInVec an array to be filled in with the status values. - * @param capacity the length of the supplied vector. A length of zero causes - * the function to return the number of status values, in the - * normal way, without attempting to store any values. - * @param status receives error codes. - * @return The number of rule status values from rules that determined - * the most recent boundary returned by the break iterator. - * @stable ICU 3.0 - */ -U_STABLE int32_t U_EXPORT2 -ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status); - -/** - * Return the locale of the break iterator. You can choose between the valid and - * the actual locale. - * @param bi break iterator - * @param type locale type (valid or actual) - * @param status error code - * @return locale string - * @stable ICU 2.8 - */ -U_STABLE const char* U_EXPORT2 -ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status); - -/** - * Set the subject text string upon which the break iterator is operating - * without changing any other aspect of the state. - * The new and previous text strings must have the same content. - * - * This function is intended for use in environments where ICU is operating on - * strings that may move around in memory. It provides a mechanism for notifying - * ICU that the string has been relocated, and providing a new UText to access the - * string in its new position. - * - * Note that the break iterator never copies the underlying text - * of a string being processed, but always operates directly on the original text - * provided by the user. Refreshing simply drops the references to the old text - * and replaces them with references to the new. - * - * Caution: this function is normally used only by very specialized - * system-level code. 
One example use case is with garbage collection - * that moves the text in memory. - * - * @param bi The break iterator. - * @param text The new (moved) text string. - * @param status Receives errors detected by this function. - * - * @stable ICU 49 - */ -U_STABLE void U_EXPORT2 -ubrk_refreshUText(UBreakIterator *bi, - UText *text, - UErrorCode *status); - - -/** - * Get a compiled binary version of the rules specifying the behavior of a UBreakIterator. - * The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator - * more quickly than using ubrk_openRules. The compiled rules are not compatible across - * different major versions of ICU, nor across platforms of different endianness or - * different base character set family (ASCII vs EBCDIC). Supports preflighting (with - * binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to - * the binaryRules buffer. However, whether preflighting or not, if the actual length - * is greater than INT32_MAX, then the function returns 0 and sets *status to - * U_INDEX_OUTOFBOUNDS_ERROR. - - * @param bi The break iterator to use. - * @param binaryRules Buffer to receive the compiled binary rules; set to NULL for - * preflighting. - * @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for - * preflighting. Must be >= 0. - * @param status Pointer to UErrorCode to receive any errors, such as - * U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or - * U_ILLEGAL_ARGUMENT_ERROR. - * @return The actual byte length of the binary rules, if <= INT32_MAX; - * otherwise 0. If not preflighting and this is larger than - * rulesCapacity, *status will be set to an error. 
- * @see ubrk_openBinaryRules - * @stable ICU 59 - */ -U_STABLE int32_t U_EXPORT2 -ubrk_getBinaryRules(UBreakIterator *bi, - uint8_t * binaryRules, int32_t rulesCapacity, - UErrorCode * status); - -#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ucasemap.h b/deps/node/deps/icu-small/source/common/unicode/ucasemap.h deleted file mode 100644 index 6b253e3d..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ucasemap.h +++ /dev/null @@ -1,385 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2005-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ucasemap.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2005may06 -* created by: Markus W. Scherer -* -* Case mapping service object and functions using it. -*/ - -#ifndef __UCASEMAP_H__ -#define __UCASEMAP_H__ - -#include "unicode/utypes.h" -#include "unicode/localpointer.h" -#include "unicode/stringoptions.h" -#include "unicode/ustring.h" - -/** - * \file - * \brief C API: Unicode case mapping functions using a UCaseMap service object. - * - * The service object takes care of memory allocations, data loading, and setup - * for the attributes, as usual. - * - * Currently, the functionality provided here does not overlap with uchar.h - * and ustring.h, except for ucasemap_toTitle(). - * - * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings. - */ - -/** - * UCaseMap is an opaque service object for newer ICU case mapping functions. - * Older functions did not use a service object. - * @stable ICU 3.4 - */ -struct UCaseMap; -typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. 
@stable ICU 3.4 */ - -/** - * Open a UCaseMap service object for a locale and a set of options. - * The locale ID and options are preprocessed so that functions using the - * service object need not process them in each call. - * - * @param locale ICU locale ID, used for language-dependent - * upper-/lower-/title-casing according to the Unicode standard. - * Usual semantics: ""=root, NULL=default locale, etc. - * @param options Options bit set, used for case folding and string comparisons. - * Same flags as for u_foldCase(), u_strFoldCase(), - * u_strCaseCompare(), etc. - * Use 0 or U_FOLD_CASE_DEFAULT for default behavior. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return Pointer to a UCaseMap service object, if successful. - * - * @see U_FOLD_CASE_DEFAULT - * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I - * @see U_TITLECASE_NO_LOWERCASE - * @see U_TITLECASE_NO_BREAK_ADJUSTMENT - * @stable ICU 3.4 - */ -U_STABLE UCaseMap * U_EXPORT2 -ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode); - -/** - * Close a UCaseMap service object. - * @param csm Object to be closed. - * @stable ICU 3.4 - */ -U_STABLE void U_EXPORT2 -ucasemap_close(UCaseMap *csm); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUCaseMapPointer - * "Smart pointer" class, closes a UCaseMap via ucasemap_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close); - -U_NAMESPACE_END - -#endif - -/** - * Get the locale ID that is used for language-dependent case mappings. - * @param csm UCaseMap service object. 
- * @return locale ID - * @stable ICU 3.4 - */ -U_STABLE const char * U_EXPORT2 -ucasemap_getLocale(const UCaseMap *csm); - -/** - * Get the options bit set that is used for case folding and string comparisons. - * @param csm UCaseMap service object. - * @return options bit set - * @stable ICU 3.4 - */ -U_STABLE uint32_t U_EXPORT2 -ucasemap_getOptions(const UCaseMap *csm); - -/** - * Set the locale ID that is used for language-dependent case mappings. - * - * @param csm UCaseMap service object. - * @param locale Locale ID, see ucasemap_open(). - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * - * @see ucasemap_open - * @stable ICU 3.4 - */ -U_STABLE void U_EXPORT2 -ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode); - -/** - * Set the options bit set that is used for case folding and string comparisons. - * - * @param csm UCaseMap service object. - * @param options Options bit set, see ucasemap_open(). - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * - * @see ucasemap_open - * @stable ICU 3.4 - */ -U_STABLE void U_EXPORT2 -ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode); - -#if !UCONFIG_NO_BREAK_ITERATION - -/** - * Get the break iterator that is used for titlecasing. - * Do not modify the returned break iterator. - * @param csm UCaseMap service object. - * @return titlecasing break iterator - * @stable ICU 3.8 - */ -U_STABLE const UBreakIterator * U_EXPORT2 -ucasemap_getBreakIterator(const UCaseMap *csm); - -/** - * Set the break iterator that is used for titlecasing. - * The UCaseMap service object releases a previously set break iterator - * and "adopts" this new one, taking ownership of it. - * It will be released in a subsequent call to ucasemap_setBreakIterator() - * or ucasemap_close(). 
- * - * Break iterator operations are not thread-safe. Therefore, titlecasing - * functions use non-const UCaseMap objects. It is not possible to titlecase - * strings concurrently using the same UCaseMap. - * - * @param csm UCaseMap service object. - * @param iterToAdopt Break iterator to be adopted for titlecasing. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * - * @see ucasemap_toTitle - * @see ucasemap_utf8ToTitle - * @stable ICU 3.8 - */ -U_STABLE void U_EXPORT2 -ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode); - -/** - * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(), - * except that it takes ucasemap_setOptions() into account and has performance - * advantages from being able to use a UCaseMap object for multiple case mapping - * operations, saving setup time. - * - * Casing is locale-dependent and context-sensitive. - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. (This can be modified with ucasemap_setOptions().) - * - * Note: This function takes a non-const UCaseMap pointer because it will - * open a default break iterator if no break iterator was set yet, - * and effectively call ucasemap_setBreakIterator(); - * also because the break iterator is stateful and will be modified during - * the iteration. - * - * The titlecase break iterator can be provided to customize for arbitrary - * styles, using rules and dictionaries beyond the standard iterators. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. - * - * This function uses only the setText(), first() and next() methods of the - * provided break iterator. - * - * The result may be longer or shorter than the original. 
- * The source string and the destination buffer must not overlap. - * - * @param csm UCaseMap service object. This pointer is non-const! - * See the note above for details. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. - * - * @see u_strToTitle - * @stable ICU 3.8 - */ -U_STABLE int32_t U_EXPORT2 -ucasemap_toTitle(UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode); - -#endif // UCONFIG_NO_BREAK_ITERATION - -/** - * Lowercase the characters in a UTF-8 string. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * @param csm UCaseMap service object. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of bytes). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string. - * @param srcLength The length of the original string. 
If -1, then src must be NUL-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. - * - * @see u_strToLower - * @stable ICU 3.4 - */ -U_STABLE int32_t U_EXPORT2 -ucasemap_utf8ToLower(const UCaseMap *csm, - char *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Uppercase the characters in a UTF-8 string. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * @param csm UCaseMap service object. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of bytes). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. - * - * @see u_strToUpper - * @stable ICU 3.4 - */ -U_STABLE int32_t U_EXPORT2 -ucasemap_utf8ToUpper(const UCaseMap *csm, - char *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode); - -#if !UCONFIG_NO_BREAK_ITERATION - -/** - * Titlecase a UTF-8 string. - * Casing is locale-dependent and context-sensitive. 
- * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. (This can be modified with ucasemap_setOptions().) - * - * Note: This function takes a non-const UCaseMap pointer because it will - * open a default break iterator if no break iterator was set yet, - * and effectively call ucasemap_setBreakIterator(); - * also because the break iterator is stateful and will be modified during - * the iteration. - * - * The titlecase break iterator can be provided to customize for arbitrary - * styles, using rules and dictionaries beyond the standard iterators. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. - * - * This function uses only the setUText(), first(), next() and close() methods of the - * provided break iterator. - * - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * @param csm UCaseMap service object. This pointer is non-const! - * See the note above for details. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of bytes). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. 
- * - * @see u_strToTitle - * @see U_TITLECASE_NO_LOWERCASE - * @see U_TITLECASE_NO_BREAK_ADJUSTMENT - * @stable ICU 3.8 - */ -U_STABLE int32_t U_EXPORT2 -ucasemap_utf8ToTitle(UCaseMap *csm, - char *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode); - -#endif - -/** - * Case-folds the characters in a UTF-8 string. - * - * Case-folding is locale-independent and not context-sensitive, - * but there is an option for whether to include or exclude mappings for dotted I - * and dotless i that are marked with 'T' in CaseFolding.txt. - * - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * @param csm UCaseMap service object. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of bytes). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. 
- * - * @see u_strFoldCase - * @see ucasemap_setOptions - * @see U_FOLD_CASE_DEFAULT - * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I - * @stable ICU 3.8 - */ -U_STABLE int32_t U_EXPORT2 -ucasemap_utf8FoldCase(const UCaseMap *csm, - char *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode); - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ucat.h b/deps/node/deps/icu-small/source/common/unicode/ucat.h deleted file mode 100644 index f9c18b47..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ucat.h +++ /dev/null @@ -1,160 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2003-2004, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Author: Alan Liu -* Created: March 19 2003 -* Since: ICU 2.6 -********************************************************************** -*/ -#ifndef UCAT_H -#define UCAT_H - -#include "unicode/utypes.h" -#include "unicode/ures.h" - -/** - * \file - * \brief C API: Message Catalog Wrappers - * - * This C API provides look-alike functions that deliberately resemble - * the POSIX catopen, catclose, and catgets functions. The underlying - * implementation is in terms of ICU resource bundles, rather than - * POSIX message catalogs. - * - * The ICU resource bundles obey standard ICU inheritance policies. - * To facilitate this, sets and messages are flattened into one tier. - * This is done by creating resource bundle keys of the form - * <set_num>%<msg_num> where set_num is the set number and msg_num is - * the message number, formatted as decimal strings. - * - * Example: Consider a message catalog containing two sets: - * - * Set 1: Message 4 = "Good morning." - * Message 5 = "Good afternoon." 
- * Message 7 = "Good evening." - * Message 8 = "Good night." - * Set 4: Message 14 = "Please " - * Message 19 = "Thank you." - * Message 20 = "Sincerely," - * - * The ICU resource bundle source file would, assuming it is named - * "greet.txt", would look like this: - * - * greet - * { - * 1%4 { "Good morning." } - * 1%5 { "Good afternoon." } - * 1%7 { "Good evening." } - * 1%8 { "Good night." } - * - * 4%14 { "Please " } - * 4%19 { "Thank you." } - * 4%20 { "Sincerely," } - * } - * - * The catgets function is commonly used in combination with functions - * like printf and strftime. ICU components like message format can - * be used instead, although they use a different format syntax. - * There is an ICU package, icuio, that provides some of - * the POSIX-style formatting API. - */ - -U_CDECL_BEGIN - -/** - * An ICU message catalog descriptor, analogous to nl_catd. - * - * @stable ICU 2.6 - */ -typedef UResourceBundle* u_nl_catd; - -/** - * Open and return an ICU message catalog descriptor. The descriptor - * may be passed to u_catgets() to retrieve localized strings. - * - * @param name string containing the full path pointing to the - * directory where the resources reside followed by the package name - * e.g. "/usr/resource/my_app/resources/guimessages" on a Unix system. - * If NULL, ICU default data files will be used. - * - * Unlike POSIX, environment variables are not interpolated within the - * name. - * - * @param locale the locale for which we want to open the resource. If - * NULL, the default ICU locale will be used (see uloc_getDefault). If - * strlen(locale) == 0, the root locale will be used. - * - * @param ec input/output error code. Upon output, - * U_USING_FALLBACK_WARNING indicates that a fallback locale was - * used. For example, 'de_CH' was requested, but nothing was found - * there, so 'de' was used. 
U_USING_DEFAULT_WARNING indicates that the - * default locale data or root locale data was used; neither the - * requested locale nor any of its fallback locales were found. - * - * @return a message catalog descriptor that may be passed to - * u_catgets(). If the ec parameter indicates success, then the caller - * is responsible for calling u_catclose() to close the message - * catalog. If the ec parameter indicates failure, then NULL will be - * returned. - * - * @stable ICU 2.6 - */ -U_STABLE u_nl_catd U_EXPORT2 -u_catopen(const char* name, const char* locale, UErrorCode* ec); - -/** - * Close an ICU message catalog, given its descriptor. - * - * @param catd a message catalog descriptor to be closed. May be NULL, - * in which case no action is taken. - * - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -u_catclose(u_nl_catd catd); - -/** - * Retrieve a localized string from an ICU message catalog. - * - * @param catd a message catalog descriptor returned by u_catopen. - * - * @param set_num the message catalog set number. Sets need not be - * numbered consecutively. - * - * @param msg_num the message catalog message number within the - * set. Messages need not be numbered consecutively. - * - * @param s the default string. This is returned if the string - * specified by the set_num and msg_num is not found. It must be - * zero-terminated. - * - * @param len fill-in parameter to receive the length of the result. - * May be NULL, in which case it is ignored. - * - * @param ec input/output error code. May be U_USING_FALLBACK_WARNING - * or U_USING_DEFAULT_WARNING. U_MISSING_RESOURCE_ERROR indicates that - * the set_num/msg_num tuple does not specify a valid message string - * in this catalog. - * - * @return a pointer to a zero-terminated UChar array which lives in - * an internal buffer area, typically a memory mapped/DLL file. The - * caller must NOT delete this pointer. If the call is unsuccessful - * for any reason, then s is returned. 
This includes the situation in - * which ec indicates a failing error code upon entry to this - * function. - * - * @stable ICU 2.6 - */ -U_STABLE const UChar* U_EXPORT2 -u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num, - const UChar* s, - int32_t* len, UErrorCode* ec); - -U_CDECL_END - -#endif /*UCAT_H*/ -/*eof*/ diff --git a/deps/node/deps/icu-small/source/common/unicode/uchar.h b/deps/node/deps/icu-small/source/common/unicode/uchar.h deleted file mode 100644 index 9e180db5..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uchar.h +++ /dev/null @@ -1,4023 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File UCHAR.H -* -* Modification History: -* -* Date Name Description -* 04/02/97 aliu Creation. -* 03/29/99 helena Updated for C APIs. -* 4/15/99 Madhu Updated for C Implementation and Javadoc -* 5/20/99 Madhu Added the function u_getVersion() -* 8/19/1999 srl Upgraded scripts to Unicode 3.0 -* 8/27/1999 schererm UCharDirection constants: U_... -* 11/11/1999 weiv added u_isalnum(), cleaned comments -* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion(). -****************************************************************************** -*/ - -#ifndef UCHAR_H -#define UCHAR_H - -#include "unicode/utypes.h" -#include "unicode/stringoptions.h" -#include "unicode/ucpmap.h" - -#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN) - -#define USET_DEFINED - -/** - * USet is the C API type corresponding to C++ class UnicodeSet. - * It is forward-declared here to avoid including unicode/uset.h file if related - * APIs are not used. 
- * - * @see ucnv_getUnicodeSet - * @stable ICU 2.4 - */ -typedef struct USet USet; - -#endif - - -U_CDECL_BEGIN - -/*==========================================================================*/ -/* Unicode version number */ -/*==========================================================================*/ -/** - * Unicode version number, default for the current ICU version. - * The actual Unicode Character Database (UCD) data is stored in uprops.dat - * and may be generated from UCD files from a different Unicode version. - * Call u_getUnicodeVersion to get the actual Unicode version of the data. - * - * @see u_getUnicodeVersion - * @stable ICU 2.0 - */ -#define U_UNICODE_VERSION "11.0" - -/** - * \file - * \brief C API: Unicode Properties - * - * This C API provides low-level access to the Unicode Character Database. - * In addition to raw property values, some convenience functions calculate - * derived properties, for example for Java-style programming. - * - * Unicode assigns each code point (not just assigned character) values for - * many properties. - * Most of them are simple boolean flags, or constants from a small enumerated list. - * For some properties, values are strings or other relatively more complex types. - * - * For more information see - * "About the Unicode Character Database" (http://www.unicode.org/ucd/) - * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html). - * - * Many properties are accessible via generic functions that take a UProperty selector. - * - u_hasBinaryProperty() returns a binary value (TRUE/FALSE) per property and code point. - * - u_getIntPropertyValue() returns an integer value per property and code point. - * For each supported enumerated or catalog property, there is - * an enum type for all of the property's values, and - * u_getIntPropertyValue() returns the numeric values of those constants. 
- * - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with - * all code points for which the property is true. - * - u_getIntPropertyMap() returns a map for each - * ICU-supported enumerated/catalog/int-valued property which - * maps all Unicode code points to their values for that property. - * - * Many functions are designed to match java.lang.Character functions. - * See the individual function documentation, - * and see the JDK 1.4 java.lang.Character documentation - * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html - * - * There are also functions that provide easy migration from C/POSIX functions - * like isblank(). Their use is generally discouraged because the C/POSIX - * standards do not define their semantics beyond the ASCII range, which means - * that different implementations exhibit very different behavior. - * Instead, Unicode properties should be used directly. - * - * There are also only a few, broad C/POSIX character classes, and they tend - * to be used for conflicting purposes. For example, the "isalpha()" class - * is sometimes used to determine word boundaries, while a more sophisticated - * approach would at least distinguish initial letters from continuation - * characters (the latter including combining marks). - * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) - * Another example: There is no "istitle()" class for titlecase characters. - * - * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. - * ICU implements them according to the Standard Recommendations in - * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions - * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 
- * - * API access for C/POSIX character classes is as follows: - * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC) - * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE) - * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE) - * - punct: u_ispunct(c) - * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER - * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT) - * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM) - * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE) - * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK) - * - cntrl: u_charType(c)==U_CONTROL_CHAR - * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH) - * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT) - * - * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match, - * the Standard Recommendations in UTS #18. Instead, they match Java - * functions according to their API documentation. - * - * \htmlonly - * The C/POSIX character classes are also available in UnicodeSet patterns, - * using patterns like [:graph:] or \p{graph}. - * \endhtmlonly - * - * Note: There are several ICU whitespace functions. - * Comparison: - * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; - * most of general categories "Z" (separators) + most whitespace ISO controls - * (including no-break spaces, but excluding IS1..IS4) - * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces - * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces) - * - u_isspace: Z + whitespace ISO controls (including no-break spaces) - * - u_isblank: "horizontal spaces" = TAB + Zs - */ - -/** - * Constants. - */ - -/** The lowest Unicode code point value. Code points are non-negative. 
@stable ICU 2.0 */ -#define UCHAR_MIN_VALUE 0 - -/** - * The highest Unicode code point value (scalar value) according to - * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up). - * For a single character, UChar32 is a simple type that can hold any code point value. - * - * @see UChar32 - * @stable ICU 2.0 - */ -#define UCHAR_MAX_VALUE 0x10ffff - -/** - * Get a single-bit bit set (a flag) from a bit number 0..31. - * @stable ICU 2.1 - */ -#define U_MASK(x) ((uint32_t)1<<(x)) - -/** - * Selection constants for Unicode properties. - * These constants are used in functions like u_hasBinaryProperty to select - * one of the Unicode properties. - * - * The properties APIs are intended to reflect Unicode properties as defined - * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). - * - * For details about the properties see - * UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/). - * - * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, - * then properties marked with "new in Unicode 3.2" are not or not fully available. - * Check u_getUnicodeVersion to be sure. - * - * @see u_hasBinaryProperty - * @see u_getIntPropertyValue - * @see u_getUnicodeVersion - * @stable ICU 2.1 - */ -typedef enum UProperty { - /* - * Note: UProperty constants are parsed by preparseucd.py. - * It matches lines like - * UCHAR_=, - */ - - /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that - debuggers display UCHAR_ALPHABETIC as the symbolic name for 0, - rather than UCHAR_BINARY_START. Likewise for other *_START - identifiers. */ - - /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha. - Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */ - UCHAR_ALPHABETIC=0, - /** First constant for binary Unicode properties. @stable ICU 2.1 */ - UCHAR_BINARY_START=UCHAR_ALPHABETIC, - /** Binary property ASCII_Hex_Digit. 
0-9 A-F a-f @stable ICU 2.1 */ - UCHAR_ASCII_HEX_DIGIT=1, - /** Binary property Bidi_Control. - Format controls which have specific functions - in the Bidi Algorithm. @stable ICU 2.1 */ - UCHAR_BIDI_CONTROL=2, - /** Binary property Bidi_Mirrored. - Characters that may change display in RTL text. - Same as u_isMirrored. - See Bidi Algorithm, UTR 9. @stable ICU 2.1 */ - UCHAR_BIDI_MIRRORED=3, - /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */ - UCHAR_DASH=4, - /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). - Ignorable in most processing. - <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */ - UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5, - /** Binary property Deprecated (new in Unicode 3.2). - The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */ - UCHAR_DEPRECATED=6, - /** Binary property Diacritic. Characters that linguistically modify - the meaning of another character to which they apply. @stable ICU 2.1 */ - UCHAR_DIACRITIC=7, - /** Binary property Extender. - Extend the value or shape of a preceding alphabetic character, - e.g., length and iteration marks. @stable ICU 2.1 */ - UCHAR_EXTENDER=8, - /** Binary property Full_Composition_Exclusion. - CompositionExclusions.txt+Singleton Decompositions+ - Non-Starter Decompositions. @stable ICU 2.1 */ - UCHAR_FULL_COMPOSITION_EXCLUSION=9, - /** Binary property Grapheme_Base (new in Unicode 3.2). - For programmatic determination of grapheme cluster boundaries. - [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */ - UCHAR_GRAPHEME_BASE=10, - /** Binary property Grapheme_Extend (new in Unicode 3.2). - For programmatic determination of grapheme cluster boundaries. - Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */ - UCHAR_GRAPHEME_EXTEND=11, - /** Binary property Grapheme_Link (new in Unicode 3.2). 
- For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */ - UCHAR_GRAPHEME_LINK=12, - /** Binary property Hex_Digit. - Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */ - UCHAR_HEX_DIGIT=13, - /** Binary property Hyphen. Dashes used to mark connections - between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */ - UCHAR_HYPHEN=14, - /** Binary property ID_Continue. - Characters that can continue an identifier. - DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." - ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */ - UCHAR_ID_CONTINUE=15, - /** Binary property ID_Start. - Characters that can start an identifier. - Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */ - UCHAR_ID_START=16, - /** Binary property Ideographic. - CJKV ideographs. @stable ICU 2.1 */ - UCHAR_IDEOGRAPHIC=17, - /** Binary property IDS_Binary_Operator (new in Unicode 3.2). - For programmatic determination of - Ideographic Description Sequences. @stable ICU 2.1 */ - UCHAR_IDS_BINARY_OPERATOR=18, - /** Binary property IDS_Trinary_Operator (new in Unicode 3.2). - For programmatic determination of - Ideographic Description Sequences. @stable ICU 2.1 */ - UCHAR_IDS_TRINARY_OPERATOR=19, - /** Binary property Join_Control. - Format controls for cursive joining and ligation. @stable ICU 2.1 */ - UCHAR_JOIN_CONTROL=20, - /** Binary property Logical_Order_Exception (new in Unicode 3.2). - Characters that do not use logical order and - require special handling in most processing. @stable ICU 2.1 */ - UCHAR_LOGICAL_ORDER_EXCEPTION=21, - /** Binary property Lowercase. Same as u_isULowercase, different from u_islower. - Ll+Other_Lowercase @stable ICU 2.1 */ - UCHAR_LOWERCASE=22, - /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */ - UCHAR_MATH=23, - /** Binary property Noncharacter_Code_Point. - Code points that are explicitly defined as illegal - for the encoding of characters. 
@stable ICU 2.1 */ - UCHAR_NONCHARACTER_CODE_POINT=24, - /** Binary property Quotation_Mark. @stable ICU 2.1 */ - UCHAR_QUOTATION_MARK=25, - /** Binary property Radical (new in Unicode 3.2). - For programmatic determination of - Ideographic Description Sequences. @stable ICU 2.1 */ - UCHAR_RADICAL=26, - /** Binary property Soft_Dotted (new in Unicode 3.2). - Characters with a "soft dot", like i or j. - An accent placed on these characters causes - the dot to disappear. @stable ICU 2.1 */ - UCHAR_SOFT_DOTTED=27, - /** Binary property Terminal_Punctuation. - Punctuation characters that generally mark - the end of textual units. @stable ICU 2.1 */ - UCHAR_TERMINAL_PUNCTUATION=28, - /** Binary property Unified_Ideograph (new in Unicode 3.2). - For programmatic determination of - Ideographic Description Sequences. @stable ICU 2.1 */ - UCHAR_UNIFIED_IDEOGRAPH=29, - /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. - Lu+Other_Uppercase @stable ICU 2.1 */ - UCHAR_UPPERCASE=30, - /** Binary property White_Space. - Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. - Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */ - UCHAR_WHITE_SPACE=31, - /** Binary property XID_Continue. - ID_Continue modified to allow closure under - normalization forms NFKC and NFKD. @stable ICU 2.1 */ - UCHAR_XID_CONTINUE=32, - /** Binary property XID_Start. ID_Start modified to allow - closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */ - UCHAR_XID_START=33, - /** Binary property Case_Sensitive. Either the source of a case - mapping or _in_ the target of a case mapping. Not the same as - the general category Cased_Letter. @stable ICU 2.6 */ - UCHAR_CASE_SENSITIVE=34, - /** Binary property STerm (new in Unicode 4.0.1). - Sentence Terminal. Used in UAX #29: Text Boundaries - (http://www.unicode.org/reports/tr29/) - @stable ICU 3.0 */ - UCHAR_S_TERM=35, - /** Binary property Variation_Selector (new in Unicode 4.0.1). 
- Indicates all those characters that qualify as Variation Selectors. - For details on the behavior of these characters, - see StandardizedVariants.html and 15.6 Variation Selectors. - @stable ICU 3.0 */ - UCHAR_VARIATION_SELECTOR=36, - /** Binary property NFD_Inert. - ICU-specific property for characters that are inert under NFD, - i.e., they do not interact with adjacent characters. - See the documentation for the Normalizer2 class and the - Normalizer2::isInert() method. - @stable ICU 3.0 */ - UCHAR_NFD_INERT=37, - /** Binary property NFKD_Inert. - ICU-specific property for characters that are inert under NFKD, - i.e., they do not interact with adjacent characters. - See the documentation for the Normalizer2 class and the - Normalizer2::isInert() method. - @stable ICU 3.0 */ - UCHAR_NFKD_INERT=38, - /** Binary property NFC_Inert. - ICU-specific property for characters that are inert under NFC, - i.e., they do not interact with adjacent characters. - See the documentation for the Normalizer2 class and the - Normalizer2::isInert() method. - @stable ICU 3.0 */ - UCHAR_NFC_INERT=39, - /** Binary property NFKC_Inert. - ICU-specific property for characters that are inert under NFKC, - i.e., they do not interact with adjacent characters. - See the documentation for the Normalizer2 class and the - Normalizer2::isInert() method. - @stable ICU 3.0 */ - UCHAR_NFKC_INERT=40, - /** Binary Property Segment_Starter. - ICU-specific property for characters that are starters in terms of - Unicode normalization and combining character sequences. - They have ccc=0 and do not occur in non-initial position of the - canonical decomposition of any character - (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). - ICU uses this property for segmenting a string for generating a set of - canonically equivalent strings, e.g. for canonical closure while - processing collation tailoring rules. 
- @stable ICU 3.0 */ - UCHAR_SEGMENT_STARTER=41, - /** Binary property Pattern_Syntax (new in Unicode 4.1). - See UAX #31 Identifier and Pattern Syntax - (http://www.unicode.org/reports/tr31/) - @stable ICU 3.4 */ - UCHAR_PATTERN_SYNTAX=42, - /** Binary property Pattern_White_Space (new in Unicode 4.1). - See UAX #31 Identifier and Pattern Syntax - (http://www.unicode.org/reports/tr31/) - @stable ICU 3.4 */ - UCHAR_PATTERN_WHITE_SPACE=43, - /** Binary property alnum (a C/POSIX character class). - Implemented according to the UTS #18 Annex C Standard Recommendation. - See the uchar.h file documentation. - @stable ICU 3.4 */ - UCHAR_POSIX_ALNUM=44, - /** Binary property blank (a C/POSIX character class). - Implemented according to the UTS #18 Annex C Standard Recommendation. - See the uchar.h file documentation. - @stable ICU 3.4 */ - UCHAR_POSIX_BLANK=45, - /** Binary property graph (a C/POSIX character class). - Implemented according to the UTS #18 Annex C Standard Recommendation. - See the uchar.h file documentation. - @stable ICU 3.4 */ - UCHAR_POSIX_GRAPH=46, - /** Binary property print (a C/POSIX character class). - Implemented according to the UTS #18 Annex C Standard Recommendation. - See the uchar.h file documentation. - @stable ICU 3.4 */ - UCHAR_POSIX_PRINT=47, - /** Binary property xdigit (a C/POSIX character class). - Implemented according to the UTS #18 Annex C Standard Recommendation. - See the uchar.h file documentation. - @stable ICU 3.4 */ - UCHAR_POSIX_XDIGIT=48, - /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */ - UCHAR_CASED=49, - /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */ - UCHAR_CASE_IGNORABLE=50, - /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */ - UCHAR_CHANGES_WHEN_LOWERCASED=51, - /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */ - UCHAR_CHANGES_WHEN_UPPERCASED=52, - /** Binary property Changes_When_Titlecased. 
@stable ICU 4.4 */ - UCHAR_CHANGES_WHEN_TITLECASED=53, - /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */ - UCHAR_CHANGES_WHEN_CASEFOLDED=54, - /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */ - UCHAR_CHANGES_WHEN_CASEMAPPED=55, - /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */ - UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56, - /** - * Binary property Emoji. - * See http://www.unicode.org/reports/tr51/#Emoji_Properties - * - * @stable ICU 57 - */ - UCHAR_EMOJI=57, - /** - * Binary property Emoji_Presentation. - * See http://www.unicode.org/reports/tr51/#Emoji_Properties - * - * @stable ICU 57 - */ - UCHAR_EMOJI_PRESENTATION=58, - /** - * Binary property Emoji_Modifier. - * See http://www.unicode.org/reports/tr51/#Emoji_Properties - * - * @stable ICU 57 - */ - UCHAR_EMOJI_MODIFIER=59, - /** - * Binary property Emoji_Modifier_Base. - * See http://www.unicode.org/reports/tr51/#Emoji_Properties - * - * @stable ICU 57 - */ - UCHAR_EMOJI_MODIFIER_BASE=60, - /** - * Binary property Emoji_Component. - * See http://www.unicode.org/reports/tr51/#Emoji_Properties - * - * @stable ICU 60 - */ - UCHAR_EMOJI_COMPONENT=61, - /** - * Binary property Regional_Indicator. - * @stable ICU 60 - */ - UCHAR_REGIONAL_INDICATOR=62, - /** - * Binary property Prepended_Concatenation_Mark. - * @stable ICU 60 - */ - UCHAR_PREPENDED_CONCATENATION_MARK=63, - /** - * Binary property Extended_Pictographic. - * See http://www.unicode.org/reports/tr51/#Emoji_Properties - * - * @stable ICU 62 - */ - UCHAR_EXTENDED_PICTOGRAPHIC=64, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the last constant for binary Unicode properties. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UCHAR_BINARY_LIMIT, -#endif // U_HIDE_DEPRECATED_API - - /** Enumerated property Bidi_Class. - Same as u_charDirection, returns UCharDirection values. 
@stable ICU 2.2 */ - UCHAR_BIDI_CLASS=0x1000, - /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ - UCHAR_INT_START=UCHAR_BIDI_CLASS, - /** Enumerated property Block. - Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */ - UCHAR_BLOCK=0x1001, - /** Enumerated property Canonical_Combining_Class. - Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */ - UCHAR_CANONICAL_COMBINING_CLASS=0x1002, - /** Enumerated property Decomposition_Type. - Returns UDecompositionType values. @stable ICU 2.2 */ - UCHAR_DECOMPOSITION_TYPE=0x1003, - /** Enumerated property East_Asian_Width. - See http://www.unicode.org/reports/tr11/ - Returns UEastAsianWidth values. @stable ICU 2.2 */ - UCHAR_EAST_ASIAN_WIDTH=0x1004, - /** Enumerated property General_Category. - Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */ - UCHAR_GENERAL_CATEGORY=0x1005, - /** Enumerated property Joining_Group. - Returns UJoiningGroup values. @stable ICU 2.2 */ - UCHAR_JOINING_GROUP=0x1006, - /** Enumerated property Joining_Type. - Returns UJoiningType values. @stable ICU 2.2 */ - UCHAR_JOINING_TYPE=0x1007, - /** Enumerated property Line_Break. - Returns ULineBreak values. @stable ICU 2.2 */ - UCHAR_LINE_BREAK=0x1008, - /** Enumerated property Numeric_Type. - Returns UNumericType values. @stable ICU 2.2 */ - UCHAR_NUMERIC_TYPE=0x1009, - /** Enumerated property Script. - Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */ - UCHAR_SCRIPT=0x100A, - /** Enumerated property Hangul_Syllable_Type, new in Unicode 4. - Returns UHangulSyllableType values. @stable ICU 2.6 */ - UCHAR_HANGUL_SYLLABLE_TYPE=0x100B, - /** Enumerated property NFD_Quick_Check. - Returns UNormalizationCheckResult values. @stable ICU 3.0 */ - UCHAR_NFD_QUICK_CHECK=0x100C, - /** Enumerated property NFKD_Quick_Check. - Returns UNormalizationCheckResult values. 
@stable ICU 3.0 */ - UCHAR_NFKD_QUICK_CHECK=0x100D, - /** Enumerated property NFC_Quick_Check. - Returns UNormalizationCheckResult values. @stable ICU 3.0 */ - UCHAR_NFC_QUICK_CHECK=0x100E, - /** Enumerated property NFKC_Quick_Check. - Returns UNormalizationCheckResult values. @stable ICU 3.0 */ - UCHAR_NFKC_QUICK_CHECK=0x100F, - /** Enumerated property Lead_Canonical_Combining_Class. - ICU-specific property for the ccc of the first code point - of the decomposition, or lccc(c)=ccc(NFD(c)[0]). - Useful for checking for canonically ordered text; - see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . - Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ - UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010, - /** Enumerated property Trail_Canonical_Combining_Class. - ICU-specific property for the ccc of the last code point - of the decomposition, or tccc(c)=ccc(NFD(c)[last]). - Useful for checking for canonically ordered text; - see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . - Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ - UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011, - /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). - Used in UAX #29: Text Boundaries - (http://www.unicode.org/reports/tr29/) - Returns UGraphemeClusterBreak values. @stable ICU 3.4 */ - UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012, - /** Enumerated property Sentence_Break (new in Unicode 4.1). - Used in UAX #29: Text Boundaries - (http://www.unicode.org/reports/tr29/) - Returns USentenceBreak values. @stable ICU 3.4 */ - UCHAR_SENTENCE_BREAK=0x1013, - /** Enumerated property Word_Break (new in Unicode 4.1). - Used in UAX #29: Text Boundaries - (http://www.unicode.org/reports/tr29/) - Returns UWordBreakValues values. @stable ICU 3.4 */ - UCHAR_WORD_BREAK=0x1014, - /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). 
- Used in UAX #9: Unicode Bidirectional Algorithm - (http://www.unicode.org/reports/tr9/) - Returns UBidiPairedBracketType values. @stable ICU 52 */ - UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015, - /** - * Enumerated property Indic_Positional_Category. - * New in Unicode 6.0 as provisional property Indic_Matra_Category; - * renamed and changed to informative in Unicode 8.0. - * See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt - * @stable ICU 63 - */ - UCHAR_INDIC_POSITIONAL_CATEGORY=0x1016, - /** - * Enumerated property Indic_Syllabic_Category. - * New in Unicode 6.0 as provisional; informative since Unicode 8.0. - * See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt - * @stable ICU 63 - */ - UCHAR_INDIC_SYLLABIC_CATEGORY=0x1017, - /** - * Enumerated property Vertical_Orientation. - * Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/). - * New as a UCD property in Unicode 10.0. - * @stable ICU 63 - */ - UCHAR_VERTICAL_ORIENTATION=0x1018, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the last constant for enumerated/integer Unicode properties. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UCHAR_INT_LIMIT=0x1019, -#endif // U_HIDE_DEPRECATED_API - - /** Bitmask property General_Category_Mask. - This is the General_Category property returned as a bit mask. - When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), - returns bit masks for UCharCategory values where exactly one bit is set. - When used with u_getPropertyValueName() and u_getPropertyValueEnum(), - a multi-bit mask is used for sets of categories like "Letters". - Mask values should be cast to uint32_t. - @stable ICU 2.4 */ - UCHAR_GENERAL_CATEGORY_MASK=0x2000, - /** First constant for bit-mask Unicode properties. 
@stable ICU 2.4 */ - UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the last constant for bit-mask Unicode properties. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UCHAR_MASK_LIMIT=0x2001, -#endif // U_HIDE_DEPRECATED_API - - /** Double property Numeric_Value. - Corresponds to u_getNumericValue. @stable ICU 2.4 */ - UCHAR_NUMERIC_VALUE=0x3000, - /** First constant for double Unicode properties. @stable ICU 2.4 */ - UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the last constant for double Unicode properties. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UCHAR_DOUBLE_LIMIT=0x3001, -#endif // U_HIDE_DEPRECATED_API - - /** String property Age. - Corresponds to u_charAge. @stable ICU 2.4 */ - UCHAR_AGE=0x4000, - /** First constant for string Unicode properties. @stable ICU 2.4 */ - UCHAR_STRING_START=UCHAR_AGE, - /** String property Bidi_Mirroring_Glyph. - Corresponds to u_charMirror. @stable ICU 2.4 */ - UCHAR_BIDI_MIRRORING_GLYPH=0x4001, - /** String property Case_Folding. - Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */ - UCHAR_CASE_FOLDING=0x4002, -#ifndef U_HIDE_DEPRECATED_API - /** Deprecated string property ISO_Comment. - Corresponds to u_getISOComment. @deprecated ICU 49 */ - UCHAR_ISO_COMMENT=0x4003, -#endif /* U_HIDE_DEPRECATED_API */ - /** String property Lowercase_Mapping. - Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */ - UCHAR_LOWERCASE_MAPPING=0x4004, - /** String property Name. - Corresponds to u_charName. @stable ICU 2.4 */ - UCHAR_NAME=0x4005, - /** String property Simple_Case_Folding. - Corresponds to u_foldCase. @stable ICU 2.4 */ - UCHAR_SIMPLE_CASE_FOLDING=0x4006, - /** String property Simple_Lowercase_Mapping. - Corresponds to u_tolower. 
@stable ICU 2.4 */ - UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007, - /** String property Simple_Titlecase_Mapping. - Corresponds to u_totitle. @stable ICU 2.4 */ - UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008, - /** String property Simple_Uppercase_Mapping. - Corresponds to u_toupper. @stable ICU 2.4 */ - UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009, - /** String property Titlecase_Mapping. - Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */ - UCHAR_TITLECASE_MAPPING=0x400A, -#ifndef U_HIDE_DEPRECATED_API - /** String property Unicode_1_Name. - This property is of little practical value. - Beginning with ICU 49, ICU APIs return an empty string for this property. - Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */ - UCHAR_UNICODE_1_NAME=0x400B, -#endif /* U_HIDE_DEPRECATED_API */ - /** String property Uppercase_Mapping. - Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */ - UCHAR_UPPERCASE_MAPPING=0x400C, - /** String property Bidi_Paired_Bracket (new in Unicode 6.3). - Corresponds to u_getBidiPairedBracket. @stable ICU 52 */ - UCHAR_BIDI_PAIRED_BRACKET=0x400D, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the last constant for string Unicode properties. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UCHAR_STRING_LIMIT=0x400E, -#endif // U_HIDE_DEPRECATED_API - - /** Miscellaneous property Script_Extensions (new in Unicode 6.0). - Some characters are commonly used in multiple scripts. - For more information, see UAX #24: http://www.unicode.org/reports/tr24/. - Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h. - @stable ICU 4.6 */ - UCHAR_SCRIPT_EXTENSIONS=0x7000, - /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */ - UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the last constant for Unicode properties with unusual value types. 
- * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UCHAR_OTHER_PROPERTY_LIMIT=0x7001, -#endif // U_HIDE_DEPRECATED_API - - /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */ - UCHAR_INVALID_CODE = -1 -} UProperty; - -/** - * Data for enumerated Unicode general category types. - * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html . - * @stable ICU 2.0 - */ -typedef enum UCharCategory -{ - /* - * Note: UCharCategory constants and their API comments are parsed by preparseucd.py. - * It matches pairs of lines like - * / ** comment... * / - * U_<[A-Z_]+> = , - */ - - /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */ - U_UNASSIGNED = 0, - /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */ - U_GENERAL_OTHER_TYPES = 0, - /** Lu @stable ICU 2.0 */ - U_UPPERCASE_LETTER = 1, - /** Ll @stable ICU 2.0 */ - U_LOWERCASE_LETTER = 2, - /** Lt @stable ICU 2.0 */ - U_TITLECASE_LETTER = 3, - /** Lm @stable ICU 2.0 */ - U_MODIFIER_LETTER = 4, - /** Lo @stable ICU 2.0 */ - U_OTHER_LETTER = 5, - /** Mn @stable ICU 2.0 */ - U_NON_SPACING_MARK = 6, - /** Me @stable ICU 2.0 */ - U_ENCLOSING_MARK = 7, - /** Mc @stable ICU 2.0 */ - U_COMBINING_SPACING_MARK = 8, - /** Nd @stable ICU 2.0 */ - U_DECIMAL_DIGIT_NUMBER = 9, - /** Nl @stable ICU 2.0 */ - U_LETTER_NUMBER = 10, - /** No @stable ICU 2.0 */ - U_OTHER_NUMBER = 11, - /** Zs @stable ICU 2.0 */ - U_SPACE_SEPARATOR = 12, - /** Zl @stable ICU 2.0 */ - U_LINE_SEPARATOR = 13, - /** Zp @stable ICU 2.0 */ - U_PARAGRAPH_SEPARATOR = 14, - /** Cc @stable ICU 2.0 */ - U_CONTROL_CHAR = 15, - /** Cf @stable ICU 2.0 */ - U_FORMAT_CHAR = 16, - /** Co @stable ICU 2.0 */ - U_PRIVATE_USE_CHAR = 17, - /** Cs @stable ICU 2.0 */ - U_SURROGATE = 18, - /** Pd @stable ICU 2.0 */ - U_DASH_PUNCTUATION = 19, - /** Ps @stable ICU 2.0 */ - U_START_PUNCTUATION = 20, - /** Pe 
@stable ICU 2.0 */ - U_END_PUNCTUATION = 21, - /** Pc @stable ICU 2.0 */ - U_CONNECTOR_PUNCTUATION = 22, - /** Po @stable ICU 2.0 */ - U_OTHER_PUNCTUATION = 23, - /** Sm @stable ICU 2.0 */ - U_MATH_SYMBOL = 24, - /** Sc @stable ICU 2.0 */ - U_CURRENCY_SYMBOL = 25, - /** Sk @stable ICU 2.0 */ - U_MODIFIER_SYMBOL = 26, - /** So @stable ICU 2.0 */ - U_OTHER_SYMBOL = 27, - /** Pi @stable ICU 2.0 */ - U_INITIAL_PUNCTUATION = 28, - /** Pf @stable ICU 2.0 */ - U_FINAL_PUNCTUATION = 29, - /** - * One higher than the last enum UCharCategory constant. - * This numeric value is stable (will not change), see - * http://www.unicode.org/policies/stability_policy.html#Property_Value - * - * @stable ICU 2.0 - */ - U_CHAR_CATEGORY_COUNT -} UCharCategory; - -/** - * U_GC_XX_MASK constants are bit flags corresponding to Unicode - * general category values. - * For each category, the nth bit is set if the numeric value of the - * corresponding UCharCategory constant is n. - * - * There are also some U_GC_Y_MASK constants for groups of general categories - * like L for all letter categories. - * - * @see u_charType - * @see U_GET_GC_MASK - * @see UCharCategory - * @stable ICU 2.1 - */ -#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK) -/** Mask constant for a UCharCategory. 
@stable ICU 2.1 */ -#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_CS_MASK U_MASK(U_SURROGATE) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL) -/** Mask constant for a UCharCategory. 
@stable ICU 2.1 */ -#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION) - - -/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */ -#define U_GC_L_MASK \ - (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK) - -/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */ -#define U_GC_LC_MASK \ - (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK) - -/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */ -#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK) - -/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */ -#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK) - -/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */ -#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK) - -/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */ -#define U_GC_C_MASK \ - (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK) - -/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */ -#define U_GC_P_MASK \ - (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \ - U_GC_PI_MASK|U_GC_PF_MASK) - -/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */ -#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK) - -/** - * This specifies the language directional property of a character set. 
- * @stable ICU 2.0 - */ -typedef enum UCharDirection { - /* - * Note: UCharDirection constants and their API comments are parsed by preparseucd.py. - * It matches pairs of lines like - * / ** comment... * / - * U_<[A-Z_]+> = , - */ - - /** L @stable ICU 2.0 */ - U_LEFT_TO_RIGHT = 0, - /** R @stable ICU 2.0 */ - U_RIGHT_TO_LEFT = 1, - /** EN @stable ICU 2.0 */ - U_EUROPEAN_NUMBER = 2, - /** ES @stable ICU 2.0 */ - U_EUROPEAN_NUMBER_SEPARATOR = 3, - /** ET @stable ICU 2.0 */ - U_EUROPEAN_NUMBER_TERMINATOR = 4, - /** AN @stable ICU 2.0 */ - U_ARABIC_NUMBER = 5, - /** CS @stable ICU 2.0 */ - U_COMMON_NUMBER_SEPARATOR = 6, - /** B @stable ICU 2.0 */ - U_BLOCK_SEPARATOR = 7, - /** S @stable ICU 2.0 */ - U_SEGMENT_SEPARATOR = 8, - /** WS @stable ICU 2.0 */ - U_WHITE_SPACE_NEUTRAL = 9, - /** ON @stable ICU 2.0 */ - U_OTHER_NEUTRAL = 10, - /** LRE @stable ICU 2.0 */ - U_LEFT_TO_RIGHT_EMBEDDING = 11, - /** LRO @stable ICU 2.0 */ - U_LEFT_TO_RIGHT_OVERRIDE = 12, - /** AL @stable ICU 2.0 */ - U_RIGHT_TO_LEFT_ARABIC = 13, - /** RLE @stable ICU 2.0 */ - U_RIGHT_TO_LEFT_EMBEDDING = 14, - /** RLO @stable ICU 2.0 */ - U_RIGHT_TO_LEFT_OVERRIDE = 15, - /** PDF @stable ICU 2.0 */ - U_POP_DIRECTIONAL_FORMAT = 16, - /** NSM @stable ICU 2.0 */ - U_DIR_NON_SPACING_MARK = 17, - /** BN @stable ICU 2.0 */ - U_BOUNDARY_NEUTRAL = 18, - /** FSI @stable ICU 52 */ - U_FIRST_STRONG_ISOLATE = 19, - /** LRI @stable ICU 52 */ - U_LEFT_TO_RIGHT_ISOLATE = 20, - /** RLI @stable ICU 52 */ - U_RIGHT_TO_LEFT_ISOLATE = 21, - /** PDI @stable ICU 52 */ - U_POP_DIRECTIONAL_ISOLATE = 22, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest UCharDirection value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_CHAR_DIRECTION_COUNT -#endif // U_HIDE_DEPRECATED_API -} UCharDirection; - -/** - * Bidi Paired Bracket Type constants. 
- * - * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE - * @stable ICU 52 - */ -typedef enum UBidiPairedBracketType { - /* - * Note: UBidiPairedBracketType constants are parsed by preparseucd.py. - * It matches lines like - * U_BPT_ - */ - - /** Not a paired bracket. @stable ICU 52 */ - U_BPT_NONE, - /** Open paired bracket. @stable ICU 52 */ - U_BPT_OPEN, - /** Close paired bracket. @stable ICU 52 */ - U_BPT_CLOSE, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UBidiPairedBracketType value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_BPT_COUNT /* 3 */ -#endif // U_HIDE_DEPRECATED_API -} UBidiPairedBracketType; - -/** - * Constants for Unicode blocks, see the Unicode Data file Blocks.txt - * @stable ICU 2.0 - */ -enum UBlockCode { - /* - * Note: UBlockCode constants are parsed by preparseucd.py. - * It matches lines like - * UBLOCK_ = , - */ - - /** New No_Block value in Unicode 4. @stable ICU 2.6 */ - UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */ - - /** @stable ICU 2.0 */ - UBLOCK_BASIC_LATIN = 1, /*[0000]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/ - - /** @stable ICU 2.0 */ - UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/ - - /** @stable ICU 2.0 */ - UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/ - - /** - * Unicode 3.2 renames this block to "Greek and Coptic". 
- * @stable ICU 2.0 - */ - UBLOCK_GREEK =8, /*[0370]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CYRILLIC =9, /*[0400]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ARMENIAN =10, /*[0530]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HEBREW =11, /*[0590]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ARABIC =12, /*[0600]*/ - - /** @stable ICU 2.0 */ - UBLOCK_SYRIAC =13, /*[0700]*/ - - /** @stable ICU 2.0 */ - UBLOCK_THAANA =14, /*[0780]*/ - - /** @stable ICU 2.0 */ - UBLOCK_DEVANAGARI =15, /*[0900]*/ - - /** @stable ICU 2.0 */ - UBLOCK_BENGALI =16, /*[0980]*/ - - /** @stable ICU 2.0 */ - UBLOCK_GURMUKHI =17, /*[0A00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_GUJARATI =18, /*[0A80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ORIYA =19, /*[0B00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_TAMIL =20, /*[0B80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_TELUGU =21, /*[0C00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_KANNADA =22, /*[0C80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_MALAYALAM =23, /*[0D00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_SINHALA =24, /*[0D80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_THAI =25, /*[0E00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LAO =26, /*[0E80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_TIBETAN =27, /*[0F00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_MYANMAR =28, /*[1000]*/ - - /** @stable ICU 2.0 */ - UBLOCK_GEORGIAN =29, /*[10A0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HANGUL_JAMO =30, /*[1100]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ETHIOPIC =31, /*[1200]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CHEROKEE =32, /*[13A0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/ - - /** @stable ICU 2.0 */ - UBLOCK_OGHAM =34, /*[1680]*/ - - /** @stable ICU 2.0 */ - UBLOCK_RUNIC =35, /*[16A0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_KHMER =36, /*[1780]*/ - - /** @stable ICU 2.0 */ - UBLOCK_MONGOLIAN =37, /*[1800]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/ - - /** @stable ICU 2.0 */ - 
UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/ - - /** @stable ICU 2.0 */ - UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/ - - /** - * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols". - * @stable ICU 2.0 - */ - UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/ - - /** @stable ICU 2.0 */ - UBLOCK_NUMBER_FORMS =45, /*[2150]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ARROWS =46, /*[2190]*/ - - /** @stable ICU 2.0 */ - UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/ - - /** @stable ICU 2.0 */ - UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CONTROL_PICTURES =49, /*[2400]*/ - - /** @stable ICU 2.0 */ - UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/ - - /** @stable ICU 2.0 */ - UBLOCK_BOX_DRAWING =52, /*[2500]*/ - - /** @stable ICU 2.0 */ - UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/ - - /** @stable ICU 2.0 */ - UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/ - - /** @stable ICU 2.0 */ - UBLOCK_DINGBATS =56, /*[2700]*/ - - /** @stable ICU 2.0 */ - UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HIRAGANA =62, /*[3040]*/ - - /** @stable ICU 2.0 */ - UBLOCK_KATAKANA =63, /*[30A0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_BOPOMOFO =64, /*[3100]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/ - - /** @stable ICU 2.0 */ - UBLOCK_KANBUN =66, /*[3190]*/ - - /** @stable ICU 2.0 */ - 
UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_YI_SYLLABLES =72, /*[A000]*/ - - /** @stable ICU 2.0 */ - UBLOCK_YI_RADICALS =73, /*[A490]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HIGH_SURROGATES =75, /*[D800]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LOW_SURROGATES =77, /*[DC00]*/ - - /** - * Same as UBLOCK_PRIVATE_USE. - * Until Unicode 3.1.1, the corresponding block name was "Private Use", - * and multiple code point ranges had this block. - * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and - * adds separate blocks for the supplementary PUAs. - * - * @stable ICU 2.0 - */ - UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/ - /** - * Same as UBLOCK_PRIVATE_USE_AREA. - * Until Unicode 3.1.1, the corresponding block name was "Private Use", - * and multiple code point ranges had this block. - * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and - * adds separate blocks for the supplementary PUAs. 
- * - * @stable ICU 2.0 - */ - UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA, - - /** @stable ICU 2.0 */ - UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/ - - /** @stable ICU 2.0 */ - UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/ - - /** @stable ICU 2.0 */ - UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/ - - /** @stable ICU 2.0 */ - UBLOCK_SPECIALS =86, /*[FFF0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/ - - /* New blocks in Unicode 3.1 */ - - /** @stable ICU 2.0 */ - UBLOCK_OLD_ITALIC = 88, /*[10300]*/ - /** @stable ICU 2.0 */ - UBLOCK_GOTHIC = 89, /*[10330]*/ - /** @stable ICU 2.0 */ - UBLOCK_DESERET = 90, /*[10400]*/ - /** @stable ICU 2.0 */ - UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/ - /** @stable ICU 2.0 */ - UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/ - /** @stable ICU 2.0 */ - UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/ - /** @stable ICU 2.0 */ - UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/ - /** @stable ICU 2.0 */ - UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/ - /** @stable ICU 2.0 */ - UBLOCK_TAGS = 96, /*[E0000]*/ - - /* New blocks in Unicode 3.2 */ - - /** @stable ICU 3.0 */ - UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/ - /** - * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
- * @stable ICU 2.2 - */ - UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT, - /** @stable ICU 2.2 */ - UBLOCK_TAGALOG = 98, /*[1700]*/ - /** @stable ICU 2.2 */ - UBLOCK_HANUNOO = 99, /*[1720]*/ - /** @stable ICU 2.2 */ - UBLOCK_BUHID = 100, /*[1740]*/ - /** @stable ICU 2.2 */ - UBLOCK_TAGBANWA = 101, /*[1760]*/ - /** @stable ICU 2.2 */ - UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/ - /** @stable ICU 2.2 */ - UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/ - /** @stable ICU 2.2 */ - UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/ - /** @stable ICU 2.2 */ - UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/ - /** @stable ICU 2.2 */ - UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/ - /** @stable ICU 2.2 */ - UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/ - /** @stable ICU 2.2 */ - UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/ - /** @stable ICU 2.2 */ - UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/ - /** @stable ICU 2.2 */ - UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/ - - /* New blocks in Unicode 4 */ - - /** @stable ICU 2.6 */ - UBLOCK_LIMBU = 111, /*[1900]*/ - /** @stable ICU 2.6 */ - UBLOCK_TAI_LE = 112, /*[1950]*/ - /** @stable ICU 2.6 */ - UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/ - /** @stable ICU 2.6 */ - UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/ - /** @stable ICU 2.6 */ - UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/ - /** @stable ICU 2.6 */ - UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/ - /** @stable ICU 2.6 */ - UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/ - /** @stable ICU 2.6 */ - UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/ - /** @stable ICU 2.6 */ - UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/ - /** @stable ICU 2.6 */ - UBLOCK_UGARITIC = 120, /*[10380]*/ - /** @stable ICU 2.6 */ - UBLOCK_SHAVIAN = 121, /*[10450]*/ - /** @stable ICU 2.6 */ - UBLOCK_OSMANYA = 122, /*[10480]*/ - /** @stable ICU 2.6 */ - UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/ - /** @stable 
ICU 2.6 */ - UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/ - /** @stable ICU 2.6 */ - UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/ - - /* New blocks in Unicode 4.1 */ - - /** @stable ICU 3.4 */ - UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/ - /** @stable ICU 3.4 */ - UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/ - /** @stable ICU 3.4 */ - UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/ - /** @stable ICU 3.4 */ - UBLOCK_BUGINESE = 129, /*[1A00]*/ - /** @stable ICU 3.4 */ - UBLOCK_CJK_STROKES = 130, /*[31C0]*/ - /** @stable ICU 3.4 */ - UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/ - /** @stable ICU 3.4 */ - UBLOCK_COPTIC = 132, /*[2C80]*/ - /** @stable ICU 3.4 */ - UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/ - /** @stable ICU 3.4 */ - UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/ - /** @stable ICU 3.4 */ - UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/ - /** @stable ICU 3.4 */ - UBLOCK_GLAGOLITIC = 136, /*[2C00]*/ - /** @stable ICU 3.4 */ - UBLOCK_KHAROSHTHI = 137, /*[10A00]*/ - /** @stable ICU 3.4 */ - UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/ - /** @stable ICU 3.4 */ - UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/ - /** @stable ICU 3.4 */ - UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/ - /** @stable ICU 3.4 */ - UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/ - /** @stable ICU 3.4 */ - UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/ - /** @stable ICU 3.4 */ - UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/ - /** @stable ICU 3.4 */ - UBLOCK_TIFINAGH = 144, /*[2D30]*/ - /** @stable ICU 3.4 */ - UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/ - - /* New blocks in Unicode 5.0 */ - - /** @stable ICU 3.6 */ - UBLOCK_NKO = 146, /*[07C0]*/ - /** @stable ICU 3.6 */ - UBLOCK_BALINESE = 147, /*[1B00]*/ - /** @stable ICU 3.6 */ - UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/ - /** @stable ICU 3.6 */ - UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/ - /** @stable ICU 3.6 */ - UBLOCK_PHAGS_PA = 150, /*[A840]*/ - /** @stable ICU 3.6 */ - UBLOCK_PHOENICIAN = 151, 
/*[10900]*/ - /** @stable ICU 3.6 */ - UBLOCK_CUNEIFORM = 152, /*[12000]*/ - /** @stable ICU 3.6 */ - UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/ - /** @stable ICU 3.6 */ - UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/ - - /* New blocks in Unicode 5.1 */ - - /** @stable ICU 4.0 */ - UBLOCK_SUNDANESE = 155, /*[1B80]*/ - /** @stable ICU 4.0 */ - UBLOCK_LEPCHA = 156, /*[1C00]*/ - /** @stable ICU 4.0 */ - UBLOCK_OL_CHIKI = 157, /*[1C50]*/ - /** @stable ICU 4.0 */ - UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/ - /** @stable ICU 4.0 */ - UBLOCK_VAI = 159, /*[A500]*/ - /** @stable ICU 4.0 */ - UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/ - /** @stable ICU 4.0 */ - UBLOCK_SAURASHTRA = 161, /*[A880]*/ - /** @stable ICU 4.0 */ - UBLOCK_KAYAH_LI = 162, /*[A900]*/ - /** @stable ICU 4.0 */ - UBLOCK_REJANG = 163, /*[A930]*/ - /** @stable ICU 4.0 */ - UBLOCK_CHAM = 164, /*[AA00]*/ - /** @stable ICU 4.0 */ - UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/ - /** @stable ICU 4.0 */ - UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/ - /** @stable ICU 4.0 */ - UBLOCK_LYCIAN = 167, /*[10280]*/ - /** @stable ICU 4.0 */ - UBLOCK_CARIAN = 168, /*[102A0]*/ - /** @stable ICU 4.0 */ - UBLOCK_LYDIAN = 169, /*[10920]*/ - /** @stable ICU 4.0 */ - UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/ - /** @stable ICU 4.0 */ - UBLOCK_DOMINO_TILES = 171, /*[1F030]*/ - - /* New blocks in Unicode 5.2 */ - - /** @stable ICU 4.4 */ - UBLOCK_SAMARITAN = 172, /*[0800]*/ - /** @stable ICU 4.4 */ - UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/ - /** @stable ICU 4.4 */ - UBLOCK_TAI_THAM = 174, /*[1A20]*/ - /** @stable ICU 4.4 */ - UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/ - /** @stable ICU 4.4 */ - UBLOCK_LISU = 176, /*[A4D0]*/ - /** @stable ICU 4.4 */ - UBLOCK_BAMUM = 177, /*[A6A0]*/ - /** @stable ICU 4.4 */ - UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/ - /** @stable ICU 4.4 */ - UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/ - /** @stable ICU 4.4 */ - UBLOCK_HANGUL_JAMO_EXTENDED_A = 
180, /*[A960]*/ - /** @stable ICU 4.4 */ - UBLOCK_JAVANESE = 181, /*[A980]*/ - /** @stable ICU 4.4 */ - UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/ - /** @stable ICU 4.4 */ - UBLOCK_TAI_VIET = 183, /*[AA80]*/ - /** @stable ICU 4.4 */ - UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/ - /** @stable ICU 4.4 */ - UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/ - /** @stable ICU 4.4 */ - UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/ - /** @stable ICU 4.4 */ - UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/ - /** @stable ICU 4.4 */ - UBLOCK_AVESTAN = 188, /*[10B00]*/ - /** @stable ICU 4.4 */ - UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/ - /** @stable ICU 4.4 */ - UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/ - /** @stable ICU 4.4 */ - UBLOCK_OLD_TURKIC = 191, /*[10C00]*/ - /** @stable ICU 4.4 */ - UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/ - /** @stable ICU 4.4 */ - UBLOCK_KAITHI = 193, /*[11080]*/ - /** @stable ICU 4.4 */ - UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/ - /** @stable ICU 4.4 */ - UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/ - /** @stable ICU 4.4 */ - UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/ - /** @stable ICU 4.4 */ - UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/ - - /* New blocks in Unicode 6.0 */ - - /** @stable ICU 4.6 */ - UBLOCK_MANDAIC = 198, /*[0840]*/ - /** @stable ICU 4.6 */ - UBLOCK_BATAK = 199, /*[1BC0]*/ - /** @stable ICU 4.6 */ - UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/ - /** @stable ICU 4.6 */ - UBLOCK_BRAHMI = 201, /*[11000]*/ - /** @stable ICU 4.6 */ - UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/ - /** @stable ICU 4.6 */ - UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/ - /** @stable ICU 4.6 */ - UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/ - /** @stable ICU 4.6 */ - UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/ - /** @stable ICU 4.6 */ - UBLOCK_EMOTICONS = 206, /*[1F600]*/ - /** @stable ICU 4.6 */ - UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/ - /** @stable ICU 4.6 */ - 
UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/ - /** @stable ICU 4.6 */ - UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/ - - /* New blocks in Unicode 6.1 */ - - /** @stable ICU 49 */ - UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/ - /** @stable ICU 49 */ - UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/ - /** @stable ICU 49 */ - UBLOCK_CHAKMA = 212, /*[11100]*/ - /** @stable ICU 49 */ - UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/ - /** @stable ICU 49 */ - UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/ - /** @stable ICU 49 */ - UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/ - /** @stable ICU 49 */ - UBLOCK_MIAO = 216, /*[16F00]*/ - /** @stable ICU 49 */ - UBLOCK_SHARADA = 217, /*[11180]*/ - /** @stable ICU 49 */ - UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/ - /** @stable ICU 49 */ - UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/ - /** @stable ICU 49 */ - UBLOCK_TAKRI = 220, /*[11680]*/ - - /* New blocks in Unicode 7.0 */ - - /** @stable ICU 54 */ - UBLOCK_BASSA_VAH = 221, /*[16AD0]*/ - /** @stable ICU 54 */ - UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/ - /** @stable ICU 54 */ - UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/ - /** @stable ICU 54 */ - UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/ - /** @stable ICU 54 */ - UBLOCK_DUPLOYAN = 225, /*[1BC00]*/ - /** @stable ICU 54 */ - UBLOCK_ELBASAN = 226, /*[10500]*/ - /** @stable ICU 54 */ - UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/ - /** @stable ICU 54 */ - UBLOCK_GRANTHA = 228, /*[11300]*/ - /** @stable ICU 54 */ - UBLOCK_KHOJKI = 229, /*[11200]*/ - /** @stable ICU 54 */ - UBLOCK_KHUDAWADI = 230, /*[112B0]*/ - /** @stable ICU 54 */ - UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/ - /** @stable ICU 54 */ - UBLOCK_LINEAR_A = 232, /*[10600]*/ - /** @stable ICU 54 */ - UBLOCK_MAHAJANI = 233, /*[11150]*/ - /** @stable ICU 54 */ - UBLOCK_MANICHAEAN = 234, /*[10AC0]*/ - /** @stable ICU 54 */ - UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/ - /** @stable ICU 54 */ - UBLOCK_MODI = 236, 
/*[11600]*/ - /** @stable ICU 54 */ - UBLOCK_MRO = 237, /*[16A40]*/ - /** @stable ICU 54 */ - UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/ - /** @stable ICU 54 */ - UBLOCK_NABATAEAN = 239, /*[10880]*/ - /** @stable ICU 54 */ - UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/ - /** @stable ICU 54 */ - UBLOCK_OLD_PERMIC = 241, /*[10350]*/ - /** @stable ICU 54 */ - UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/ - /** @stable ICU 54 */ - UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/ - /** @stable ICU 54 */ - UBLOCK_PALMYRENE = 244, /*[10860]*/ - /** @stable ICU 54 */ - UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/ - /** @stable ICU 54 */ - UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/ - /** @stable ICU 54 */ - UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/ - /** @stable ICU 54 */ - UBLOCK_SIDDHAM = 248, /*[11580]*/ - /** @stable ICU 54 */ - UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/ - /** @stable ICU 54 */ - UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/ - /** @stable ICU 54 */ - UBLOCK_TIRHUTA = 251, /*[11480]*/ - /** @stable ICU 54 */ - UBLOCK_WARANG_CITI = 252, /*[118A0]*/ - - /* New blocks in Unicode 8.0 */ - - /** @stable ICU 56 */ - UBLOCK_AHOM = 253, /*[11700]*/ - /** @stable ICU 56 */ - UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/ - /** @stable ICU 56 */ - UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/ - /** @stable ICU 56 */ - UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256, /*[2B820]*/ - /** @stable ICU 56 */ - UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257, /*[12480]*/ - /** @stable ICU 56 */ - UBLOCK_HATRAN = 258, /*[108E0]*/ - /** @stable ICU 56 */ - UBLOCK_MULTANI = 259, /*[11280]*/ - /** @stable ICU 56 */ - UBLOCK_OLD_HUNGARIAN = 260, /*[10C80]*/ - /** @stable ICU 56 */ - UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261, /*[1F900]*/ - /** @stable ICU 56 */ - UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/ - - /* New blocks in Unicode 9.0 */ - - /** @stable ICU 58 */ - UBLOCK_ADLAM = 263, /*[1E900]*/ - /** @stable ICU 58 */ - UBLOCK_BHAIKSUKI = 264, /*[11C00]*/ - /** @stable 
ICU 58 */ - UBLOCK_CYRILLIC_EXTENDED_C = 265, /*[1C80]*/ - /** @stable ICU 58 */ - UBLOCK_GLAGOLITIC_SUPPLEMENT = 266, /*[1E000]*/ - /** @stable ICU 58 */ - UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 267, /*[16FE0]*/ - /** @stable ICU 58 */ - UBLOCK_MARCHEN = 268, /*[11C70]*/ - /** @stable ICU 58 */ - UBLOCK_MONGOLIAN_SUPPLEMENT = 269, /*[11660]*/ - /** @stable ICU 58 */ - UBLOCK_NEWA = 270, /*[11400]*/ - /** @stable ICU 58 */ - UBLOCK_OSAGE = 271, /*[104B0]*/ - /** @stable ICU 58 */ - UBLOCK_TANGUT = 272, /*[17000]*/ - /** @stable ICU 58 */ - UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/ - - // New blocks in Unicode 10.0 - - /** @stable ICU 60 */ - UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, /*[2CEB0]*/ - /** @stable ICU 60 */ - UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/ - /** @stable ICU 60 */ - UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/ - /** @stable ICU 60 */ - UBLOCK_NUSHU = 277, /*[1B170]*/ - /** @stable ICU 60 */ - UBLOCK_SOYOMBO = 278, /*[11A50]*/ - /** @stable ICU 60 */ - UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/ - /** @stable ICU 60 */ - UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/ - - // New blocks in Unicode 11.0 - - /** @stable ICU 62 */ - UBLOCK_CHESS_SYMBOLS = 281, /*[1FA00]*/ - /** @stable ICU 62 */ - UBLOCK_DOGRA = 282, /*[11800]*/ - /** @stable ICU 62 */ - UBLOCK_GEORGIAN_EXTENDED = 283, /*[1C90]*/ - /** @stable ICU 62 */ - UBLOCK_GUNJALA_GONDI = 284, /*[11D60]*/ - /** @stable ICU 62 */ - UBLOCK_HANIFI_ROHINGYA = 285, /*[10D00]*/ - /** @stable ICU 62 */ - UBLOCK_INDIC_SIYAQ_NUMBERS = 286, /*[1EC70]*/ - /** @stable ICU 62 */ - UBLOCK_MAKASAR = 287, /*[11EE0]*/ - /** @stable ICU 62 */ - UBLOCK_MAYAN_NUMERALS = 288, /*[1D2E0]*/ - /** @stable ICU 62 */ - UBLOCK_MEDEFAIDRIN = 289, /*[16E40]*/ - /** @stable ICU 62 */ - UBLOCK_OLD_SOGDIAN = 290, /*[10F00]*/ - /** @stable ICU 62 */ - UBLOCK_SOGDIAN = 291, /*[10F30]*/ - -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UBlockCode value. 
- * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BLOCK). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UBLOCK_COUNT = 292, -#endif // U_HIDE_DEPRECATED_API - - /** @stable ICU 2.0 */ - UBLOCK_INVALID_CODE=-1 -}; - -/** @stable ICU 2.0 */ -typedef enum UBlockCode UBlockCode; - -/** - * East Asian Width constants. - * - * @see UCHAR_EAST_ASIAN_WIDTH - * @see u_getIntPropertyValue - * @stable ICU 2.2 - */ -typedef enum UEastAsianWidth { - /* - * Note: UEastAsianWidth constants are parsed by preparseucd.py. - * It matches lines like - * U_EA_ - */ - - U_EA_NEUTRAL, /*[N]*/ - U_EA_AMBIGUOUS, /*[A]*/ - U_EA_HALFWIDTH, /*[H]*/ - U_EA_FULLWIDTH, /*[F]*/ - U_EA_NARROW, /*[Na]*/ - U_EA_WIDE, /*[W]*/ -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UEastAsianWidth value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_EA_COUNT -#endif // U_HIDE_DEPRECATED_API -} UEastAsianWidth; - -/** - * Selector constants for u_charName(). - * u_charName() returns the "modern" name of a - * Unicode character; or the name that was defined in - * Unicode version 1.0, before the Unicode standard merged - * with ISO-10646; or an "extended" name that gives each - * Unicode code point a unique name. - * - * @see u_charName - * @stable ICU 2.0 - */ -typedef enum UCharNameChoice { - /** Unicode character name (Name property). @stable ICU 2.0 */ - U_UNICODE_CHAR_NAME, -#ifndef U_HIDE_DEPRECATED_API - /** - * The Unicode_1_Name property value which is of little practical value. - * Beginning with ICU 49, ICU APIs return an empty string for this name choice. - * @deprecated ICU 49 - */ - U_UNICODE_10_CHAR_NAME, -#endif /* U_HIDE_DEPRECATED_API */ - /** Standard or synthetic character name. 
@stable ICU 2.0 */ - U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2, - /** Corrected name from NameAliases.txt. @stable ICU 4.4 */ - U_CHAR_NAME_ALIAS, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UCharNameChoice value. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_CHAR_NAME_CHOICE_COUNT -#endif // U_HIDE_DEPRECATED_API -} UCharNameChoice; - -/** - * Selector constants for u_getPropertyName() and - * u_getPropertyValueName(). These selectors are used to choose which - * name is returned for a given property or value. All properties and - * values have a long name. Most have a short name, but some do not. - * Unicode allows for additional names, beyond the long and short - * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where - * i=1, 2,... - * - * @see u_getPropertyName() - * @see u_getPropertyValueName() - * @stable ICU 2.4 - */ -typedef enum UPropertyNameChoice { - U_SHORT_PROPERTY_NAME, - U_LONG_PROPERTY_NAME, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UPropertyNameChoice value. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_PROPERTY_NAME_CHOICE_COUNT -#endif // U_HIDE_DEPRECATED_API -} UPropertyNameChoice; - -/** - * Decomposition Type constants. - * - * @see UCHAR_DECOMPOSITION_TYPE - * @stable ICU 2.2 - */ -typedef enum UDecompositionType { - /* - * Note: UDecompositionType constants are parsed by preparseucd.py. 
- * It matches lines like - * U_DT_ - */ - - U_DT_NONE, /*[none]*/ - U_DT_CANONICAL, /*[can]*/ - U_DT_COMPAT, /*[com]*/ - U_DT_CIRCLE, /*[enc]*/ - U_DT_FINAL, /*[fin]*/ - U_DT_FONT, /*[font]*/ - U_DT_FRACTION, /*[fra]*/ - U_DT_INITIAL, /*[init]*/ - U_DT_ISOLATED, /*[iso]*/ - U_DT_MEDIAL, /*[med]*/ - U_DT_NARROW, /*[nar]*/ - U_DT_NOBREAK, /*[nb]*/ - U_DT_SMALL, /*[sml]*/ - U_DT_SQUARE, /*[sqr]*/ - U_DT_SUB, /*[sub]*/ - U_DT_SUPER, /*[sup]*/ - U_DT_VERTICAL, /*[vert]*/ - U_DT_WIDE, /*[wide]*/ -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UDecompositionType value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_DT_COUNT /* 18 */ -#endif // U_HIDE_DEPRECATED_API -} UDecompositionType; - -/** - * Joining Type constants. - * - * @see UCHAR_JOINING_TYPE - * @stable ICU 2.2 - */ -typedef enum UJoiningType { - /* - * Note: UJoiningType constants are parsed by preparseucd.py. - * It matches lines like - * U_JT_ - */ - - U_JT_NON_JOINING, /*[U]*/ - U_JT_JOIN_CAUSING, /*[C]*/ - U_JT_DUAL_JOINING, /*[D]*/ - U_JT_LEFT_JOINING, /*[L]*/ - U_JT_RIGHT_JOINING, /*[R]*/ - U_JT_TRANSPARENT, /*[T]*/ -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UJoiningType value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_JT_COUNT /* 6 */ -#endif // U_HIDE_DEPRECATED_API -} UJoiningType; - -/** - * Joining Group constants. - * - * @see UCHAR_JOINING_GROUP - * @stable ICU 2.2 - */ -typedef enum UJoiningGroup { - /* - * Note: UJoiningGroup constants are parsed by preparseucd.py. 
- * It matches lines like - * U_JG_ - */ - - U_JG_NO_JOINING_GROUP, - U_JG_AIN, - U_JG_ALAPH, - U_JG_ALEF, - U_JG_BEH, - U_JG_BETH, - U_JG_DAL, - U_JG_DALATH_RISH, - U_JG_E, - U_JG_FEH, - U_JG_FINAL_SEMKATH, - U_JG_GAF, - U_JG_GAMAL, - U_JG_HAH, - U_JG_TEH_MARBUTA_GOAL, /**< @stable ICU 4.6 */ - U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL, - U_JG_HE, - U_JG_HEH, - U_JG_HEH_GOAL, - U_JG_HETH, - U_JG_KAF, - U_JG_KAPH, - U_JG_KNOTTED_HEH, - U_JG_LAM, - U_JG_LAMADH, - U_JG_MEEM, - U_JG_MIM, - U_JG_NOON, - U_JG_NUN, - U_JG_PE, - U_JG_QAF, - U_JG_QAPH, - U_JG_REH, - U_JG_REVERSED_PE, - U_JG_SAD, - U_JG_SADHE, - U_JG_SEEN, - U_JG_SEMKATH, - U_JG_SHIN, - U_JG_SWASH_KAF, - U_JG_SYRIAC_WAW, - U_JG_TAH, - U_JG_TAW, - U_JG_TEH_MARBUTA, - U_JG_TETH, - U_JG_WAW, - U_JG_YEH, - U_JG_YEH_BARREE, - U_JG_YEH_WITH_TAIL, - U_JG_YUDH, - U_JG_YUDH_HE, - U_JG_ZAIN, - U_JG_FE, /**< @stable ICU 2.6 */ - U_JG_KHAPH, /**< @stable ICU 2.6 */ - U_JG_ZHAIN, /**< @stable ICU 2.6 */ - U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */ - U_JG_FARSI_YEH, /**< @stable ICU 4.4 */ - U_JG_NYA, /**< @stable ICU 4.4 */ - U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */ - U_JG_MANICHAEAN_ALEPH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_AYIN, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_BETH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_DALETH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_DHAMEDH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_FIVE, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_GIMEL, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_HETH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_HUNDRED, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_KAPH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_LAMEDH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_MEM, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_NUN, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_ONE, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_PE, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_QOPH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_RESH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_SADHE, /**< @stable ICU 54 */ - 
U_JG_MANICHAEAN_SAMEKH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_TAW, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_TEN, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_TETH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_THAMEDH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_TWENTY, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_WAW, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_YODH, /**< @stable ICU 54 */ - U_JG_MANICHAEAN_ZAYIN, /**< @stable ICU 54 */ - U_JG_STRAIGHT_WAW, /**< @stable ICU 54 */ - U_JG_AFRICAN_FEH, /**< @stable ICU 58 */ - U_JG_AFRICAN_NOON, /**< @stable ICU 58 */ - U_JG_AFRICAN_QAF, /**< @stable ICU 58 */ - - U_JG_MALAYALAM_BHA, /**< @stable ICU 60 */ - U_JG_MALAYALAM_JA, /**< @stable ICU 60 */ - U_JG_MALAYALAM_LLA, /**< @stable ICU 60 */ - U_JG_MALAYALAM_LLLA, /**< @stable ICU 60 */ - U_JG_MALAYALAM_NGA, /**< @stable ICU 60 */ - U_JG_MALAYALAM_NNA, /**< @stable ICU 60 */ - U_JG_MALAYALAM_NNNA, /**< @stable ICU 60 */ - U_JG_MALAYALAM_NYA, /**< @stable ICU 60 */ - U_JG_MALAYALAM_RA, /**< @stable ICU 60 */ - U_JG_MALAYALAM_SSA, /**< @stable ICU 60 */ - U_JG_MALAYALAM_TTA, /**< @stable ICU 60 */ - - U_JG_HANIFI_ROHINGYA_KINNA_YA, /**< @stable ICU 62 */ - U_JG_HANIFI_ROHINGYA_PA, /**< @stable ICU 62 */ - -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UJoiningGroup value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_JG_COUNT -#endif // U_HIDE_DEPRECATED_API -} UJoiningGroup; - -/** - * Grapheme Cluster Break constants. - * - * @see UCHAR_GRAPHEME_CLUSTER_BREAK - * @stable ICU 3.4 - */ -typedef enum UGraphemeClusterBreak { - /* - * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py. 
- * It matches lines like - * U_GCB_ - */ - - U_GCB_OTHER = 0, /*[XX]*/ - U_GCB_CONTROL = 1, /*[CN]*/ - U_GCB_CR = 2, /*[CR]*/ - U_GCB_EXTEND = 3, /*[EX]*/ - U_GCB_L = 4, /*[L]*/ - U_GCB_LF = 5, /*[LF]*/ - U_GCB_LV = 6, /*[LV]*/ - U_GCB_LVT = 7, /*[LVT]*/ - U_GCB_T = 8, /*[T]*/ - U_GCB_V = 9, /*[V]*/ - /** @stable ICU 4.0 */ - U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ - /** @stable ICU 4.0 */ - U_GCB_PREPEND = 11, /*[PP]*/ - /** @stable ICU 50 */ - U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ - /** @stable ICU 58 */ - U_GCB_E_BASE = 13, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ - /** @stable ICU 58 */ - U_GCB_E_BASE_GAZ = 14, /*[EBG]*/ - /** @stable ICU 58 */ - U_GCB_E_MODIFIER = 15, /*[EM]*/ - /** @stable ICU 58 */ - U_GCB_GLUE_AFTER_ZWJ = 16, /*[GAZ]*/ - /** @stable ICU 58 */ - U_GCB_ZWJ = 17, /*[ZWJ]*/ - -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UGraphemeClusterBreak value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_GCB_COUNT = 18 -#endif // U_HIDE_DEPRECATED_API -} UGraphemeClusterBreak; - -/** - * Word Break constants. - * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.) - * - * @see UCHAR_WORD_BREAK - * @stable ICU 3.4 - */ -typedef enum UWordBreakValues { - /* - * Note: UWordBreakValues constants are parsed by preparseucd.py. 
- * It matches lines like - * U_WB_ - */ - - U_WB_OTHER = 0, /*[XX]*/ - U_WB_ALETTER = 1, /*[LE]*/ - U_WB_FORMAT = 2, /*[FO]*/ - U_WB_KATAKANA = 3, /*[KA]*/ - U_WB_MIDLETTER = 4, /*[ML]*/ - U_WB_MIDNUM = 5, /*[MN]*/ - U_WB_NUMERIC = 6, /*[NU]*/ - U_WB_EXTENDNUMLET = 7, /*[EX]*/ - /** @stable ICU 4.0 */ - U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ - /** @stable ICU 4.0 */ - U_WB_EXTEND = 9, /*[Extend]*/ - /** @stable ICU 4.0 */ - U_WB_LF = 10, /*[LF]*/ - /** @stable ICU 4.0 */ - U_WB_MIDNUMLET =11, /*[MB]*/ - /** @stable ICU 4.0 */ - U_WB_NEWLINE =12, /*[NL]*/ - /** @stable ICU 50 */ - U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ - /** @stable ICU 52 */ - U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ - /** @stable ICU 52 */ - U_WB_SINGLE_QUOTE = 15, /*[SQ]*/ - /** @stable ICU 52 */ - U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/ - /** @stable ICU 58 */ - U_WB_E_BASE = 17, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ - /** @stable ICU 58 */ - U_WB_E_BASE_GAZ = 18, /*[EBG]*/ - /** @stable ICU 58 */ - U_WB_E_MODIFIER = 19, /*[EM]*/ - /** @stable ICU 58 */ - U_WB_GLUE_AFTER_ZWJ = 20, /*[GAZ]*/ - /** @stable ICU 58 */ - U_WB_ZWJ = 21, /*[ZWJ]*/ - /** @stable ICU 62 */ - U_WB_WSEGSPACE = 22, /*[WSEGSPACE]*/ - -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UWordBreakValues value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_WORD_BREAK). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_WB_COUNT = 23 -#endif // U_HIDE_DEPRECATED_API -} UWordBreakValues; - -/** - * Sentence Break constants. - * - * @see UCHAR_SENTENCE_BREAK - * @stable ICU 3.4 - */ -typedef enum USentenceBreak { - /* - * Note: USentenceBreak constants are parsed by preparseucd.py. 
- * It matches lines like - * U_SB_ - */ - - U_SB_OTHER = 0, /*[XX]*/ - U_SB_ATERM = 1, /*[AT]*/ - U_SB_CLOSE = 2, /*[CL]*/ - U_SB_FORMAT = 3, /*[FO]*/ - U_SB_LOWER = 4, /*[LO]*/ - U_SB_NUMERIC = 5, /*[NU]*/ - U_SB_OLETTER = 6, /*[LE]*/ - U_SB_SEP = 7, /*[SE]*/ - U_SB_SP = 8, /*[SP]*/ - U_SB_STERM = 9, /*[ST]*/ - U_SB_UPPER = 10, /*[UP]*/ - U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ - U_SB_EXTEND = 12, /*[EX]*/ - U_SB_LF = 13, /*[LF]*/ - U_SB_SCONTINUE = 14, /*[SC]*/ -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal USentenceBreak value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_SB_COUNT = 15 -#endif // U_HIDE_DEPRECATED_API -} USentenceBreak; - -/** - * Line Break constants. - * - * @see UCHAR_LINE_BREAK - * @stable ICU 2.2 - */ -typedef enum ULineBreak { - /* - * Note: ULineBreak constants are parsed by preparseucd.py. 
- * It matches lines like - * U_LB_ - */ - - U_LB_UNKNOWN = 0, /*[XX]*/ - U_LB_AMBIGUOUS = 1, /*[AI]*/ - U_LB_ALPHABETIC = 2, /*[AL]*/ - U_LB_BREAK_BOTH = 3, /*[B2]*/ - U_LB_BREAK_AFTER = 4, /*[BA]*/ - U_LB_BREAK_BEFORE = 5, /*[BB]*/ - U_LB_MANDATORY_BREAK = 6, /*[BK]*/ - U_LB_CONTINGENT_BREAK = 7, /*[CB]*/ - U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/ - U_LB_COMBINING_MARK = 9, /*[CM]*/ - U_LB_CARRIAGE_RETURN = 10, /*[CR]*/ - U_LB_EXCLAMATION = 11, /*[EX]*/ - U_LB_GLUE = 12, /*[GL]*/ - U_LB_HYPHEN = 13, /*[HY]*/ - U_LB_IDEOGRAPHIC = 14, /*[ID]*/ - /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */ - U_LB_INSEPARABLE = 15, /*[IN]*/ - U_LB_INSEPERABLE = U_LB_INSEPARABLE, - U_LB_INFIX_NUMERIC = 16, /*[IS]*/ - U_LB_LINE_FEED = 17, /*[LF]*/ - U_LB_NONSTARTER = 18, /*[NS]*/ - U_LB_NUMERIC = 19, /*[NU]*/ - U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/ - U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/ - U_LB_PREFIX_NUMERIC = 22, /*[PR]*/ - U_LB_QUOTATION = 23, /*[QU]*/ - U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/ - U_LB_SURROGATE = 25, /*[SG]*/ - U_LB_SPACE = 26, /*[SP]*/ - U_LB_BREAK_SYMBOLS = 27, /*[SY]*/ - U_LB_ZWSPACE = 28, /*[ZW]*/ - /** @stable ICU 2.6 */ - U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ - /** @stable ICU 2.6 */ - U_LB_WORD_JOINER = 30, /*[WJ]*/ - /** @stable ICU 3.4 */ - U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */ - /** @stable ICU 3.4 */ - U_LB_H3 = 32, /*[H3]*/ - /** @stable ICU 3.4 */ - U_LB_JL = 33, /*[JL]*/ - /** @stable ICU 3.4 */ - U_LB_JT = 34, /*[JT]*/ - /** @stable ICU 3.4 */ - U_LB_JV = 35, /*[JV]*/ - /** @stable ICU 4.4 */ - U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */ - /** @stable ICU 49 */ - U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ - /** @stable ICU 49 */ - U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ - /** @stable ICU 50 */ - U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 
*/ - /** @stable ICU 58 */ - U_LB_E_BASE = 40, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ - /** @stable ICU 58 */ - U_LB_E_MODIFIER = 41, /*[EM]*/ - /** @stable ICU 58 */ - U_LB_ZWJ = 42, /*[ZWJ]*/ -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal ULineBreak value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_LINE_BREAK). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_LB_COUNT = 43 -#endif // U_HIDE_DEPRECATED_API -} ULineBreak; - -/** - * Numeric Type constants. - * - * @see UCHAR_NUMERIC_TYPE - * @stable ICU 2.2 - */ -typedef enum UNumericType { - /* - * Note: UNumericType constants are parsed by preparseucd.py. - * It matches lines like - * U_NT_ - */ - - U_NT_NONE, /*[None]*/ - U_NT_DECIMAL, /*[de]*/ - U_NT_DIGIT, /*[di]*/ - U_NT_NUMERIC, /*[nu]*/ -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UNumericType value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_NT_COUNT -#endif // U_HIDE_DEPRECATED_API -} UNumericType; - -/** - * Hangul Syllable Type constants. - * - * @see UCHAR_HANGUL_SYLLABLE_TYPE - * @stable ICU 2.6 - */ -typedef enum UHangulSyllableType { - /* - * Note: UHangulSyllableType constants are parsed by preparseucd.py. - * It matches lines like - * U_HST_ - */ - - U_HST_NOT_APPLICABLE, /*[NA]*/ - U_HST_LEADING_JAMO, /*[L]*/ - U_HST_VOWEL_JAMO, /*[V]*/ - U_HST_TRAILING_JAMO, /*[T]*/ - U_HST_LV_SYLLABLE, /*[LV]*/ - U_HST_LVT_SYLLABLE, /*[LVT]*/ -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UHangulSyllableType value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE). - * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
- */ - U_HST_COUNT -#endif // U_HIDE_DEPRECATED_API -} UHangulSyllableType; - -/** - * Indic Positional Category constants. - * - * @see UCHAR_INDIC_POSITIONAL_CATEGORY - * @stable ICU 63 - */ -typedef enum UIndicPositionalCategory { - /* - * Note: UIndicPositionalCategory constants are parsed by preparseucd.py. - * It matches lines like - * U_INPC_ - */ - - /** @stable ICU 63 */ - U_INPC_NA, - /** @stable ICU 63 */ - U_INPC_BOTTOM, - /** @stable ICU 63 */ - U_INPC_BOTTOM_AND_LEFT, - /** @stable ICU 63 */ - U_INPC_BOTTOM_AND_RIGHT, - /** @stable ICU 63 */ - U_INPC_LEFT, - /** @stable ICU 63 */ - U_INPC_LEFT_AND_RIGHT, - /** @stable ICU 63 */ - U_INPC_OVERSTRUCK, - /** @stable ICU 63 */ - U_INPC_RIGHT, - /** @stable ICU 63 */ - U_INPC_TOP, - /** @stable ICU 63 */ - U_INPC_TOP_AND_BOTTOM, - /** @stable ICU 63 */ - U_INPC_TOP_AND_BOTTOM_AND_RIGHT, - /** @stable ICU 63 */ - U_INPC_TOP_AND_LEFT, - /** @stable ICU 63 */ - U_INPC_TOP_AND_LEFT_AND_RIGHT, - /** @stable ICU 63 */ - U_INPC_TOP_AND_RIGHT, - /** @stable ICU 63 */ - U_INPC_VISUAL_ORDER_LEFT, -} UIndicPositionalCategory; - -/** - * Indic Syllabic Category constants. - * - * @see UCHAR_INDIC_SYLLABIC_CATEGORY - * @stable ICU 63 - */ -typedef enum UIndicSyllabicCategory { - /* - * Note: UIndicSyllabicCategory constants are parsed by preparseucd.py. 
- * It matches lines like - * U_INSC_ - */ - - /** @stable ICU 63 */ - U_INSC_OTHER, - /** @stable ICU 63 */ - U_INSC_AVAGRAHA, - /** @stable ICU 63 */ - U_INSC_BINDU, - /** @stable ICU 63 */ - U_INSC_BRAHMI_JOINING_NUMBER, - /** @stable ICU 63 */ - U_INSC_CANTILLATION_MARK, - /** @stable ICU 63 */ - U_INSC_CONSONANT, - /** @stable ICU 63 */ - U_INSC_CONSONANT_DEAD, - /** @stable ICU 63 */ - U_INSC_CONSONANT_FINAL, - /** @stable ICU 63 */ - U_INSC_CONSONANT_HEAD_LETTER, - /** @stable ICU 63 */ - U_INSC_CONSONANT_INITIAL_POSTFIXED, - /** @stable ICU 63 */ - U_INSC_CONSONANT_KILLER, - /** @stable ICU 63 */ - U_INSC_CONSONANT_MEDIAL, - /** @stable ICU 63 */ - U_INSC_CONSONANT_PLACEHOLDER, - /** @stable ICU 63 */ - U_INSC_CONSONANT_PRECEDING_REPHA, - /** @stable ICU 63 */ - U_INSC_CONSONANT_PREFIXED, - /** @stable ICU 63 */ - U_INSC_CONSONANT_SUBJOINED, - /** @stable ICU 63 */ - U_INSC_CONSONANT_SUCCEEDING_REPHA, - /** @stable ICU 63 */ - U_INSC_CONSONANT_WITH_STACKER, - /** @stable ICU 63 */ - U_INSC_GEMINATION_MARK, - /** @stable ICU 63 */ - U_INSC_INVISIBLE_STACKER, - /** @stable ICU 63 */ - U_INSC_JOINER, - /** @stable ICU 63 */ - U_INSC_MODIFYING_LETTER, - /** @stable ICU 63 */ - U_INSC_NON_JOINER, - /** @stable ICU 63 */ - U_INSC_NUKTA, - /** @stable ICU 63 */ - U_INSC_NUMBER, - /** @stable ICU 63 */ - U_INSC_NUMBER_JOINER, - /** @stable ICU 63 */ - U_INSC_PURE_KILLER, - /** @stable ICU 63 */ - U_INSC_REGISTER_SHIFTER, - /** @stable ICU 63 */ - U_INSC_SYLLABLE_MODIFIER, - /** @stable ICU 63 */ - U_INSC_TONE_LETTER, - /** @stable ICU 63 */ - U_INSC_TONE_MARK, - /** @stable ICU 63 */ - U_INSC_VIRAMA, - /** @stable ICU 63 */ - U_INSC_VISARGA, - /** @stable ICU 63 */ - U_INSC_VOWEL, - /** @stable ICU 63 */ - U_INSC_VOWEL_DEPENDENT, - /** @stable ICU 63 */ - U_INSC_VOWEL_INDEPENDENT, -} UIndicSyllabicCategory; - -/** - * Vertical Orientation constants. 
- * - * @see UCHAR_VERTICAL_ORIENTATION - * @stable ICU 63 - */ -typedef enum UVerticalOrientation { - /* - * Note: UVerticalOrientation constants are parsed by preparseucd.py. - * It matches lines like - * U_VO_ - */ - - /** @stable ICU 63 */ - U_VO_ROTATED, - /** @stable ICU 63 */ - U_VO_TRANSFORMED_ROTATED, - /** @stable ICU 63 */ - U_VO_TRANSFORMED_UPRIGHT, - /** @stable ICU 63 */ - U_VO_UPRIGHT, -} UVerticalOrientation; - -/** - * Check a binary Unicode property for a code point. - * - * Unicode, especially in version 3.2, defines many more properties than the - * original set in UnicodeData.txt. - * - * The properties APIs are intended to reflect Unicode properties as defined - * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). - * For details about the properties see http://www.unicode.org/ucd/ . - * For names of Unicode properties see the UCD file PropertyAliases.txt. - * - * Important: If ICU is built with UCD files from Unicode versions below 3.2, - * then properties marked with "new in Unicode 3.2" are not or not fully available. - * - * @param c Code point to test. - * @param which UProperty selector constant, identifies which binary property to check. - * Must be UCHAR_BINARY_START<=which=0. - * True for characters with general category "Nd" (decimal digit numbers) - * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. - * (That is, for letters with code points - * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.) - * - * In order to narrow the definition of hexadecimal digits to only ASCII - * characters, use (c<=0x7f && u_isxdigit(c)). - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. 
- * - * @param c the code point to be tested - * @return TRUE if the code point is a hexadecimal digit - * - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -u_isxdigit(UChar32 c); - -/** - * Determines whether the specified code point is a punctuation character. - * True for characters with general categories "P" (punctuation). - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is a punctuation character - * - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -u_ispunct(UChar32 c); - -/** - * Determines whether the specified code point is a "graphic" character - * (printable, excluding spaces). - * TRUE for all characters except those with general categories - * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), - * "Cn" (unassigned), and "Z" (separators). - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is a "graphic" character - * - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -u_isgraph(UChar32 c); - -/** - * Determines whether the specified code point is a "blank" or "horizontal space", - * a character that visibly separates words on a line. - * The following are equivalent definitions: - * - * TRUE for Unicode White_Space characters except for "vertical space controls" - * where "vertical space controls" are the following characters: - * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS) - * - * same as - * - * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators). 
- * - * Note: There are several ICU whitespace functions; please see the uchar.h - * file documentation for a detailed comparison. - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is a "blank" - * - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -u_isblank(UChar32 c); - -/** - * Determines whether the specified code point is "defined", - * which usually means that it is assigned a character. - * True for general categories other than "Cn" (other, not assigned), - * i.e., true for all code points mentioned in UnicodeData.txt. - * - * Note that non-character code points (e.g., U+FDD0) are not "defined" - * (they are Cn), but surrogate code points are "defined" (Cs). - * - * Same as java.lang.Character.isDefined(). - * - * @param c the code point to be tested - * @return TRUE if the code point is assigned a character - * - * @see u_isdigit - * @see u_isalpha - * @see u_isalnum - * @see u_isupper - * @see u_islower - * @see u_istitle - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isdefined(UChar32 c); - -/** - * Determines if the specified character is a space character or not. - * - * Note: There are several ICU whitespace functions; please see the uchar.h - * file documentation for a detailed comparison. - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the character to be tested - * @return true if the character is a space character; false otherwise. - * - * @see u_isJavaSpaceChar - * @see u_isWhitespace - * @see u_isUWhiteSpace - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isspace(UChar32 c); - -/** - * Determine if the specified code point is a space character according to Java. 
- * True for characters with general categories "Z" (separators), - * which does not include control codes (e.g., TAB or Line Feed). - * - * Same as java.lang.Character.isSpaceChar(). - * - * Note: There are several ICU whitespace functions; please see the uchar.h - * file documentation for a detailed comparison. - * - * @param c the code point to be tested - * @return TRUE if the code point is a space character according to Character.isSpaceChar() - * - * @see u_isspace - * @see u_isWhitespace - * @see u_isUWhiteSpace - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -u_isJavaSpaceChar(UChar32 c); - -/** - * Determines if the specified code point is a whitespace character according to Java/ICU. - * A character is considered to be a Java whitespace character if and only - * if it satisfies one of the following criteria: - * - * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not - * also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP). - * - It is U+0009 HORIZONTAL TABULATION. - * - It is U+000A LINE FEED. - * - It is U+000B VERTICAL TABULATION. - * - It is U+000C FORM FEED. - * - It is U+000D CARRIAGE RETURN. - * - It is U+001C FILE SEPARATOR. - * - It is U+001D GROUP SEPARATOR. - * - It is U+001E RECORD SEPARATOR. - * - It is U+001F UNIT SEPARATOR. - * - * This API tries to sync with the semantics of Java's - * java.lang.Character.isWhitespace(), but it may not return - * the exact same results because of the Unicode version - * difference. - * - * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) - * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. - * See http://www.unicode.org/versions/Unicode4.0.1/ - * - * Note: There are several ICU whitespace functions; please see the uchar.h - * file documentation for a detailed comparison. 
- * - * @param c the code point to be tested - * @return TRUE if the code point is a whitespace character according to Java/ICU - * - * @see u_isspace - * @see u_isJavaSpaceChar - * @see u_isUWhiteSpace - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isWhitespace(UChar32 c); - -/** - * Determines whether the specified code point is a control character - * (as defined by this function). - * A control character is one of the following: - * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f) - * - U_CONTROL_CHAR (Cc) - * - U_FORMAT_CHAR (Cf) - * - U_LINE_SEPARATOR (Zl) - * - U_PARAGRAPH_SEPARATOR (Zp) - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is a control character - * - * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT - * @see u_isprint - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_iscntrl(UChar32 c); - -/** - * Determines whether the specified code point is an ISO control code. - * True for U+0000..U+001f and U+007f..U+009f (general category "Cc"). - * - * Same as java.lang.Character.isISOControl(). - * - * @param c the code point to be tested - * @return TRUE if the code point is an ISO control code - * - * @see u_iscntrl - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -u_isISOControl(UChar32 c); - -/** - * Determines whether the specified code point is a printable character. - * True for general categories other than "C" (controls). - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. 
- * - * @param c the code point to be tested - * @return TRUE if the code point is a printable character - * - * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT - * @see u_iscntrl - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isprint(UChar32 c); - -/** - * Determines whether the specified code point is a base character. - * True for general categories "L" (letters), "N" (numbers), - * "Mc" (spacing combining marks), and "Me" (enclosing marks). - * - * Note that this is different from the Unicode definition in - * chapter 3.5, conformance clause D13, - * which defines base characters to be all characters (not Cn) - * that do not graphically combine with preceding characters (M) - * and that are neither control (Cc) or format (Cf) characters. - * - * @param c the code point to be tested - * @return TRUE if the code point is a base character according to this function - * - * @see u_isalpha - * @see u_isdigit - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isbase(UChar32 c); - -/** - * Returns the bidirectional category value for the code point, - * which is used in the Unicode bidirectional algorithm - * (UAX #9 http://www.unicode.org/reports/tr9/). - * Note that some unassigned code points have bidi values - * of R or AL because they are in blocks that are reserved - * for Right-To-Left scripts. - * - * Same as java.lang.Character.getDirectionality() - * - * @param c the code point to be tested - * @return the bidirectional category (UCharDirection) value - * - * @see UCharDirection - * @stable ICU 2.0 - */ -U_STABLE UCharDirection U_EXPORT2 -u_charDirection(UChar32 c); - -/** - * Determines whether the code point has the Bidi_Mirrored property. - * This property is set for characters that are commonly used in - * Right-To-Left contexts and need to be displayed with a "mirrored" - * glyph. - * - * Same as java.lang.Character.isMirrored(). 
- * Same as UCHAR_BIDI_MIRRORED - * - * @param c the code point to be tested - * @return TRUE if the character has the Bidi_Mirrored property - * - * @see UCHAR_BIDI_MIRRORED - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isMirrored(UChar32 c); - -/** - * Maps the specified character to a "mirror-image" character. - * For characters with the Bidi_Mirrored property, implementations - * sometimes need a "poor man's" mapping to another Unicode - * character (code point) such that the default glyph may serve - * as the mirror-image of the default glyph of the specified - * character. This is useful for text conversion to and from - * codepages with visual order, and for displays without glyph - * selection capabilities. - * - * @param c the code point to be mapped - * @return another Unicode code point that may serve as a mirror-image - * substitute, or c itself if there is no such mapping or c - * does not have the Bidi_Mirrored property - * - * @see UCHAR_BIDI_MIRRORED - * @see u_isMirrored - * @stable ICU 2.0 - */ -U_STABLE UChar32 U_EXPORT2 -u_charMirror(UChar32 c); - -/** - * Maps the specified character to its paired bracket character. - * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror(). - * Otherwise c itself is returned. - * See http://www.unicode.org/reports/tr9/ - * - * @param c the code point to be mapped - * @return the paired bracket code point, - * or c itself if there is no such mapping - * (Bidi_Paired_Bracket_Type=None) - * - * @see UCHAR_BIDI_PAIRED_BRACKET - * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE - * @see u_charMirror - * @stable ICU 52 - */ -U_STABLE UChar32 U_EXPORT2 -u_getBidiPairedBracket(UChar32 c); - -/** - * Returns the general category value for the code point. - * - * Same as java.lang.Character.getType(). 
- * - * @param c the code point to be tested - * @return the general category (UCharCategory) value - * - * @see UCharCategory - * @stable ICU 2.0 - */ -U_STABLE int8_t U_EXPORT2 -u_charType(UChar32 c); - -/** - * Get a single-bit bit set for the general category of a character. - * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc. - * Same as U_MASK(u_charType(c)). - * - * @param c the code point to be tested - * @return a single-bit mask corresponding to the general category (UCharCategory) value - * - * @see u_charType - * @see UCharCategory - * @see U_GC_CN_MASK - * @stable ICU 2.1 - */ -#define U_GET_GC_MASK(c) U_MASK(u_charType(c)) - -/** - * Callback from u_enumCharTypes(), is called for each contiguous range - * of code points c (where start<=cnameChoice, the character name written - * into the buffer is the "modern" name or the name that was defined - * in Unicode version 1.0. - * The name contains only "invariant" characters - * like A-Z, 0-9, space, and '-'. - * Unicode 1.0 names are only retrieved if they are different from the modern - * names and if the data file contains the data for them. gennames may or may - * not be called with a command line option to include 1.0 names in unames.dat. - * - * @param code The character (code point) for which to get the name. - * It must be 0<=code<=0x10ffff. - * @param nameChoice Selector for which name to get. - * @param buffer Destination address for copying the name. - * The name will always be zero-terminated. - * If there is no name, then the buffer will be set to the empty string. - * @param bufferLength ==sizeof(buffer) - * @param pErrorCode Pointer to a UErrorCode variable; - * check for U_SUCCESS() after u_charName() - * returns. - * @return The length of the name, or 0 if there is no name for this character. - * If the bufferLength is less than or equal to the length, then the buffer - * contains the truncated name and the returned length indicates the full - * length of the name. 
- * The length does not include the zero-termination. - * - * @see UCharNameChoice - * @see u_charFromName - * @see u_enumCharNames - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_charName(UChar32 code, UCharNameChoice nameChoice, - char *buffer, int32_t bufferLength, - UErrorCode *pErrorCode); - -#ifndef U_HIDE_DEPRECATED_API -/** - * Returns an empty string. - * Used to return the ISO 10646 comment for a character. - * The Unicode ISO_Comment property is deprecated and has no values. - * - * @param c The character (code point) for which to get the ISO comment. - * It must be 0<=c<=0x10ffff. - * @param dest Destination address for copying the comment. - * The comment will be zero-terminated if possible. - * If there is no comment, then the buffer will be set to the empty string. - * @param destCapacity ==sizeof(dest) - * @param pErrorCode Pointer to a UErrorCode variable; - * check for U_SUCCESS() after u_getISOComment() - * returns. - * @return 0 - * - * @deprecated ICU 49 - */ -U_DEPRECATED int32_t U_EXPORT2 -u_getISOComment(UChar32 c, - char *dest, int32_t destCapacity, - UErrorCode *pErrorCode); -#endif /* U_HIDE_DEPRECATED_API */ - -/** - * Find a Unicode character by its name and return its code point value. - * The name is matched exactly and completely. - * If the name does not correspond to a code point, pErrorCode - * is set to U_INVALID_CHAR_FOUND. - * A Unicode 1.0 name is matched only if it differs from the modern name. - * Unicode names are all uppercase. Extended names are lowercase followed - * by an uppercase hexadecimal number, and within angle brackets. - * - * @param nameChoice Selector for which name to match. - * @param name The name to match. - * @param pErrorCode Pointer to a UErrorCode variable - * @return The Unicode value of the code point with the given name, - * or an undefined value if there is no such code point. 
- * - * @see UCharNameChoice - * @see u_charName - * @see u_enumCharNames - * @stable ICU 1.7 - */ -U_STABLE UChar32 U_EXPORT2 -u_charFromName(UCharNameChoice nameChoice, - const char *name, - UErrorCode *pErrorCode); - -/** - * Type of a callback function for u_enumCharNames() that gets called - * for each Unicode character with the code point value and - * the character name. - * If such a function returns FALSE, then the enumeration is stopped. - * - * @param context The context pointer that was passed to u_enumCharNames(). - * @param code The Unicode code point for the character with this name. - * @param nameChoice Selector for which kind of names is enumerated. - * @param name The character's name, zero-terminated. - * @param length The length of the name. - * @return TRUE if the enumeration should continue, FALSE to stop it. - * - * @see UCharNameChoice - * @see u_enumCharNames - * @stable ICU 1.7 - */ -typedef UBool U_CALLCONV UEnumCharNamesFn(void *context, - UChar32 code, - UCharNameChoice nameChoice, - const char *name, - int32_t length); - -/** - * Enumerate all assigned Unicode characters between the start and limit - * code points (start inclusive, limit exclusive) and call a function - * for each, passing the code point value and the character name. - * For Unicode 1.0 names, only those are enumerated that differ from the - * modern names. - * - * @param start The first code point in the enumeration range. - * @param limit One more than the last code point in the enumeration range - * (the first one after the range). - * @param fn The function that is to be called for each character name. - * @param context An arbitrary pointer that is passed to the function. - * @param nameChoice Selector for which kind of names to enumerate. 
- * @param pErrorCode Pointer to a UErrorCode variable - * - * @see UCharNameChoice - * @see UEnumCharNamesFn - * @see u_charName - * @see u_charFromName - * @stable ICU 1.7 - */ -U_STABLE void U_EXPORT2 -u_enumCharNames(UChar32 start, UChar32 limit, - UEnumCharNamesFn *fn, - void *context, - UCharNameChoice nameChoice, - UErrorCode *pErrorCode); - -/** - * Return the Unicode name for a given property, as given in the - * Unicode database file PropertyAliases.txt. - * - * In addition, this function maps the property - * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / - * "General_Category_Mask". These names are not in - * PropertyAliases.txt. - * - * @param property UProperty selector other than UCHAR_INVALID_CODE. - * If out of range, NULL is returned. - * - * @param nameChoice selector for which name to get. If out of range, - * NULL is returned. All properties have a long name. Most - * have a short name, but some do not. Unicode allows for - * additional names; if present these will be returned by - * U_LONG_PROPERTY_NAME + i, where i=1, 2,... - * - * @return a pointer to the name, or NULL if either the - * property or the nameChoice is out of range. If a given - * nameChoice returns NULL, then all larger values of - * nameChoice will return NULL, with one exception: if NULL is - * returned for U_SHORT_PROPERTY_NAME, then - * U_LONG_PROPERTY_NAME (and higher) may still return a - * non-NULL value. The returned pointer is valid until - * u_cleanup() is called. - * - * @see UProperty - * @see UPropertyNameChoice - * @stable ICU 2.4 - */ -U_STABLE const char* U_EXPORT2 -u_getPropertyName(UProperty property, - UPropertyNameChoice nameChoice); - -/** - * Return the UProperty enum for a given property name, as specified - * in the Unicode database file PropertyAliases.txt. Short, long, and - * any other variants are recognized. 
- * - * In addition, this function maps the synthetic names "gcm" / - * "General_Category_Mask" to the property - * UCHAR_GENERAL_CATEGORY_MASK. These names are not in - * PropertyAliases.txt. - * - * @param alias the property name to be matched. The name is compared - * using "loose matching" as described in PropertyAliases.txt. - * - * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name - * does not match any property. - * - * @see UProperty - * @stable ICU 2.4 - */ -U_STABLE UProperty U_EXPORT2 -u_getPropertyEnum(const char* alias); - -/** - * Return the Unicode name for a given property value, as given in the - * Unicode database file PropertyValueAliases.txt. - * - * Note: Some of the names in PropertyValueAliases.txt can only be - * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not - * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / - * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" - * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". - * - * @param property UProperty selector constant. - * Must be UCHAR_BINARY_START<=which2<=radix<=36 or if the - * value of c is not a valid digit in the specified - * radix, -1 is returned. A character is a valid digit - * if at least one of the following is true: - *

    - *
  • The character has a decimal digit value. - * Such characters have the general category "Nd" (decimal digit numbers) - * and a Numeric_Type of Decimal. - * In this case the value is the character's decimal digit value.
  • - *
  • The character is one of the uppercase Latin letters - * 'A' through 'Z'. - * In this case the value is c-'A'+10.
  • - *
  • The character is one of the lowercase Latin letters - * 'a' through 'z'. - * In this case the value is ch-'a'+10.
  • - *
  • Latin letters from both the ASCII range (0061..007A, 0041..005A) - * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) - * are recognized.
  • - *
- * - * Same as java.lang.Character.digit(). - * - * @param ch the code point to be tested. - * @param radix the radix. - * @return the numeric value represented by the character in the - * specified radix, - * or -1 if there is no value or if the value exceeds the radix. - * - * @see UCHAR_NUMERIC_TYPE - * @see u_forDigit - * @see u_charDigitValue - * @see u_isdigit - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_digit(UChar32 ch, int8_t radix); - -/** - * Determines the character representation for a specific digit in - * the specified radix. If the value of radix is not a - * valid radix, or the value of digit is not a valid - * digit in the specified radix, the null character - * (U+0000) is returned. - *

- * The radix argument is valid if it is greater than or - * equal to 2 and less than or equal to 36. - * The digit argument is valid if - * 0 <= digit < radix. - *

- * If the digit is less than 10, then - * '0' + digit is returned. Otherwise, the value - * 'a' + digit - 10 is returned. - * - * Same as java.lang.Character.forDigit(). - * - * @param digit the number to convert to a character. - * @param radix the radix. - * @return the char representation of the specified digit - * in the specified radix. - * - * @see u_digit - * @see u_charDigitValue - * @see u_isdigit - * @stable ICU 2.0 - */ -U_STABLE UChar32 U_EXPORT2 -u_forDigit(int32_t digit, int8_t radix); - -/** - * Get the "age" of the code point. - * The "age" is the Unicode version when the code point was first - * designated (as a non-character or for Private Use) - * or assigned a character. - * This can be useful to avoid emitting code points to receiving - * processes that do not accept newer characters. - * The data is from the UCD file DerivedAge.txt. - * - * @param c The code point. - * @param versionArray The Unicode version number array, to be filled in. - * - * @stable ICU 2.1 - */ -U_STABLE void U_EXPORT2 -u_charAge(UChar32 c, UVersionInfo versionArray); - -/** - * Gets the Unicode version information. - * The version array is filled in with the version information - * for the Unicode standard that is currently used by ICU. - * For example, Unicode version 3.1.1 is represented as an array with - * the values { 3, 1, 1, 0 }. - * - * @param versionArray an output array that will be filled in with - * the Unicode version number - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -u_getUnicodeVersion(UVersionInfo versionArray); - -#if !UCONFIG_NO_NORMALIZATION -/** - * Get the FC_NFKC_Closure property string for a character. - * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure" - * or for "FNC": http://www.unicode.org/reports/tr15/ - * - * @param c The character (code point) for which to get the FC_NFKC_Closure string. - * It must be 0<=c<=0x10ffff. - * @param dest Destination address for copying the string. 
- * The string will be zero-terminated if possible. - * If there is no FC_NFKC_Closure string, - * then the buffer will be set to the empty string. - * @param destCapacity ==sizeof(dest) - * @param pErrorCode Pointer to a UErrorCode variable. - * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character. - * If the destCapacity is less than or equal to the length, then the buffer - * contains the truncated name and the returned length indicates the full - * length of the name. - * The length does not include the zero-termination. - * - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode); - -#endif - - -U_CDECL_END - -#endif /*_UCHAR*/ -/*eof*/ diff --git a/deps/node/deps/icu-small/source/common/unicode/ucharstrie.h b/deps/node/deps/icu-small/source/common/unicode/ucharstrie.h deleted file mode 100644 index dfc93f6d..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ucharstrie.h +++ /dev/null @@ -1,578 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2012, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: ucharstrie.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010nov14 -* created by: Markus W. Scherer -*/ - -#ifndef __UCHARSTRIE_H__ -#define __UCHARSTRIE_H__ - -/** - * \file - * \brief C++ API: Trie for mapping Unicode strings (or 16-bit-unit sequences) - * to integer values. 
- */ - -#include "unicode/utypes.h" -#include "unicode/unistr.h" -#include "unicode/uobject.h" -#include "unicode/ustringtrie.h" - -U_NAMESPACE_BEGIN - -class Appendable; -class UCharsTrieBuilder; -class UVector32; - -/** - * Light-weight, non-const reader class for a UCharsTrie. - * Traverses a char16_t-serialized data structure with minimal state, - * for mapping strings (16-bit-unit sequences) to non-negative integer values. - * - * This class owns the serialized trie data only if it was constructed by - * the builder's build() method. - * The public constructor and the copy constructor only alias the data (only copy the pointer). - * There is no assignment operator. - * - * This class is not intended for public subclassing. - * @stable ICU 4.8 - */ -class U_COMMON_API UCharsTrie : public UMemory { -public: - /** - * Constructs a UCharsTrie reader instance. - * - * The trieUChars must contain a copy of a char16_t sequence from the UCharsTrieBuilder, - * starting with the first char16_t of that sequence. - * The UCharsTrie object will not read more char16_ts than - * the UCharsTrieBuilder generated in the corresponding build() call. - * - * The array is not copied/cloned and must not be modified while - * the UCharsTrie object is in use. - * - * @param trieUChars The char16_t array that contains the serialized trie. - * @stable ICU 4.8 - */ - UCharsTrie(ConstChar16Ptr trieUChars) - : ownedArray_(NULL), uchars_(trieUChars), - pos_(uchars_), remainingMatchLength_(-1) {} - - /** - * Destructor. - * @stable ICU 4.8 - */ - ~UCharsTrie(); - - /** - * Copy constructor, copies the other trie reader object and its state, - * but not the char16_t array which will be shared. (Shallow copy.) - * @param other Another UCharsTrie object. - * @stable ICU 4.8 - */ - UCharsTrie(const UCharsTrie &other) - : ownedArray_(NULL), uchars_(other.uchars_), - pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {} - - /** - * Resets this trie to its initial state. 
- * @return *this - * @stable ICU 4.8 - */ - UCharsTrie &reset() { - pos_=uchars_; - remainingMatchLength_=-1; - return *this; - } - - /** - * UCharsTrie state object, for saving a trie's current state - * and resetting the trie back to this state later. - * @stable ICU 4.8 - */ - class State : public UMemory { - public: - /** - * Constructs an empty State. - * @stable ICU 4.8 - */ - State() { uchars=NULL; } - private: - friend class UCharsTrie; - - const char16_t *uchars; - const char16_t *pos; - int32_t remainingMatchLength; - }; - - /** - * Saves the state of this trie. - * @param state The State object to hold the trie's state. - * @return *this - * @see resetToState - * @stable ICU 4.8 - */ - const UCharsTrie &saveState(State &state) const { - state.uchars=uchars_; - state.pos=pos_; - state.remainingMatchLength=remainingMatchLength_; - return *this; - } - - /** - * Resets this trie to the saved state. - * If the state object contains no state, or the state of a different trie, - * then this trie remains unchanged. - * @param state The State object which holds a saved trie state. - * @return *this - * @see saveState - * @see reset - * @stable ICU 4.8 - */ - UCharsTrie &resetToState(const State &state) { - if(uchars_==state.uchars && uchars_!=NULL) { - pos_=state.pos; - remainingMatchLength_=state.remainingMatchLength; - } - return *this; - } - - /** - * Determines whether the string so far matches, whether it has a value, - * and whether another input char16_t can continue a matching string. - * @return The match/value Result. - * @stable ICU 4.8 - */ - UStringTrieResult current() const; - - /** - * Traverses the trie from the initial state for this input char16_t. - * Equivalent to reset().next(uchar). - * @param uchar Input char value. Values below 0 and above 0xffff will never match. - * @return The match/value Result. 
- * @stable ICU 4.8 - */ - inline UStringTrieResult first(int32_t uchar) { - remainingMatchLength_=-1; - return nextImpl(uchars_, uchar); - } - - /** - * Traverses the trie from the initial state for the - * one or two UTF-16 code units for this input code point. - * Equivalent to reset().nextForCodePoint(cp). - * @param cp A Unicode code point 0..0x10ffff. - * @return The match/value Result. - * @stable ICU 4.8 - */ - UStringTrieResult firstForCodePoint(UChar32 cp); - - /** - * Traverses the trie from the current state for this input char16_t. - * @param uchar Input char value. Values below 0 and above 0xffff will never match. - * @return The match/value Result. - * @stable ICU 4.8 - */ - UStringTrieResult next(int32_t uchar); - - /** - * Traverses the trie from the current state for the - * one or two UTF-16 code units for this input code point. - * @param cp A Unicode code point 0..0x10ffff. - * @return The match/value Result. - * @stable ICU 4.8 - */ - UStringTrieResult nextForCodePoint(UChar32 cp); - - /** - * Traverses the trie from the current state for this string. - * Equivalent to - * \code - * Result result=current(); - * for(each c in s) - * if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH; - * result=next(c); - * return result; - * \endcode - * @param s A string. Can be NULL if length is 0. - * @param length The length of the string. Can be -1 if NUL-terminated. - * @return The match/value Result. - * @stable ICU 4.8 - */ - UStringTrieResult next(ConstChar16Ptr s, int32_t length); - - /** - * Returns a matching string's value if called immediately after - * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE. - * getValue() can be called multiple times. - * - * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE! - * @return The value for the string so far. 
- * @stable ICU 4.8 - */ - inline int32_t getValue() const { - const char16_t *pos=pos_; - int32_t leadUnit=*pos++; - // U_ASSERT(leadUnit>=kMinValueLead); - return leadUnit&kValueIsFinal ? - readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit); - } - - /** - * Determines whether all strings reachable from the current state - * map to the same value. - * @param uniqueValue Receives the unique value, if this function returns TRUE. - * (output-only) - * @return TRUE if all strings reachable from the current state - * map to the same value. - * @stable ICU 4.8 - */ - inline UBool hasUniqueValue(int32_t &uniqueValue) const { - const char16_t *pos=pos_; - // Skip the rest of a pending linear-match node. - return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue); - } - - /** - * Finds each char16_t which continues the string from the current state. - * That is, each char16_t c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now. - * @param out Each next char16_t is appended to this object. - * @return the number of char16_ts which continue the string from here - * @stable ICU 4.8 - */ - int32_t getNextUChars(Appendable &out) const; - - /** - * Iterator for all of the (string, value) pairs in a UCharsTrie. - * @stable ICU 4.8 - */ - class U_COMMON_API Iterator : public UMemory { - public: - /** - * Iterates from the root of a char16_t-serialized UCharsTrie. - * @param trieUChars The trie char16_ts. - * @param maxStringLength If 0, the iterator returns full strings. - * Otherwise, the iterator returns strings with this maximum length. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @stable ICU 4.8 - */ - Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode); - - /** - * Iterates from the current state of the specified UCharsTrie. - * @param trie The trie whose state will be copied for iteration. - * @param maxStringLength If 0, the iterator returns full strings. - * Otherwise, the iterator returns strings with this maximum length. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @stable ICU 4.8 - */ - Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode); - - /** - * Destructor. - * @stable ICU 4.8 - */ - ~Iterator(); - - /** - * Resets this iterator to its initial state. - * @return *this - * @stable ICU 4.8 - */ - Iterator &reset(); - - /** - * @return TRUE if there are more elements. - * @stable ICU 4.8 - */ - UBool hasNext() const; - - /** - * Finds the next (string, value) pair if there is one. - * - * If the string is truncated to the maximum length and does not - * have a real value, then the value is set to -1. - * In this case, this "not a real value" is indistinguishable from - * a real value of -1. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return TRUE if there is another element. - * @stable ICU 4.8 - */ - UBool next(UErrorCode &errorCode); - - /** - * @return The string for the last successful next(). - * @stable ICU 4.8 - */ - const UnicodeString &getString() const { return str_; } - /** - * @return The value for the last successful next(). 
- * @stable ICU 4.8 - */ - int32_t getValue() const { return value_; } - - private: - UBool truncateAndStop() { - pos_=NULL; - value_=-1; // no real value for str - return TRUE; - } - - const char16_t *branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode); - - const char16_t *uchars_; - const char16_t *pos_; - const char16_t *initialPos_; - int32_t remainingMatchLength_; - int32_t initialRemainingMatchLength_; - UBool skipValue_; // Skip intermediate value which was already delivered. - - UnicodeString str_; - int32_t maxLength_; - int32_t value_; - - // The stack stores pairs of integers for backtracking to another - // outbound edge of a branch node. - // The first integer is an offset from uchars_. - // The second integer has the str_.length() from before the node in bits 15..0, - // and the remaining branch length in bits 31..16. - // (We could store the remaining branch length minus 1 in bits 30..16 and not use the sign bit, - // but the code looks more confusing that way.) - UVector32 *stack_; - }; - -private: - friend class UCharsTrieBuilder; - - /** - * Constructs a UCharsTrie reader instance. - * Unlike the public constructor which just aliases an array, - * this constructor adopts the builder's array. - * This constructor is only called by the builder. - */ - UCharsTrie(char16_t *adoptUChars, const char16_t *trieUChars) - : ownedArray_(adoptUChars), uchars_(trieUChars), - pos_(uchars_), remainingMatchLength_(-1) {} - - // No assignment operator. - UCharsTrie &operator=(const UCharsTrie &other); - - inline void stop() { - pos_=NULL; - } - - // Reads a compact 32-bit integer. - // pos is already after the leadUnit, and the lead unit has bit 15 reset. 
- static inline int32_t readValue(const char16_t *pos, int32_t leadUnit) { - int32_t value; - if(leadUnit=kMinTwoUnitValueLead) { - if(leadUnit>6)-1; - } else if(leadUnit=kMinTwoUnitNodeValueLead) { - if(leadUnit=kMinTwoUnitDeltaLead) { - if(delta==kThreeUnitDeltaLead) { - delta=(pos[0]<<16)|pos[1]; - pos+=2; - } else { - delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++; - } - } - return pos+delta; - } - - static const char16_t *skipDelta(const char16_t *pos) { - int32_t delta=*pos++; - if(delta>=kMinTwoUnitDeltaLead) { - if(delta==kThreeUnitDeltaLead) { - pos+=2; - } else { - ++pos; - } - } - return pos; - } - - static inline UStringTrieResult valueResult(int32_t node) { - return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15)); - } - - // Handles a branch node for both next(uchar) and next(string). - UStringTrieResult branchNext(const char16_t *pos, int32_t length, int32_t uchar); - - // Requires remainingLength_<0. - UStringTrieResult nextImpl(const char16_t *pos, int32_t uchar); - - // Helper functions for hasUniqueValue(). - // Recursively finds a unique value (or whether there is not a unique one) - // from a branch. - static const char16_t *findUniqueValueFromBranch(const char16_t *pos, int32_t length, - UBool haveUniqueValue, int32_t &uniqueValue); - // Recursively finds a unique value (or whether there is not a unique one) - // starting from a position on a node lead unit. - static UBool findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue); - - // Helper functions for getNextUChars(). - // getNextUChars() when pos is on a branch node. - static void getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out); - - // UCharsTrie data structure - // - // The trie consists of a series of char16_t-serialized nodes for incremental - // Unicode string/char16_t sequence matching. (char16_t=16-bit unsigned integer) - // The root node is at the beginning of the trie data. 
- // - // Types of nodes are distinguished by their node lead unit ranges. - // After each node, except a final-value node, another node follows to - // encode match values or continue matching further units. - // - // Node types: - // - Final-value node: Stores a 32-bit integer in a compact, variable-length format. - // The value is for the string/char16_t sequence so far. - // - Match node, optionally with an intermediate value in a different compact format. - // The value, if present, is for the string/char16_t sequence so far. - // - // Aside from the value, which uses the node lead unit's high bits: - // - // - Linear-match node: Matches a number of units. - // - Branch node: Branches to other nodes according to the current input unit. - // The node unit is the length of the branch (number of units to select from) - // minus 1. It is followed by a sub-node: - // - If the length is at most kMaxBranchLinearSubNodeLength, then - // there are length-1 (key, value) pairs and then one more comparison unit. - // If one of the key units matches, then the value is either a final value for - // the string so far, or a "jump" delta to the next node. - // If the last unit matches, then matching continues with the next node. - // (Values have the same encoding as final-value nodes.) - // - If the length is greater than kMaxBranchLinearSubNodeLength, then - // there is one unit and one "jump" delta. - // If the input unit is less than the sub-node unit, then "jump" by delta to - // the next sub-node which will have a length of length/2. - // (The delta has its own compact encoding.) - // Otherwise, skip the "jump" delta to the next sub-node - // which will have a length of length-length/2. - - // Match-node lead unit values, after masking off intermediate-value bits: - - // 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise - // the length is one more than the next unit. 
- - // For a branch sub-node with at most this many entries, we drop down - // to a linear search. - static const int32_t kMaxBranchLinearSubNodeLength=5; - - // 0030..003f: Linear-match node, match 1..16 units and continue reading the next node. - static const int32_t kMinLinearMatch=0x30; - static const int32_t kMaxLinearMatchLength=0x10; - - // Match-node lead unit bits 14..6 for the optional intermediate value. - // If these bits are 0, then there is no intermediate value. - // Otherwise, see the *NodeValue* constants below. - static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x0040 - static const int32_t kNodeTypeMask=kMinValueLead-1; // 0x003f - - // A final-value node has bit 15 set. - static const int32_t kValueIsFinal=0x8000; - - // Compact value: After testing and masking off bit 15, use the following thresholds. - static const int32_t kMaxOneUnitValue=0x3fff; - - static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1; // 0x4000 - static const int32_t kThreeUnitValueLead=0x7fff; - - static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1; // 0x3ffeffff - - // Compact intermediate-value integer, lead unit shared with a branch or linear-match node. - static const int32_t kMaxOneUnitNodeValue=0xff; - static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6); // 0x4040 - static const int32_t kThreeUnitNodeValueLead=0x7fc0; - - static const int32_t kMaxTwoUnitNodeValue= - ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1; // 0xfdffff - - // Compact delta integers. - static const int32_t kMaxOneUnitDelta=0xfbff; - static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1; // 0xfc00 - static const int32_t kThreeUnitDeltaLead=0xffff; - - static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff - - char16_t *ownedArray_; - - // Fixed value referencing the UCharsTrie words. 
- const char16_t *uchars_; - - // Iterator variables. - - // Pointer to next trie unit to read. NULL if no more matches. - const char16_t *pos_; - // Remaining length of a linear-match node, minus 1. Negative if not in such a node. - int32_t remainingMatchLength_; -}; - -U_NAMESPACE_END - -#endif // __UCHARSTRIE_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/ucharstriebuilder.h b/deps/node/deps/icu-small/source/common/unicode/ucharstriebuilder.h deleted file mode 100644 index 2aa4757e..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ucharstriebuilder.h +++ /dev/null @@ -1,187 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2016, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: ucharstriebuilder.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010nov14 -* created by: Markus W. Scherer -*/ - -#ifndef __UCHARSTRIEBUILDER_H__ -#define __UCHARSTRIEBUILDER_H__ - -#include "unicode/utypes.h" -#include "unicode/stringtriebuilder.h" -#include "unicode/ucharstrie.h" -#include "unicode/unistr.h" - -/** - * \file - * \brief C++ API: Builder for icu::UCharsTrie - */ - -U_NAMESPACE_BEGIN - -class UCharsTrieElement; - -/** - * Builder class for UCharsTrie. - * - * This class is not intended for public subclassing. - * @stable ICU 4.8 - */ -class U_COMMON_API UCharsTrieBuilder : public StringTrieBuilder { -public: - /** - * Constructs an empty builder. - * @param errorCode Standard ICU error code. - * @stable ICU 4.8 - */ - UCharsTrieBuilder(UErrorCode &errorCode); - - /** - * Destructor. - * @stable ICU 4.8 - */ - virtual ~UCharsTrieBuilder(); - - /** - * Adds a (string, value) pair. - * The string must be unique. 
- * The string contents will be copied; the builder does not keep - * a reference to the input UnicodeString or its buffer. - * @param s The input string. - * @param value The value associated with this string. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return *this - * @stable ICU 4.8 - */ - UCharsTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode); - - /** - * Builds a UCharsTrie for the add()ed data. - * Once built, no further data can be add()ed until clear() is called. - * - * A UCharsTrie cannot be empty. At least one (string, value) pair - * must have been add()ed. - * - * This method passes ownership of the builder's internal result array to the new trie object. - * Another call to any build() variant will re-serialize the trie. - * After clear() has been called, a new array will be used as well. - * @param buildOption Build option, see UStringTrieBuildOption. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return A new UCharsTrie for the add()ed data. - * @stable ICU 4.8 - */ - UCharsTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); - - /** - * Builds a UCharsTrie for the add()ed data and char16_t-serializes it. - * Once built, no further data can be add()ed until clear() is called. - * - * A UCharsTrie cannot be empty. At least one (string, value) pair - * must have been add()ed. - * - * Multiple calls to buildUnicodeString() set the UnicodeStrings to the - * builder's same char16_t array, without rebuilding. - * If buildUnicodeString() is called after build(), the trie will be - * re-serialized into a new array. 
- * If build() is called after buildUnicodeString(), the trie object will become - * the owner of the previously returned array. - * After clear() has been called, a new array will be used as well. - * @param buildOption Build option, see UStringTrieBuildOption. - * @param result A UnicodeString which will be set to the char16_t-serialized - * UCharsTrie for the add()ed data. - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return result - * @stable ICU 4.8 - */ - UnicodeString &buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result, - UErrorCode &errorCode); - - /** - * Removes all (string, value) pairs. - * New data can then be add()ed and a new trie can be built. - * @return *this - * @stable ICU 4.8 - */ - UCharsTrieBuilder &clear() { - strings.remove(); - elementsLength=0; - ucharsLength=0; - return *this; - } - -private: - UCharsTrieBuilder(const UCharsTrieBuilder &other); // no copy constructor - UCharsTrieBuilder &operator=(const UCharsTrieBuilder &other); // no assignment operator - - void buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode); - - virtual int32_t getElementStringLength(int32_t i) const; - virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const; - virtual int32_t getElementValue(int32_t i) const; - - virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const; - - virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const; - virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const; - virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const; - - virtual UBool matchNodesCanHaveValues() const { return TRUE; } - - virtual int32_t getMaxBranchLinearSubNodeLength() const { 
return UCharsTrie::kMaxBranchLinearSubNodeLength; } - virtual int32_t getMinLinearMatch() const { return UCharsTrie::kMinLinearMatch; } - virtual int32_t getMaxLinearMatchLength() const { return UCharsTrie::kMaxLinearMatchLength; } - - class UCTLinearMatchNode : public LinearMatchNode { - public: - UCTLinearMatchNode(const char16_t *units, int32_t len, Node *nextNode); - virtual UBool operator==(const Node &other) const; - virtual void write(StringTrieBuilder &builder); - private: - const char16_t *s; - }; - - virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, - Node *nextNode) const; - - UBool ensureCapacity(int32_t length); - virtual int32_t write(int32_t unit); - int32_t write(const char16_t *s, int32_t length); - virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length); - virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); - virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); - virtual int32_t writeDeltaTo(int32_t jumpTarget); - - UnicodeString strings; - UCharsTrieElement *elements; - int32_t elementsCapacity; - int32_t elementsLength; - - // char16_t serialization of the trie. - // Grows from the back: ucharsLength measures from the end of the buffer! - char16_t *uchars; - int32_t ucharsCapacity; - int32_t ucharsLength; -}; - -U_NAMESPACE_END - -#endif // __UCHARSTRIEBUILDER_H__ diff --git a/deps/node/deps/icu-small/source/common/unicode/uchriter.h b/deps/node/deps/icu-small/source/common/unicode/uchriter.h deleted file mode 100644 index 38f67c5b..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uchriter.h +++ /dev/null @@ -1,387 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1998-2005, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -*/ - -#ifndef UCHRITER_H -#define UCHRITER_H - -#include "unicode/utypes.h" -#include "unicode/chariter.h" - -/** - * \file - * \brief C++ API: char16_t Character Iterator - */ - -U_NAMESPACE_BEGIN - -/** - * A concrete subclass of CharacterIterator that iterates over the - * characters (code units or code points) in a char16_t array. - * It's possible not only to create an - * iterator that iterates over an entire char16_t array, but also to - * create one that iterates over only a subrange of a char16_t array - * (iterators over different subranges of the same char16_t array don't - * compare equal). - * @see CharacterIterator - * @see ForwardCharacterIterator - * @stable ICU 2.0 - */ -class U_COMMON_API UCharCharacterIterator : public CharacterIterator { -public: - /** - * Create an iterator over the char16_t array referred to by "textPtr". - * The iteration range is 0 to length-1. - * text is only aliased, not adopted (the - * destructor will not delete it). - * @param textPtr The char16_t array to be iterated over - * @param length The length of the char16_t array - * @stable ICU 2.0 - */ - UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length); - - /** - * Create an iterator over the char16_t array referred to by "textPtr". - * The iteration range is 0 to length-1. - * text is only aliased, not adopted (the - * destructor will not delete it). - * The starting - * position is specified by "position". If "position" is outside the valid - * iteration range, the behavior of this object is undefined. - * @param textPtr The char16_t array to be iteratd over - * @param length The length of the char16_t array - * @param position The starting position of the iteration - * @stable ICU 2.0 - */ - UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, - int32_t position); - - /** - * Create an iterator over the char16_t array referred to by "textPtr". 
- * The iteration range is 0 to end-1. - * text is only aliased, not adopted (the - * destructor will not delete it). - * The starting - * position is specified by "position". If begin and end do not - * form a valid iteration range or "position" is outside the valid - * iteration range, the behavior of this object is undefined. - * @param textPtr The char16_t array to be iterated over - * @param length The length of the char16_t array - * @param textBegin The begin position of the iteration range - * @param textEnd The end position of the iteration range - * @param position The starting position of the iteration - * @stable ICU 2.0 - */ - UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, - int32_t textBegin, - int32_t textEnd, - int32_t position); - - /** - * Copy constructor. The new iterator iterates over the same range - * of the same string as "that", and its initial position is the - * same as "that"'s current position. - * @param that The UCharCharacterIterator to be copied - * @stable ICU 2.0 - */ - UCharCharacterIterator(const UCharCharacterIterator& that); - - /** - * Destructor. - * @stable ICU 2.0 - */ - virtual ~UCharCharacterIterator(); - - /** - * Assignment operator. *this is altered to iterate over the sane - * range of the same string as "that", and refers to the same - * character within that string as "that" does. - * @param that The object to be copied - * @return the newly created object - * @stable ICU 2.0 - */ - UCharCharacterIterator& - operator=(const UCharCharacterIterator& that); - - /** - * Returns true if the iterators iterate over the same range of the - * same string and are pointing at the same character. - * @param that The ForwardCharacterIterator used to be compared for equality - * @return true if the iterators iterate over the same range of the - * same string and are pointing at the same character. 
- * @stable ICU 2.0 - */ - virtual UBool operator==(const ForwardCharacterIterator& that) const; - - /** - * Generates a hash code for this iterator. - * @return the hash code. - * @stable ICU 2.0 - */ - virtual int32_t hashCode(void) const; - - /** - * Returns a new UCharCharacterIterator referring to the same - * character in the same range of the same string as this one. The - * caller must delete the new iterator. - * @return the CharacterIterator newly created - * @stable ICU 2.0 - */ - virtual CharacterIterator* clone(void) const; - - /** - * Sets the iterator to refer to the first code unit in its - * iteration range, and returns that code unit. - * This can be used to begin an iteration with next(). - * @return the first code unit in its iteration range. - * @stable ICU 2.0 - */ - virtual char16_t first(void); - - /** - * Sets the iterator to refer to the first code unit in its - * iteration range, returns that code unit, and moves the position - * to the second code unit. This is an alternative to setToStart() - * for forward iteration with nextPostInc(). - * @return the first code unit in its iteration range - * @stable ICU 2.0 - */ - virtual char16_t firstPostInc(void); - - /** - * Sets the iterator to refer to the first code point in its - * iteration range, and returns that code unit, - * This can be used to begin an iteration with next32(). - * Note that an iteration with next32PostInc(), beginning with, - * e.g., setToStart() or firstPostInc(), is more efficient. - * @return the first code point in its iteration range - * @stable ICU 2.0 - */ - virtual UChar32 first32(void); - - /** - * Sets the iterator to refer to the first code point in its - * iteration range, returns that code point, and moves the position - * to the second code point. This is an alternative to setToStart() - * for forward iteration with next32PostInc(). - * @return the first code point in its iteration range. 
- * @stable ICU 2.0 - */ - virtual UChar32 first32PostInc(void); - - /** - * Sets the iterator to refer to the last code unit in its - * iteration range, and returns that code unit. - * This can be used to begin an iteration with previous(). - * @return the last code unit in its iteration range. - * @stable ICU 2.0 - */ - virtual char16_t last(void); - - /** - * Sets the iterator to refer to the last code point in its - * iteration range, and returns that code unit. - * This can be used to begin an iteration with previous32(). - * @return the last code point in its iteration range. - * @stable ICU 2.0 - */ - virtual UChar32 last32(void); - - /** - * Sets the iterator to refer to the "position"-th code unit - * in the text-storage object the iterator refers to, and - * returns that code unit. - * @param position the position within the text-storage object - * @return the code unit - * @stable ICU 2.0 - */ - virtual char16_t setIndex(int32_t position); - - /** - * Sets the iterator to refer to the beginning of the code point - * that contains the "position"-th code unit - * in the text-storage object the iterator refers to, and - * returns that code point. - * The current position is adjusted to the beginning of the code point - * (its first code unit). - * @param position the position within the text-storage object - * @return the code unit - * @stable ICU 2.0 - */ - virtual UChar32 setIndex32(int32_t position); - - /** - * Returns the code unit the iterator currently refers to. - * @return the code unit the iterator currently refers to. - * @stable ICU 2.0 - */ - virtual char16_t current(void) const; - - /** - * Returns the code point the iterator currently refers to. - * @return the code point the iterator currently refers to. - * @stable ICU 2.0 - */ - virtual UChar32 current32(void) const; - - /** - * Advances to the next code unit in the iteration range (toward - * endIndex()), and returns that code unit. 
If there are no more - * code units to return, returns DONE. - * @return the next code unit in the iteration range. - * @stable ICU 2.0 - */ - virtual char16_t next(void); - - /** - * Gets the current code unit for returning and advances to the next code unit - * in the iteration range - * (toward endIndex()). If there are - * no more code units to return, returns DONE. - * @return the current code unit. - * @stable ICU 2.0 - */ - virtual char16_t nextPostInc(void); - - /** - * Advances to the next code point in the iteration range (toward - * endIndex()), and returns that code point. If there are no more - * code points to return, returns DONE. - * Note that iteration with "pre-increment" semantics is less - * efficient than iteration with "post-increment" semantics - * that is provided by next32PostInc(). - * @return the next code point in the iteration range. - * @stable ICU 2.0 - */ - virtual UChar32 next32(void); - - /** - * Gets the current code point for returning and advances to the next code point - * in the iteration range - * (toward endIndex()). If there are - * no more code points to return, returns DONE. - * @return the current point. - * @stable ICU 2.0 - */ - virtual UChar32 next32PostInc(void); - - /** - * Returns FALSE if there are no more code units or code points - * at or after the current position in the iteration range. - * This is used with nextPostInc() or next32PostInc() in forward - * iteration. - * @return FALSE if there are no more code units or code points - * at or after the current position in the iteration range. - * @stable ICU 2.0 - */ - virtual UBool hasNext(); - - /** - * Advances to the previous code unit in the iteration range (toward - * startIndex()), and returns that code unit. If there are no more - * code units to return, returns DONE. - * @return the previous code unit in the iteration range. 
- * @stable ICU 2.0 - */ - virtual char16_t previous(void); - - /** - * Advances to the previous code point in the iteration range (toward - * startIndex()), and returns that code point. If there are no more - * code points to return, returns DONE. - * @return the previous code point in the iteration range. - * @stable ICU 2.0 - */ - virtual UChar32 previous32(void); - - /** - * Returns FALSE if there are no more code units or code points - * before the current position in the iteration range. - * This is used with previous() or previous32() in backward - * iteration. - * @return FALSE if there are no more code units or code points - * before the current position in the iteration range. - * @stable ICU 2.0 - */ - virtual UBool hasPrevious(); - - /** - * Moves the current position relative to the start or end of the - * iteration range, or relative to the current position itself. - * The movement is expressed in numbers of code units forward - * or backward by specifying a positive or negative delta. - * @param delta the position relative to origin. A positive delta means forward; - * a negative delta means backward. - * @param origin Origin enumeration {kStart, kCurrent, kEnd} - * @return the new position - * @stable ICU 2.0 - */ - virtual int32_t move(int32_t delta, EOrigin origin); - - /** - * Moves the current position relative to the start or end of the - * iteration range, or relative to the current position itself. - * The movement is expressed in numbers of code points forward - * or backward by specifying a positive or negative delta. - * @param delta the position relative to origin. A positive delta means forward; - * a negative delta means backward. 
- * @param origin Origin enumeration {kStart, kCurrent, kEnd} - * @return the new position - * @stable ICU 2.0 - */ -#ifdef move32 - // One of the system headers right now is sometimes defining a conflicting macro we don't use -#undef move32 -#endif - virtual int32_t move32(int32_t delta, EOrigin origin); - - /** - * Sets the iterator to iterate over a new range of text - * @stable ICU 2.0 - */ - void setText(ConstChar16Ptr newText, int32_t newTextLength); - - /** - * Copies the char16_t array under iteration into the UnicodeString - * referred to by "result". Even if this iterator iterates across - * only a part of this string, the whole string is copied. - * @param result Receives a copy of the text under iteration. - * @stable ICU 2.0 - */ - virtual void getText(UnicodeString& result); - - /** - * Return a class ID for this class (not really public) - * @return a class ID for this class - * @stable ICU 2.0 - */ - static UClassID U_EXPORT2 getStaticClassID(void); - - /** - * Return a class ID for this object (not really public) - * @return a class ID for this object. - * @stable ICU 2.0 - */ - virtual UClassID getDynamicClassID(void) const; - -protected: - /** - * Protected constructor - * @stable ICU 2.0 - */ - UCharCharacterIterator(); - /** - * Protected member text - * @stable ICU 2.0 - */ - const char16_t* text; - -}; - -U_NAMESPACE_END -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/uclean.h b/deps/node/deps/icu-small/source/common/unicode/uclean.h deleted file mode 100644 index ab0cd6da..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uclean.h +++ /dev/null @@ -1,262 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2001-2014, International Business Machines -* Corporation and others. All Rights Reserved. 
-****************************************************************************** -* file name: uclean.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001July05 -* created by: George Rhoten -*/ - -#ifndef __UCLEAN_H__ -#define __UCLEAN_H__ - -#include "unicode/utypes.h" -/** - * \file - * \brief C API: Initialize and clean up ICU - */ - -/** - * Initialize ICU. - * - * Use of this function is optional. It is OK to simply use ICU - * services and functions without first having initialized - * ICU by calling u_init(). - * - * u_init() will attempt to load some part of ICU's data, and is - * useful as a test for configuration or installation problems that - * leave the ICU data inaccessible. A successful invocation of u_init() - * does not, however, guarantee that all ICU data is accessible. - * - * Multiple calls to u_init() cause no harm, aside from the small amount - * of time required. - * - * In old versions of ICU, u_init() was required in multi-threaded applications - * to ensure the thread safety of ICU. u_init() is no longer needed for this purpose. - * - * @param status An ICU UErrorCode parameter. It must not be NULL. - * An Error will be returned if some required part of ICU data can not - * be loaded or initialized. - * The function returns immediately if the input error code indicates a - * failure, as usual. - * - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -u_init(UErrorCode *status); - -#ifndef U_HIDE_SYSTEM_API -/** - * Clean up the system resources, such as allocated memory or open files, - * used in all ICU libraries. This will free/delete all memory owned by the - * ICU libraries, and return them to their original load state. All open ICU - * items (collators, resource bundles, converters, etc.) must be closed before - * calling this function, otherwise ICU may not free its allocated memory - * (e.g. close your converters and resource bundles before calling this - * function). 
Generally, this function should be called once just before - * an application exits. For applications that dynamically load and unload - * the ICU libraries (relatively uncommon), u_cleanup() should be called - * just before the library unload. - *

- * u_cleanup() also clears any ICU heap functions, mutex functions or - * trace functions that may have been set for the process. - * This has the effect of restoring ICU to its initial condition, before - * any of these override functions were installed. Refer to - * u_setMemoryFunctions(), u_setMutexFunctions and - * utrace_setFunctions(). If ICU is to be reinitialized after - * calling u_cleanup(), these runtime override functions will need to - * be set up again if they are still required. - *

- * u_cleanup() is not thread safe. All other threads should stop using ICU - * before calling this function. - *

- * Any open ICU items will be left in an undefined state by u_cleanup(), - * and any subsequent attempt to use such an item will give unpredictable - * results. - *

- * After calling u_cleanup(), an application may continue to use ICU by - * calling u_init(). An application must invoke u_init() first from one single - * thread before allowing other threads call u_init(). All threads existing - * at the time of the first thread's call to u_init() must also call - * u_init() themselves before continuing with other ICU operations. - *

- * The use of u_cleanup() just before an application terminates is optional, - * but it should be called only once for performance reasons. The primary - * benefit is to eliminate reports of memory or resource leaks originating - * in ICU code from the results generated by heap analysis tools. - *

- * Use this function with great care! - *

- * - * @stable ICU 2.0 - * @system - */ -U_STABLE void U_EXPORT2 -u_cleanup(void); - -U_CDECL_BEGIN -/** - * Pointer type for a user supplied memory allocation function. - * @param context user supplied value, obtained from u_setMemoryFunctions(). - * @param size The number of bytes to be allocated - * @return Pointer to the newly allocated memory, or NULL if the allocation failed. - * @stable ICU 2.8 - * @system - */ -typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size); -/** - * Pointer type for a user supplied memory re-allocation function. - * @param context user supplied value, obtained from u_setMemoryFunctions(). - * @param size The number of bytes to be allocated - * @return Pointer to the newly allocated memory, or NULL if the allocation failed. - * @stable ICU 2.8 - * @system - */ -typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size); -/** - * Pointer type for a user supplied memory free function. Behavior should be - * similar the standard C library free(). - * @param context user supplied value, obtained from u_setMemoryFunctions(). - * @param mem Pointer to the memory block to be resized - * @param size The new size for the block - * @return Pointer to the resized memory block, or NULL if the resizing failed. - * @stable ICU 2.8 - * @system - */ -typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem); - -/** - * Set the functions that ICU will use for memory allocation. - * Use of this function is optional; by default (without this function), ICU will - * use the standard C library malloc() and free() functions. - * This function can only be used when ICU is in an initial, unused state, before - * u_init() has been called. - * @param context This pointer value will be saved, and then (later) passed as - * a parameter to the memory functions each time they - * are called. - * @param a Pointer to a user-supplied malloc function. - * @param r Pointer to a user-supplied realloc function. 
- * @param f Pointer to a user-supplied free function. - * @param status Receives error values. - * @stable ICU 2.8 - * @system - */ -U_STABLE void U_EXPORT2 -u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f, - UErrorCode *status); - -U_CDECL_END - -#ifndef U_HIDE_DEPRECATED_API -/********************************************************************************* - * - * Deprecated Functions - * - * The following functions for user supplied mutexes are no longer supported. - * Any attempt to use them will return a U_UNSUPPORTED_ERROR. - * - **********************************************************************************/ - -/** - * An opaque pointer type that represents an ICU mutex. - * For user-implemented mutexes, the value will typically point to a - * struct or object that implements the mutex. - * @deprecated ICU 52. This type is no longer supported. - * @system - */ -typedef void *UMTX; - -U_CDECL_BEGIN -/** - * Function Pointer type for a user supplied mutex initialization function. - * The user-supplied function will be called by ICU whenever ICU needs to create a - * new mutex. The function implementation should create a mutex, and store a pointer - * to something that uniquely identifies the mutex into the UMTX that is supplied - * as a parameter. - * @param context user supplied value, obtained from u_setMutexFunctions(). - * @param mutex Receives a pointer that identifies the new mutex. - * The mutex init function must set the UMTX to a non-null value. - * Subsequent calls by ICU to lock, unlock, or destroy a mutex will - * identify the mutex by the UMTX value. - * @param status Error status. Report errors back to ICU by setting this variable - * with an error code. - * @deprecated ICU 52. This function is no longer supported. 
- * @system - */ -typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCode* status); - - -/** - * Function Pointer type for a user supplied mutex functions. - * One of the user-supplied functions with this signature will be called by ICU - * whenever ICU needs to lock, unlock, or destroy a mutex. - * @param context user supplied value, obtained from u_setMutexFunctions(). - * @param mutex specify the mutex on which to operate. - * @deprecated ICU 52. This function is no longer supported. - * @system - */ -typedef void U_CALLCONV UMtxFn (const void *context, UMTX *mutex); -U_CDECL_END - -/** - * Set the functions that ICU will use for mutex operations - * Use of this function is optional; by default (without this function), ICU will - * directly access system functions for mutex operations - * This function can only be used when ICU is in an initial, unused state, before - * u_init() has been called. - * @param context This pointer value will be saved, and then (later) passed as - * a parameter to the user-supplied mutex functions each time they - * are called. - * @param init Pointer to a mutex initialization function. Must be non-null. - * @param destroy Pointer to the mutex destroy function. Must be non-null. - * @param lock pointer to the mutex lock function. Must be non-null. - * @param unlock Pointer to the mutex unlock function. Must be non-null. - * @param status Receives error values. - * @deprecated ICU 52. This function is no longer supported. - * @system - */ -U_DEPRECATED void U_EXPORT2 -u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock, - UErrorCode *status); - - -/** - * Pointer type for a user supplied atomic increment or decrement function. - * @param context user supplied value, obtained from u_setAtomicIncDecFunctions(). - * @param p Pointer to a 32 bit int to be incremented or decremented - * @return The value of the variable after the inc or dec operation. 
- * @deprecated ICU 52. This function is no longer supported. - * @system - */ -typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p); - -/** - * Set the functions that ICU will use for atomic increment and decrement of int32_t values. - * Use of this function is optional; by default (without this function), ICU will - * use its own internal implementation of atomic increment/decrement. - * This function can only be used when ICU is in an initial, unused state, before - * u_init() has been called. - * @param context This pointer value will be saved, and then (later) passed as - * a parameter to the increment and decrement functions each time they - * are called. This function can only be called - * @param inc Pointer to a function to do an atomic increment operation. Must be non-null. - * @param dec Pointer to a function to do an atomic decrement operation. Must be non-null. - * @param status Receives error values. - * @deprecated ICU 52. This function is no longer supported. - * @system - */ -U_DEPRECATED void U_EXPORT2 -u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec, - UErrorCode *status); - -#endif /* U_HIDE_DEPRECATED_API */ -#endif /* U_HIDE_SYSTEM_API */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ucnv.h b/deps/node/deps/icu-small/source/common/unicode/ucnv.h deleted file mode 100644 index ec7c5f35..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ucnv.h +++ /dev/null @@ -1,2042 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2014, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** - * ucnv.h: - * External APIs for the ICU's codeset conversion library - * Bertrand A. 
Damiba - * - * Modification History: - * - * Date Name Description - * 04/04/99 helena Fixed internal header inclusion. - * 05/11/00 helena Added setFallback and usesFallback APIs. - * 06/29/2000 helena Major rewrite of the callback APIs. - * 12/07/2000 srl Update of documentation - */ - -/** - * \file - * \brief C API: Character conversion - * - *

Character Conversion C API

- * - *

This API is used to convert codepage or character encoded data to and - * from UTF-16. You can open a converter with {@link ucnv_open() }. With that - * converter, you can get its properties, set options, convert your data and - * close the converter.

- * - *

Since many software programs recognize different converter names for - * different types of converters, there are other functions in this API to - * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() }, - * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the - * more frequently used alias functions to get this information.

- * - *

When a converter encounters an illegal, irregular, invalid or unmappable character - * its default behavior is to use a substitution character to replace the - * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() } - * or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines - * many other callback actions that can be used instead of a character substitution.

- * - *

More information about this API can be found in our - * User's - * Guide.

- */ - -#ifndef UCNV_H -#define UCNV_H - -#include "unicode/ucnv_err.h" -#include "unicode/uenum.h" -#include "unicode/localpointer.h" - -#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN) - -#define USET_DEFINED - -/** - * USet is the C API type corresponding to C++ class UnicodeSet. - * It is forward-declared here to avoid including unicode/uset.h file if related - * conversion APIs are not used. - * - * @see ucnv_getUnicodeSet - * @stable ICU 2.4 - */ -typedef struct USet USet; - -#endif - -#if !UCONFIG_NO_CONVERSION - -U_CDECL_BEGIN - -/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */ -#define UCNV_MAX_CONVERTER_NAME_LENGTH 60 -/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */ -#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH) - -/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */ -#define UCNV_SI 0x0F -/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */ -#define UCNV_SO 0x0E - -/** - * Enum for specifying basic types of converters - * @see ucnv_getType - * @stable ICU 2.0 - */ -typedef enum { - /** @stable ICU 2.0 */ - UCNV_UNSUPPORTED_CONVERTER = -1, - /** @stable ICU 2.0 */ - UCNV_SBCS = 0, - /** @stable ICU 2.0 */ - UCNV_DBCS = 1, - /** @stable ICU 2.0 */ - UCNV_MBCS = 2, - /** @stable ICU 2.0 */ - UCNV_LATIN_1 = 3, - /** @stable ICU 2.0 */ - UCNV_UTF8 = 4, - /** @stable ICU 2.0 */ - UCNV_UTF16_BigEndian = 5, - /** @stable ICU 2.0 */ - UCNV_UTF16_LittleEndian = 6, - /** @stable ICU 2.0 */ - UCNV_UTF32_BigEndian = 7, - /** @stable ICU 2.0 */ - UCNV_UTF32_LittleEndian = 8, - /** @stable ICU 2.0 */ - UCNV_EBCDIC_STATEFUL = 9, - /** @stable ICU 2.0 */ - UCNV_ISO_2022 = 10, - - /** @stable ICU 2.0 */ - UCNV_LMBCS_1 = 11, - /** @stable ICU 2.0 */ - UCNV_LMBCS_2, - /** @stable ICU 2.0 */ - UCNV_LMBCS_3, - /** @stable ICU 2.0 */ - UCNV_LMBCS_4, - /** @stable ICU 2.0 */ - UCNV_LMBCS_5, - /** @stable ICU 2.0 */ - 
UCNV_LMBCS_6, - /** @stable ICU 2.0 */ - UCNV_LMBCS_8, - /** @stable ICU 2.0 */ - UCNV_LMBCS_11, - /** @stable ICU 2.0 */ - UCNV_LMBCS_16, - /** @stable ICU 2.0 */ - UCNV_LMBCS_17, - /** @stable ICU 2.0 */ - UCNV_LMBCS_18, - /** @stable ICU 2.0 */ - UCNV_LMBCS_19, - /** @stable ICU 2.0 */ - UCNV_LMBCS_LAST = UCNV_LMBCS_19, - /** @stable ICU 2.0 */ - UCNV_HZ, - /** @stable ICU 2.0 */ - UCNV_SCSU, - /** @stable ICU 2.0 */ - UCNV_ISCII, - /** @stable ICU 2.0 */ - UCNV_US_ASCII, - /** @stable ICU 2.0 */ - UCNV_UTF7, - /** @stable ICU 2.2 */ - UCNV_BOCU1, - /** @stable ICU 2.2 */ - UCNV_UTF16, - /** @stable ICU 2.2 */ - UCNV_UTF32, - /** @stable ICU 2.2 */ - UCNV_CESU8, - /** @stable ICU 2.4 */ - UCNV_IMAP_MAILBOX, - /** @stable ICU 4.8 */ - UCNV_COMPOUND_TEXT, - - /* Number of converter types for which we have conversion routines. */ - UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES -} UConverterType; - -/** - * Enum for specifying which platform a converter ID refers to. - * The use of platform/CCSID is not recommended. See ucnv_openCCSID(). - * - * @see ucnv_getPlatform - * @see ucnv_openCCSID - * @see ucnv_getCCSID - * @stable ICU 2.0 - */ -typedef enum { - UCNV_UNKNOWN = -1, - UCNV_IBM = 0 -} UConverterPlatform; - -/** - * Function pointer for error callback in the codepage to unicode direction. - * Called when an error has occurred in conversion to unicode, or on open/close of the callback (see reason). - * @param context Pointer to the callback's private data - * @param args Information about the conversion in progress - * @param codeUnits Points to 'length' bytes of the concerned codepage sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param reason Defines the reason the callback was invoked - * @param pErrorCode ICU error code in/out parameter. - * For converter callback functions, set to a conversion error - * before the call, and the callback may reset it to U_ZERO_ERROR. 
- * @see ucnv_setToUCallBack - * @see UConverterToUnicodeArgs - * @stable ICU 2.0 - */ -typedef void (U_EXPORT2 *UConverterToUCallback) ( - const void* context, - UConverterToUnicodeArgs *args, - const char *codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode *pErrorCode); - -/** - * Function pointer for error callback in the unicode to codepage direction. - * Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason). - * @param context Pointer to the callback's private data - * @param args Information about the conversion in progress - * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. - * @param reason Defines the reason the callback was invoked - * @param pErrorCode ICU error code in/out parameter. - * For converter callback functions, set to a conversion error - * before the call, and the callback may reset it to U_ZERO_ERROR. - * @see ucnv_setFromUCallBack - * @stable ICU 2.0 - */ -typedef void (U_EXPORT2 *UConverterFromUCallback) ( - const void* context, - UConverterFromUnicodeArgs *args, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode *pErrorCode); - -U_CDECL_END - -/** - * Character that separates converter names from options and options from each other. - * @see ucnv_open - * @stable ICU 2.0 - */ -#define UCNV_OPTION_SEP_CHAR ',' - -/** - * String version of UCNV_OPTION_SEP_CHAR. - * @see ucnv_open - * @stable ICU 2.0 - */ -#define UCNV_OPTION_SEP_STRING "," - -/** - * Character that separates a converter option from its value. - * @see ucnv_open - * @stable ICU 2.0 - */ -#define UCNV_VALUE_SEP_CHAR '=' - -/** - * String version of UCNV_VALUE_SEP_CHAR. 
- * @see ucnv_open - * @stable ICU 2.0 - */ -#define UCNV_VALUE_SEP_STRING "=" - -/** - * Converter option for specifying a locale. - * For example, ucnv_open("SCSU,locale=ja", &errorCode); - * See convrtrs.txt. - * - * @see ucnv_open - * @stable ICU 2.0 - */ -#define UCNV_LOCALE_OPTION_STRING ",locale=" - -/** - * Converter option for specifying a version selector (0..9) for some converters. - * For example, - * \code - * ucnv_open("UTF-7,version=1", &errorCode); - * \endcode - * See convrtrs.txt. - * - * @see ucnv_open - * @stable ICU 2.4 - */ -#define UCNV_VERSION_OPTION_STRING ",version=" - -/** - * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages. - * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on - * S/390 (z/OS) Unix System Services (Open Edition). - * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode); - * See convrtrs.txt. - * - * @see ucnv_open - * @stable ICU 2.4 - */ -#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl" - -/** - * Do a fuzzy compare of two converter/alias names. - * The comparison is case-insensitive, ignores leading zeroes if they are not - * followed by further digits, and ignores all but letters and digits. - * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent. - * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22 - * at http://www.unicode.org/reports/tr22/ - * - * @param name1 a converter name or alias, zero-terminated - * @param name2 a converter name or alias, zero-terminated - * @return 0 if the names match, or a negative value if the name1 - * lexically precedes name2, or a positive value if the name1 - * lexically follows name2. - * @stable ICU 2.0 - */ -U_STABLE int U_EXPORT2 -ucnv_compareNames(const char *name1, const char *name2); - - -/** - * Creates a UConverter object with the name of a coded character set specified as a C string. 
- * The actual name will be resolved with the alias file - * using a case-insensitive string comparison that ignores - * leading zeroes and all non-alphanumeric characters. - * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent. - * (See also ucnv_compareNames().) - * If NULL is passed for the converter name, it will create one with the - * getDefaultName return value. - * - *

A converter name for ICU 1.5 and above may contain options - * like a locale specification to control the specific behavior of - * the newly instantiated converter. - * The meaning of the options depends on the particular converter. - * If an option is not defined for or recognized by a given converter, then it is ignored.

- * - *

Options are appended to the converter name string, with a - * UCNV_OPTION_SEP_CHAR between the name and the first option and - * also between adjacent options.

- * - *

If the alias is ambiguous, then the preferred converter is used - * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.

- * - *

The conversion behavior and names can vary between platforms. ICU may - * convert some characters differently from other platforms. Details on this topic - * are in the User's - * Guide. Aliases starting with a "cp" prefix have no specific meaning - * other than being an alias starting with the letters "cp". Please do not - * associate any meaning with these aliases.

- * - * \snippet samples/ucnv/convsamp.cpp ucnv_open - * - * @param converterName Name of the coded character set table. - * This may have options appended to the string. - * IANA alias character set names, IBM CCSIDs starting with "ibm-", - * Windows codepage numbers starting with "windows-" are frequently - * used for this parameter. See ucnv_getAvailableName and - * ucnv_getAlias for a complete list that is available. - * If this parameter is NULL, the default converter will be used. - * @param err outgoing error status U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR - * @return the created Unicode converter object, or NULL if an error occurred - * @see ucnv_openU - * @see ucnv_openCCSID - * @see ucnv_getAvailableName - * @see ucnv_getAlias - * @see ucnv_getDefaultName - * @see ucnv_close - * @see ucnv_compareNames - * @stable ICU 2.0 - */ -U_STABLE UConverter* U_EXPORT2 -ucnv_open(const char *converterName, UErrorCode *err); - - -/** - * Creates a Unicode converter with the names specified as unicode string. - * The name should be limited to the ASCII-7 alphanumerics range. - * The actual name will be resolved with the alias file - * using a case-insensitive string comparison that ignores - * leading zeroes and all non-alphanumeric characters. - * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent. - * (See also ucnv_compareNames().) - * If NULL is passed for the converter name, it will create - * one with the ucnv_getDefaultName() return value. - * If the alias is ambiguous, then the preferred converter is used - * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. - * - *

See ucnv_open for the complete details

- * @param name Name of the UConverter table in a zero terminated - * Unicode string - * @param err outgoing error status U_MEMORY_ALLOCATION_ERROR, - * U_FILE_ACCESS_ERROR - * @return the created Unicode converter object, or NULL if an - * error occurred - * @see ucnv_open - * @see ucnv_openCCSID - * @see ucnv_close - * @see ucnv_compareNames - * @stable ICU 2.0 - */ -U_STABLE UConverter* U_EXPORT2 -ucnv_openU(const UChar *name, - UErrorCode *err); - -/** - * Creates a UConverter object from a CCSID number and platform pair. - * Note that the usefulness of this function is limited to platforms with numeric - * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for - * encodings. - * - * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related. - * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and - * for some Unicode conversion tables there are multiple CCSIDs. - * Some "alternate" Unicode conversion tables are provided by the - * IBM CDRA conversion table registry. - * The most prominent example of a systematic modification of conversion tables that is - * not provided in the form of conversion table files in the repository is - * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all - * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well. - * - * Only IBM default conversion tables are accessible with ucnv_openCCSID(). - * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated - * with that CCSID. - * - * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM. - * - * In summary, the use of CCSIDs and the associated API functions is not recommended. 
- * - * In order to open a converter with the default IBM CDRA Unicode conversion table, - * you can use this function or use the prefix "ibm-": - * \code - * char name[20]; - * sprintf(name, "ibm-%hu", ccsid); - * cnv=ucnv_open(name, &errorCode); - * \endcode - * - * In order to open a converter with the IBM S/390 Unix System Services variant - * of a Unicode/EBCDIC conversion table, - * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING: - * \code - * char name[20]; - * sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid); - * cnv=ucnv_open(name, &errorCode); - * \endcode - * - * In order to open a converter from a Microsoft codepage number, use the prefix "cp": - * \code - * char name[20]; - * sprintf(name, "cp%hu", codepageID); - * cnv=ucnv_open(name, &errorCode); - * \endcode - * - * If the alias is ambiguous, then the preferred converter is used - * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. - * - * @param codepage codepage number to create - * @param platform the platform in which the codepage number exists - * @param err error status U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR - * @return the created Unicode converter object, or NULL if an error - * occurred. - * @see ucnv_open - * @see ucnv_openU - * @see ucnv_close - * @see ucnv_getCCSID - * @see ucnv_getPlatform - * @see UConverterPlatform - * @stable ICU 2.0 - */ -U_STABLE UConverter* U_EXPORT2 -ucnv_openCCSID(int32_t codepage, - UConverterPlatform platform, - UErrorCode * err); - -/** - *

Creates a UConverter object specified from a packageName and a converterName.

- * - *

The packageName and converterName must point to an ICU udata object, as defined by - * udata_open( packageName, "cnv", converterName, err) or equivalent. - * Typically, packageName will refer to a (.dat) file, or to a package registered with - * udata_setAppData(). Using a full file or directory pathname for packageName is deprecated.

- * - *

The name will NOT be looked up in the alias mechanism, nor will the converter be - * stored in the converter cache or the alias table. The only way to open further converters - * is to call this function multiple times, or to use the ucnv_safeClone() function to clone a - * 'master' converter.

- * - *

A future version of ICU may add alias table lookups and/or caching - * to this function.

- * - *

Example Use: - * cnv = ucnv_openPackage("myapp", "myconverter", &err); - *

- * - * @param packageName name of the package (equivalent to 'path' in udata_open() call) - * @param converterName name of the data item to be used, without suffix. - * @param err outgoing error status U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR - * @return the created Unicode converter object, or NULL if an error occurred - * @see udata_open - * @see ucnv_open - * @see ucnv_safeClone - * @see ucnv_close - * @stable ICU 2.2 - */ -U_STABLE UConverter* U_EXPORT2 -ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err); - -/** - * Thread safe converter cloning operation. - * For most efficient operation, pass in a stackBuffer (and a *pBufferSize) - * with at least U_CNV_SAFECLONE_BUFFERSIZE bytes of space. - * If the buffer size is sufficient, then the clone will use the stack buffer; - * otherwise, it will be allocated, and *pBufferSize will indicate - * the actual size. (This should not occur with U_CNV_SAFECLONE_BUFFERSIZE.) - * - * You must ucnv_close() the clone in any case. - * - * If *pBufferSize==0, (regardless of whether stackBuffer==NULL or not) - * then *pBufferSize will be changed to a sufficient size - * for cloning this converter, - * without actually cloning the converter ("pure pre-flighting"). - * - * If *pBufferSize is greater than zero but not large enough for a stack-based - * clone, then the converter is cloned using newly allocated memory - * and *pBufferSize is changed to the necessary size. - * - * If the converter clone fits into the stack buffer but the stack buffer is not - * sufficiently aligned for the clone, then the clone will use an - * adjusted pointer and use an accordingly smaller buffer size. - * - * @param cnv converter to be cloned - * @param stackBuffer Deprecated functionality as of ICU 52, use NULL.
- * user allocated space for the new clone. If NULL new memory will be allocated. - * If buffer is not large enough, new memory will be allocated. - * Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations. - * @param pBufferSize Deprecated functionality as of ICU 52, use NULL or 1.
- * pointer to size of allocated space. - * @param status to indicate whether the operation went on smoothly or there were errors - * An informational status value, U_SAFECLONE_ALLOCATED_WARNING, - * is used if any allocations were necessary. - * However, it is better to check if *pBufferSize grew for checking for - * allocations because warning codes can be overridden by subsequent - * function calls. - * @return pointer to the new clone - * @stable ICU 2.0 - */ -U_STABLE UConverter * U_EXPORT2 -ucnv_safeClone(const UConverter *cnv, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status); - -#ifndef U_HIDE_DEPRECATED_API - -/** - * \def U_CNV_SAFECLONE_BUFFERSIZE - * Definition of a buffer size that is designed to be large enough for - * converters to be cloned with ucnv_safeClone(). - * @deprecated ICU 52. Do not rely on ucnv_safeClone() cloning into any provided buffer. - */ -#define U_CNV_SAFECLONE_BUFFERSIZE 1024 - -#endif /* U_HIDE_DEPRECATED_API */ - -/** - * Deletes the unicode converter and releases resources associated - * with just this instance. - * Does not free up shared converter tables. - * - * @param converter the converter object to be deleted - * @see ucnv_open - * @see ucnv_openU - * @see ucnv_openCCSID - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_close(UConverter * converter); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUConverterPointer - * "Smart pointer" class, closes a UConverter via ucnv_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterPointer, UConverter, ucnv_close); - -U_NAMESPACE_END - -#endif - -/** - * Fills in the output parameter, subChars, with the substitution characters - * as multiple bytes. - * If ucnv_setSubstString() set a Unicode string because the converter is - * stateful, then subChars will be an empty string. 
- * - * @param converter the Unicode converter - * @param subChars the substitution characters - * @param len on input the capacity of subChars, on output the number - * of bytes copied to it - * @param err the outgoing error status code. - * If the substitution character array is too small, an - * U_INDEX_OUTOFBOUNDS_ERROR will be returned. - * @see ucnv_setSubstString - * @see ucnv_setSubstChars - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getSubstChars(const UConverter *converter, - char *subChars, - int8_t *len, - UErrorCode *err); - -/** - * Sets the substitution chars when converting from unicode to a codepage. The - * substitution is specified as a string of 1-4 bytes, and may contain - * NULL bytes. - * The subChars must represent a single character. The caller needs to know the - * byte sequence of a valid character in the converter's charset. - * For some converters, for example some ISO 2022 variants, only single-byte - * substitution characters may be supported. - * The newer ucnv_setSubstString() function relaxes these limitations. - * - * @param converter the Unicode converter - * @param subChars the substitution character byte sequence we want set - * @param len the number of bytes in subChars - * @param err the error status code. U_INDEX_OUTOFBOUNDS_ERROR if - * len is bigger than the maximum number of bytes allowed in subchars - * @see ucnv_setSubstString - * @see ucnv_getSubstChars - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_setSubstChars(UConverter *converter, - const char *subChars, - int8_t len, - UErrorCode *err); - -/** - * Set a substitution string for converting from Unicode to a charset. - * The caller need not know the charset byte sequence for each charset. 
- * - * Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence - * for a single character, this function takes a Unicode string with - * zero, one or more characters, and immediately verifies that the string can be - * converted to the charset. - * If not, or if the result is too long (more than 32 bytes as of ICU 3.6), - * then the function returns with an error accordingly. - * - * Also unlike ucnv_setSubstChars(), this function works for stateful charsets - * by converting on the fly at the point of substitution rather than setting - * a fixed byte sequence. - * - * @param cnv The UConverter object. - * @param s The Unicode string. - * @param length The number of UChars in s, or -1 for a NUL-terminated string. - * @param err Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * - * @see ucnv_setSubstChars - * @see ucnv_getSubstChars - * @stable ICU 3.6 - */ -U_STABLE void U_EXPORT2 -ucnv_setSubstString(UConverter *cnv, - const UChar *s, - int32_t length, - UErrorCode *err); - -/** - * Fills in the output parameter, errBytes, with the error characters from the - * last failing conversion. - * - * @param converter the Unicode converter - * @param errBytes the codepage bytes which were in error - * @param len on input the capacity of errBytes, on output the number of - * bytes which were copied to it - * @param err the error status code. - * If the substitution character array is too small, an - * U_INDEX_OUTOFBOUNDS_ERROR will be returned. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getInvalidChars(const UConverter *converter, - char *errBytes, - int8_t *len, - UErrorCode *err); - -/** - * Fills in the output parameter, errChars, with the error characters from the - * last failing conversion. 
- * - * @param converter the Unicode converter - * @param errUChars the UChars which were in error - * @param len on input the capacity of errUChars, on output the number of - * UChars which were copied to it - * @param err the error status code. - * If the substitution character array is too small, an - * U_INDEX_OUTOFBOUNDS_ERROR will be returned. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getInvalidUChars(const UConverter *converter, - UChar *errUChars, - int8_t *len, - UErrorCode *err); - -/** - * Resets the state of a converter to the default state. This is used - * in the case of an error, to restart a conversion from a known default state. - * It will also empty the internal output buffers. - * @param converter the Unicode converter - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_reset(UConverter *converter); - -/** - * Resets the to-Unicode part of a converter state to the default state. - * This is used in the case of an error to restart a conversion to - * Unicode to a known default state. It will also empty the internal - * output buffers used for the conversion to Unicode codepoints. - * @param converter the Unicode converter - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_resetToUnicode(UConverter *converter); - -/** - * Resets the from-Unicode part of a converter state to the default state. - * This is used in the case of an error to restart a conversion from - * Unicode to a known default state. It will also empty the internal output - * buffers used for the conversion from Unicode codepoints. - * @param converter the Unicode converter - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_resetFromUnicode(UConverter *converter); - -/** - * Returns the maximum number of bytes that are output per UChar in conversion - * from Unicode using this converter. - * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING - * to calculate the size of a target buffer for conversion from Unicode. 
- * - * Note: Before ICU 2.8, this function did not return reliable numbers for - * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS. - * - * This number may not be the same as the maximum number of bytes per - * "conversion unit". In other words, it may not be the intuitively expected - * number of bytes per character that would be published for a charset, - * and may not fulfill any other purpose than the allocation of an output - * buffer of guaranteed sufficient size for a given input length and converter. - * - * Examples for special cases that are taken into account: - * - Supplementary code points may convert to more bytes than BMP code points. - * This function returns bytes per UChar (UTF-16 code unit), not per - * Unicode code point, for efficient buffer allocation. - * - State-shifting output (SI/SO, escapes, etc.) from stateful converters. - * - When m input UChars are converted to n output bytes, then the maximum m/n - * is taken into account. - * - * The number returned here does not take into account - * (see UCNV_GET_MAX_BYTES_FOR_STRING): - * - callbacks which output more than one charset character sequence per call, - * like escape callbacks - * - initial and final non-character bytes that are output by some converters - * (automatic BOMs, initial escape sequence, final SI, etc.) - * - * Examples for returned values: - * - SBCS charsets: 1 - * - Shift-JIS: 2 - * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted) - * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_) - * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS) - * - ISO-2022: 3 (always outputs UTF-8) - * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS) - * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS) - * - * @param converter The Unicode converter. 
- * @return The maximum number of bytes per UChar (16 bit code unit) - * that are output by ucnv_fromUnicode(), - * to be used together with UCNV_GET_MAX_BYTES_FOR_STRING - * for buffer allocation. - * - * @see UCNV_GET_MAX_BYTES_FOR_STRING - * @see ucnv_getMinCharSize - * @stable ICU 2.0 - */ -U_STABLE int8_t U_EXPORT2 -ucnv_getMaxCharSize(const UConverter *converter); - -/** - * Calculates the size of a buffer for conversion from Unicode to a charset. - * The calculated size is guaranteed to be sufficient for this conversion. - * - * It takes into account initial and final non-character bytes that are output - * by some converters. - * It does not take into account callbacks which output more than one charset - * character sequence per call, like escape callbacks. - * The default (substitution) callback only outputs one charset character sequence. - * - * @param length Number of UChars to be converted. - * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter - * that will be used. - * @return Size of a buffer that will be large enough to hold the output bytes of - * converting length UChars with the converter that returned the maxCharSize. - * - * @see ucnv_getMaxCharSize - * @stable ICU 2.8 - */ -#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \ - (((int32_t)(length)+10)*(int32_t)(maxCharSize)) - -/** - * Returns the minimum byte length (per codepoint) for characters in this codepage. - * This is usually either 1 or 2. - * @param converter the Unicode converter - * @return the minimum number of bytes per codepoint allowed by this particular converter - * @see ucnv_getMaxCharSize - * @stable ICU 2.0 - */ -U_STABLE int8_t U_EXPORT2 -ucnv_getMinCharSize(const UConverter *converter); - -/** - * Returns the display name of the converter passed in based on the Locale - * passed in. If the locale contains no display name, the internal ASCII - * name will be filled in. - * - * @param converter the Unicode converter. 
- * @param displayLocale is the specific Locale we want to localized for - * @param displayName user provided buffer to be filled in - * @param displayNameCapacity size of displayName Buffer - * @param err error status code - * @return displayNameLength number of UChar needed in displayName - * @see ucnv_getName - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_getDisplayName(const UConverter *converter, - const char *displayLocale, - UChar *displayName, - int32_t displayNameCapacity, - UErrorCode *err); - -/** - * Gets the internal, canonical name of the converter (zero-terminated). - * The lifetime of the returned string will be that of the converter - * passed to this function. - * @param converter the Unicode converter - * @param err UErrorCode status - * @return the internal name of the converter - * @see ucnv_getDisplayName - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -ucnv_getName(const UConverter *converter, UErrorCode *err); - -/** - * Gets a codepage number associated with the converter. This is not guaranteed - * to be the one used to create the converter. Some converters do not represent - * platform registered codepages and return zero for the codepage number. - * The error code fill-in parameter indicates if the codepage number - * is available. - * Does not check if the converter is NULL or if converter's data - * table is NULL. - * - * Important: The use of CCSIDs is not recommended because it is limited - * to only two platforms in principle and only one (UCNV_IBM) in the current - * ICU converter API. - * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely. - * For more details see ucnv_openCCSID(). - * - * @param converter the Unicode converter - * @param err the error status code. 
- * @return If any error occurs, -1 will be returned otherwise, the codepage number - * will be returned - * @see ucnv_openCCSID - * @see ucnv_getPlatform - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_getCCSID(const UConverter *converter, - UErrorCode *err); - -/** - * Gets a codepage platform associated with the converter. Currently, - * only UCNV_IBM will be returned. - * Does not test if the converter is NULL or if converter's data - * table is NULL. - * @param converter the Unicode converter - * @param err the error status code. - * @return The codepage platform - * @stable ICU 2.0 - */ -U_STABLE UConverterPlatform U_EXPORT2 -ucnv_getPlatform(const UConverter *converter, - UErrorCode *err); - -/** - * Gets the type of the converter - * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, - * EBCDIC_STATEFUL, LATIN_1 - * @param converter a valid, opened converter - * @return the type of the converter - * @stable ICU 2.0 - */ -U_STABLE UConverterType U_EXPORT2 -ucnv_getType(const UConverter * converter); - -/** - * Gets the "starter" (lead) bytes for converters of type MBCS. - * Will fill in an U_ILLEGAL_ARGUMENT_ERROR if converter passed in - * is not MBCS. Fills in an array of type UBool, with the value of the byte - * as offset to the array. For example, if (starters[0x20] == TRUE) at return, - * it means that the byte 0x20 is a starter byte in this converter. - * Context pointers are always owned by the caller. - * - * @param converter a valid, opened converter of type MBCS - * @param starters an array of size 256 to be filled in - * @param err error status, U_ILLEGAL_ARGUMENT_ERROR if the - * converter is not a type which can return starters. - * @see ucnv_getType - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getStarters(const UConverter* converter, - UBool starters[256], - UErrorCode* err); - - -/** - * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet(). 
- * @see ucnv_getUnicodeSet - * @stable ICU 2.6 - */ -typedef enum UConverterUnicodeSet { - /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */ - UCNV_ROUNDTRIP_SET, - /** Select the set of Unicode code points with roundtrip or fallback mappings. @stable ICU 4.0 */ - UCNV_ROUNDTRIP_AND_FALLBACK_SET, -#ifndef U_HIDE_DEPRECATED_API - /** - * Number of UConverterUnicodeSet selectors. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UCNV_SET_COUNT -#endif // U_HIDE_DEPRECATED_API -} UConverterUnicodeSet; - - -/** - * Returns the set of Unicode code points that can be converted by an ICU converter. - * - * Returns one of several kinds of set: - * - * 1. UCNV_ROUNDTRIP_SET - * - * The set of all Unicode code points that can be roundtrip-converted - * (converted without any data loss) with the converter (ucnv_fromUnicode()). - * This set will not include code points that have fallback mappings - * or are only the result of reverse fallback mappings. - * This set will also not include PUA code points with fallbacks, although - * ucnv_fromUnicode() will always uses those mappings despite ucnv_setFallback(). - * See UTR #22 "Character Mapping Markup Language" - * at http://www.unicode.org/reports/tr22/ - * - * This is useful for example for - * - checking that a string or document can be roundtrip-converted with a converter, - * without/before actually performing the conversion - * - testing if a converter can be used for text for typical text for a certain locale, - * by comparing its roundtrip set with the set of ExemplarCharacters from - * ICU's locale data or other sources - * - * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET - * - * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode()) - * when fallbacks are turned on (see ucnv_setFallback()). - * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks). 
- * - * In the future, there may be more UConverterUnicodeSet choices to select - * sets with different properties. - * - * @param cnv The converter for which a set is requested. - * @param setFillIn A valid USet *. It will be cleared by this function before - * the converter's specific set is filled into the USet. - * @param whichSet A UConverterUnicodeSet selector; - * currently UCNV_ROUNDTRIP_SET is the only supported value. - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * - * @see UConverterUnicodeSet - * @see uset_open - * @see uset_close - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -ucnv_getUnicodeSet(const UConverter *cnv, - USet *setFillIn, - UConverterUnicodeSet whichSet, - UErrorCode *pErrorCode); - -/** - * Gets the current calback function used by the converter when an illegal - * or invalid codepage sequence is found. - * Context pointers are always owned by the caller. - * - * @param converter the unicode converter - * @param action fillin: returns the callback function pointer - * @param context fillin: returns the callback's private void* context - * @see ucnv_setToUCallBack - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getToUCallBack (const UConverter * converter, - UConverterToUCallback *action, - const void **context); - -/** - * Gets the current callback function used by the converter when illegal - * or invalid Unicode sequence is found. - * Context pointers are always owned by the caller. 
- * - * @param converter the unicode converter - * @param action fillin: returns the callback function pointer - * @param context fillin: returns the callback's private void* context - * @see ucnv_setFromUCallBack - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getFromUCallBack (const UConverter * converter, - UConverterFromUCallback *action, - const void **context); - -/** - * Changes the callback function used by the converter when - * an illegal or invalid sequence is found. - * Context pointers are always owned by the caller. - * Predefined actions and contexts can be found in the ucnv_err.h header. - * - * @param converter the unicode converter - * @param newAction the new callback function - * @param newContext the new toUnicode callback context pointer. This can be NULL. - * @param oldAction fillin: returns the old callback function pointer. This can be NULL. - * @param oldContext fillin: returns the old callback's private void* context. This can be NULL. - * @param err The error code status - * @see ucnv_getToUCallBack - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_setToUCallBack (UConverter * converter, - UConverterToUCallback newAction, - const void* newContext, - UConverterToUCallback *oldAction, - const void** oldContext, - UErrorCode * err); - -/** - * Changes the current callback function used by the converter when - * an illegal or invalid sequence is found. - * Context pointers are always owned by the caller. - * Predefined actions and contexts can be found in the ucnv_err.h header. - * - * @param converter the unicode converter - * @param newAction the new callback function - * @param newContext the new fromUnicode callback context pointer. This can be NULL. - * @param oldAction fillin: returns the old callback function pointer. This can be NULL. - * @param oldContext fillin: returns the old callback's private void* context. This can be NULL. 
- * @param err The error code status - * @see ucnv_getFromUCallBack - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_setFromUCallBack (UConverter * converter, - UConverterFromUCallback newAction, - const void *newContext, - UConverterFromUCallback *oldAction, - const void **oldContext, - UErrorCode * err); - -/** - * Converts an array of unicode characters to an array of codepage - * characters. This function is optimized for converting a continuous - * stream of data in buffer-sized chunks, where the entire source and - * target does not fit in available buffers. - * - * The source pointer is an in/out parameter. It starts out pointing where the - * conversion is to begin, and ends up pointing after the last UChar consumed. - * - * Target similarly starts out pointer at the first available byte in the output - * buffer, and ends up pointing after the last byte written to the output. - * - * The converter always attempts to consume the entire source buffer, unless - * (1.) the target buffer is full, or (2.) a failing error is returned from the - * current callback function. When a successful error status has been - * returned, it means that all of the source buffer has been - * consumed. At that point, the caller should reset the source and - * sourceLimit pointers to point to the next chunk. - * - * At the end of the stream (flush==TRUE), the input is completely consumed - * when *source==sourceLimit and no error code is set. - * The converter object is then automatically reset by this function. - * (This means that a converter need not be reset explicitly between data - * streams if it finishes the previous stream without errors.) - * - * This is a stateful conversion. Additionally, even when all source data has - * been consumed, some data may be in the converters' internal state. 
- * Call this function repeatedly, updating the target pointers with - * the next empty chunk of target in case of a - * U_BUFFER_OVERFLOW_ERROR, and updating the source pointers - * with the next chunk of source when a successful error status is - * returned, until there are no more chunks of source data. - * @param converter the Unicode converter - * @param target I/O parameter. Input : Points to the beginning of the buffer to copy - * codepage characters to. Output : points to after the last codepage character copied - * to target. - * @param targetLimit the pointer just after last of the target buffer - * @param source I/O parameter, pointer to pointer to the source Unicode character buffer. - * @param sourceLimit the pointer just after the last of the source buffer - * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number - * of allocated cells as target. Will fill in offsets from target to source pointer - * e.g: offsets[3] is equal to 6, it means that the target[3] was a result of transcoding source[6] - * For output data carried across calls, and other data without a specific source character - * (such as from escape sequences or callbacks) -1 will be placed for offsets. - * @param flush set to TRUE if the current source buffer is the last available - * chunk of the source, FALSE otherwise. Note that if a failing status is returned, - * this function may have to be called multiple times with flush set to TRUE until - * the source buffer is consumed. - * @param err the error status. U_ILLEGAL_ARGUMENT_ERROR will be set if the - * converter is NULL. - * U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is - * still data to be written to the target. 
- * @see ucnv_fromUChars - * @see ucnv_convert - * @see ucnv_getMinCharSize - * @see ucnv_setToUCallBack - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_fromUnicode (UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - UBool flush, - UErrorCode * err); - -/** - * Converts a buffer of codepage bytes into an array of unicode UChars - * characters. This function is optimized for converting a continuous - * stream of data in buffer-sized chunks, where the entire source and - * target does not fit in available buffers. - * - * The source pointer is an in/out parameter. It starts out pointing where the - * conversion is to begin, and ends up pointing after the last byte of source consumed. - * - * Target similarly starts out pointer at the first available UChar in the output - * buffer, and ends up pointing after the last UChar written to the output. - * It does NOT necessarily keep UChar sequences together. - * - * The converter always attempts to consume the entire source buffer, unless - * (1.) the target buffer is full, or (2.) a failing error is returned from the - * current callback function. When a successful error status has been - * returned, it means that all of the source buffer has been - * consumed. At that point, the caller should reset the source and - * sourceLimit pointers to point to the next chunk. - * - * At the end of the stream (flush==TRUE), the input is completely consumed - * when *source==sourceLimit and no error code is set - * The converter object is then automatically reset by this function. - * (This means that a converter need not be reset explicitly between data - * streams if it finishes the previous stream without errors.) - * - * This is a stateful conversion. Additionally, even when all source data has - * been consumed, some data may be in the converters' internal state. 
- * Call this function repeatedly, updating the target pointers with - * the next empty chunk of target in case of a - * U_BUFFER_OVERFLOW_ERROR, and updating the source pointers - * with the next chunk of source when a successful error status is - * returned, until there are no more chunks of source data. - * @param converter the Unicode converter - * @param target I/O parameter. Input : Points to the beginning of the buffer to copy - * UChars into. Output : points to after the last UChar copied. - * @param targetLimit the pointer just after the end of the target buffer - * @param source I/O parameter, pointer to pointer to the source codepage buffer. - * @param sourceLimit the pointer to the byte after the end of the source buffer - * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number - * of allocated cells as target. Will fill in offsets from target to source pointer - * e.g: offsets[3] is equal to 6, it means that the target[3] was a result of transcoding source[6] - * For output data carried across calls, and other data without a specific source character - * (such as from escape sequences or callbacks) -1 will be placed for offsets. - * @param flush set to TRUE if the current source buffer is the last available - * chunk of the source, FALSE otherwise. Note that if a failing status is returned, - * this function may have to be called multiple times with flush set to TRUE until - * the source buffer is consumed. - * @param err the error status. U_ILLEGAL_ARGUMENT_ERROR will be set if the - * converter is NULL. - * U_BUFFER_OVERFLOW_ERROR will be set if the target is full and there is - * still data to be written to the target. 
- * @see ucnv_fromUChars - * @see ucnv_convert - * @see ucnv_getMinCharSize - * @see ucnv_setFromUCallBack - * @see ucnv_getNextUChar - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_toUnicode(UConverter *converter, - UChar **target, - const UChar *targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - UBool flush, - UErrorCode *err); - -/** - * Convert the Unicode string into a codepage string using an existing UConverter. - * The output string is NUL-terminated if possible. - * - * This function is a more convenient but less powerful version of ucnv_fromUnicode(). - * It is only useful for whole strings, not for streaming conversion. - * - * The maximum output buffer capacity required (barring output from callbacks) will be - * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)). - * - * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called) - * @param src the input Unicode string - * @param srcLength the input string length, or -1 if NUL-terminated - * @param dest destination string buffer, can be NULL if destCapacity==0 - * @param destCapacity the number of chars available at dest - * @param pErrorCode normal ICU error code; - * common error codes that may be set by this function include - * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING, - * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors - * @return the length of the output string, not counting the terminating NUL; - * if the length is greater than destCapacity, then the string will not fit - * and a buffer of the indicated length would need to be passed in - * @see ucnv_fromUnicode - * @see ucnv_convert - * @see UCNV_GET_MAX_BYTES_FOR_STRING - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_fromUChars(UConverter *cnv, - char *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Convert the codepage string into a Unicode string using an existing UConverter. 
- * The output string is NUL-terminated if possible. - * - * This function is a more convenient but less powerful version of ucnv_toUnicode(). - * It is only useful for whole strings, not for streaming conversion. - * - * The maximum output buffer capacity required (barring output from callbacks) will be - * 2*srcLength (each char may be converted into a surrogate pair). - * - * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called) - * @param src the input codepage string - * @param srcLength the input string length, or -1 if NUL-terminated - * @param dest destination string buffer, can be NULL if destCapacity==0 - * @param destCapacity the number of UChars available at dest - * @param pErrorCode normal ICU error code; - * common error codes that may be set by this function include - * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING, - * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors - * @return the length of the output string, not counting the terminating NUL; - * if the length is greater than destCapacity, then the string will not fit - * and a buffer of the indicated length would need to be passed in - * @see ucnv_toUnicode - * @see ucnv_convert - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_toUChars(UConverter *cnv, - UChar *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Convert a codepage buffer into Unicode one character at a time. - * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set. - * - * Advantage compared to ucnv_toUnicode() or ucnv_toUChars(): - * - Faster for small amounts of data, for most converters, e.g., - * US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets. - * (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants, - * it uses ucnv_toUnicode() internally.) - * - Convenient. - * - * Limitations compared to ucnv_toUnicode(): - * - Always assumes flush=TRUE. 
- * This makes ucnv_getNextUChar() unsuitable for "streaming" conversion, - * that is, for where the input is supplied in multiple buffers, - * because ucnv_getNextUChar() will assume the end of the input at the end - * of the first buffer. - * - Does not provide offset output. - * - * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because - * ucnv_getNextUChar() uses the current state of the converter - * (unlike ucnv_toUChars() which always resets first). - * However, if ucnv_getNextUChar() is called after ucnv_toUnicode() - * stopped in the middle of a character sequence (with flush=FALSE), - * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode() - * internally until the next character boundary. - * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to - * start at a character boundary.) - * - * Instead of using ucnv_getNextUChar(), it is recommended - * to convert using ucnv_toUnicode() or ucnv_toUChars() - * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h) - * or a C++ CharacterIterator or similar. - * This allows streaming conversion and offset output, for example. - * - *

Handling of surrogate pairs and supplementary-plane code points:
- * There are two different kinds of codepages that provide mappings for surrogate characters: - *

    - *
  • Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode - * code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff. - * Each valid sequence will result in exactly one returned code point. - * If a sequence results in a single surrogate, then that will be returned - * by itself, even if a neighboring sequence encodes the matching surrogate.
  • - *
  • Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points - * including surrogates. Code points in supplementary planes are represented with - * two sequences, each encoding a surrogate. - * For these codepages, matching pairs of surrogates will be combined into single - * code points for returning from this function. - * (Note that SCSU is actually a mix of these codepage types.)
  • - *

- * - * @param converter an open UConverter - * @param source the address of a pointer to the codepage buffer, will be - * updated to point after the bytes consumed in the conversion call. - * @param sourceLimit points to the end of the input buffer - * @param err fills in error status (see ucnv_toUnicode) - * U_INDEX_OUTOFBOUNDS_ERROR will be set if the input - * is empty or does not convert to any output (e.g.: pure state-change - * codes SI/SO, escape sequences for ISO 2022, - * or if the callback did not output anything, ...). - * This function will not set a U_BUFFER_OVERFLOW_ERROR because - * the "buffer" is the return code. However, there might be subsequent output - * stored in the converter object - * that will be returned in following calls to this function. - * @return a UChar32 resulting from the partial conversion of source - * @see ucnv_toUnicode - * @see ucnv_toUChars - * @see ucnv_convert - * @stable ICU 2.0 - */ -U_STABLE UChar32 U_EXPORT2 -ucnv_getNextUChar(UConverter * converter, - const char **source, - const char * sourceLimit, - UErrorCode * err); - -/** - * Convert from one external charset to another using two existing UConverters. - * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() - - * are used, "pivoting" through 16-bit Unicode. - * - * Important: For streaming conversion (multiple function calls for successive - * parts of a text stream), the caller must provide a pivot buffer explicitly, - * and must preserve the pivot buffer and associated pointers from one - * call to another. (The buffer may be moved if its contents and the relative - * pointer positions are preserved.) 
- * - * There is a similar function, ucnv_convert(), - * which has the following limitations: - * - it takes charset names, not converter objects, so that - * - two converters are opened for each call - * - only single-string conversion is possible, not streaming operation - * - it does not provide enough information to find out, - * in case of failure, whether the toUnicode or - * the fromUnicode conversion failed - * - * By contrast, ucnv_convertEx() - * - takes UConverter parameters instead of charset names - * - fully exposes the pivot buffer for streaming conversion and complete error handling - * - * ucnv_convertEx() also provides further convenience: - * - an option to reset the converters at the beginning - * (if reset==TRUE, see parameters; - * also sets *pivotTarget=*pivotSource=pivotStart) - * - allow NUL-terminated input - * (only a single NUL byte, will not work for charsets with multi-byte NULs) - * (if sourceLimit==NULL, see parameters) - * - terminate with a NUL on output - * (only a single NUL byte, not useful for charsets with multi-byte NULs), - * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills - * the target buffer - * - the pivot buffer can be provided internally; - * possible only for whole-string conversion, not streaming conversion; - * in this case, the caller will not be able to get details about where an - * error occurred - * (if pivotStart==NULL, see below) - * - * The function returns when one of the following is true: - * - the entire source text has been converted successfully to the target buffer - * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR) - * - a conversion error occurred - * (other U_FAILURE(), see description of pErrorCode) - * - * Limitation compared to the direct use of - * ucnv_fromUnicode() and ucnv_toUnicode(): - * ucnv_convertEx() does not provide offset information. 
- * - * Limitation compared to ucnv_fromUChars() and ucnv_toUChars(): - * ucnv_convertEx() does not support preflighting directly. - * - * Sample code for converting a single string from - * one external charset to UTF-8, ignoring the location of errors: - * - * \code - * int32_t - * myToUTF8(UConverter *cnv, - * const char *s, int32_t length, - * char *u8, int32_t capacity, - * UErrorCode *pErrorCode) { - * UConverter *utf8Cnv; - * char *target; - * - * if(U_FAILURE(*pErrorCode)) { - * return 0; - * } - * - * utf8Cnv=myGetCachedUTF8Converter(pErrorCode); - * if(U_FAILURE(*pErrorCode)) { - * return 0; - * } - * - * if(length<0) { - * length=strlen(s); - * } - * target=u8; - * ucnv_convertEx(utf8Cnv, cnv, - * &target, u8+capacity, - * &s, s+length, - * NULL, NULL, NULL, NULL, - * TRUE, TRUE, - * pErrorCode); - * - * myReleaseCachedUTF8Converter(utf8Cnv); - * - * // return the output string length, but without preflighting - * return (int32_t)(target-u8); - * } - * \endcode - * - * @param targetCnv Output converter, used to convert from the UTF-16 pivot - * to the target using ucnv_fromUnicode(). - * @param sourceCnv Input converter, used to convert from the source to - * the UTF-16 pivot using ucnv_toUnicode(). - * @param target I/O parameter, same as for ucnv_fromUChars(). - * Input: *target points to the beginning of the target buffer. - * Output: *target points to the first unit after the last char written. - * @param targetLimit Pointer to the first unit after the target buffer. - * @param source I/O parameter, same as for ucnv_toUChars(). - * Input: *source points to the beginning of the source buffer. - * Output: *source points to the first unit after the last char read. - * @param sourceLimit Pointer to the first unit after the source buffer. - * @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL, - * then an internal buffer is used and the other pivot - * arguments are ignored and can be NULL as well. 
- * @param pivotSource I/O parameter, same as source in ucnv_fromUChars() for - * conversion from the pivot buffer to the target buffer. - * @param pivotTarget I/O parameter, same as target in ucnv_toUChars() for - * conversion from the source buffer to the pivot buffer. - * It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit - * and pivotStart[0..ucnv_countAvaiable()]) - * @return a pointer a string (library owned), or NULL if the index is out of bounds. - * @see ucnv_countAvailable - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 -ucnv_getAvailableName(int32_t n); - -/** - * Returns a UEnumeration to enumerate all of the canonical converter - * names, as per the alias file, regardless of the ability to open each - * converter. - * - * @return A UEnumeration object for getting all the recognized canonical - * converter names. - * @see ucnv_getAvailableName - * @see uenum_close - * @see uenum_next - * @stable ICU 2.4 - */ -U_STABLE UEnumeration * U_EXPORT2 -ucnv_openAllNames(UErrorCode *pErrorCode); - -/** - * Gives the number of aliases for a given converter or alias name. - * If the alias is ambiguous, then the preferred converter is used - * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. - * This method only enumerates the listed entries in the alias file. - * @param alias alias name - * @param pErrorCode error status - * @return number of names on alias list for given alias - * @stable ICU 2.0 - */ -U_STABLE uint16_t U_EXPORT2 -ucnv_countAliases(const char *alias, UErrorCode *pErrorCode); - -/** - * Gives the name of the alias at given index of alias list. - * This method only enumerates the listed entries in the alias file. - * If the alias is ambiguous, then the preferred converter is used - * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. 
- * @param alias alias name - * @param n index in alias list - * @param pErrorCode result of operation - * @return returns the name of the alias at given index - * @see ucnv_countAliases - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode); - -/** - * Fill-up the list of alias names for the given alias. - * This method only enumerates the listed entries in the alias file. - * If the alias is ambiguous, then the preferred converter is used - * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. - * @param alias alias name - * @param aliases fill-in list, aliases is a pointer to an array of - * ucnv_countAliases() string-pointers - * (const char *) that will be filled in. - * The strings themselves are owned by the library. - * @param pErrorCode result of operation - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode); - -/** - * Return a new UEnumeration object for enumerating all the - * alias names for a given converter that are recognized by a standard. - * This method only enumerates the listed entries in the alias file. - * The convrtrs.txt file can be modified to change the results of - * this function. - * The first result in this list is the same result given by - * ucnv_getStandardName, which is the default alias for - * the specified standard name. The returned object must be closed with - * uenum_close when you are done with the object. - * - * @param convName original converter name - * @param standard name of the standard governing the names; MIME and IANA - * are such standards - * @param pErrorCode The error code - * @return A UEnumeration object for getting all aliases that are recognized - * by a standard. If any of the parameters are invalid, NULL - * is returned. 
- * @see ucnv_getStandardName - * @see uenum_close - * @see uenum_next - * @stable ICU 2.2 - */ -U_STABLE UEnumeration * U_EXPORT2 -ucnv_openStandardNames(const char *convName, - const char *standard, - UErrorCode *pErrorCode); - -/** - * Gives the number of standards associated to converter names. - * @return number of standards - * @stable ICU 2.0 - */ -U_STABLE uint16_t U_EXPORT2 -ucnv_countStandards(void); - -/** - * Gives the name of the standard at given index of standard list. - * @param n index in standard list - * @param pErrorCode result of operation - * @return returns the name of the standard at given index. Owned by the library. - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode); - -/** - * Returns a standard name for a given converter name. - *

- * Example alias table:
- * conv alias1 { STANDARD1 } alias2 { STANDARD1* } - *

- * Result of ucnv_getStandardName("conv", "STANDARD1") from example - * alias table:
- * "alias2" - * - * @param name original converter name - * @param standard name of the standard governing the names; MIME and IANA - * are such standards - * @param pErrorCode result of operation - * @return returns the standard converter name; - * if a standard converter name cannot be determined, - * then NULL is returned. Owned by the library. - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode); - -/** - * This function will return the internal canonical converter name of the - * tagged alias. This is the opposite of ucnv_openStandardNames, which - * returns the tagged alias given the canonical name. - *

- * Example alias table:
- * conv alias1 { STANDARD1 } alias2 { STANDARD1* } - *

- * Result of ucnv_getStandardName("alias1", "STANDARD1") from example - * alias table:
- * "conv" - * - * @return returns the canonical converter name; - * if a standard or alias name cannot be determined, - * then NULL is returned. The returned string is - * owned by the library. - * @see ucnv_getStandardName - * @stable ICU 2.4 - */ -U_STABLE const char * U_EXPORT2 -ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode); - -/** - * Returns the current default converter name. If you want to open - * a default converter, you do not need to use this function. - * It is faster if you pass a NULL argument to ucnv_open the - * default converter. - * - * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function - * always returns "UTF-8". - * - * @return returns the current default converter name. - * Storage owned by the library - * @see ucnv_setDefaultName - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -ucnv_getDefaultName(void); - -#ifndef U_HIDE_SYSTEM_API -/** - * This function is not thread safe. DO NOT call this function when ANY ICU - * function is being used from more than one thread! This function sets the - * current default converter name. If this function needs to be called, it - * should be called during application initialization. Most of the time, the - * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument - * is sufficient for your application. - * - * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function - * does nothing. - * - * @param name the converter name to be the default (must be known by ICU). - * @see ucnv_getDefaultName - * @system - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_setDefaultName(const char *name); -#endif /* U_HIDE_SYSTEM_API */ - -/** - * Fixes the backslash character mismapping. For example, in SJIS, the backslash - * character in the ASCII portion is also used to represent the yen currency sign. 
- * When mapping from Unicode character 0x005C, it's unclear whether to map the - * character back to yen or backslash in SJIS. This function will take the input - * buffer and replace all the yen sign characters with backslash. This is necessary - * when the user tries to open a file with the input buffer on Windows. - * This function will test the converter to see whether such mapping is - * required. You can sometimes avoid using this function by using the correct version - * of Shift-JIS. - * - * @param cnv The converter representing the target codepage. - * @param source the input buffer to be fixed - * @param sourceLen the length of the input buffer - * @see ucnv_isAmbiguous - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen); - -/** - * Determines if the converter contains ambiguous mappings of the same - * character or not. - * @param cnv the converter to be tested - * @return TRUE if the converter contains ambiguous mapping of the same - * character, FALSE otherwise. - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -ucnv_isAmbiguous(const UConverter *cnv); - -/** - * Sets the converter to use fallback mappings or not. - * Regardless of this flag, the converter will always use - * fallbacks from Unicode Private Use code points, as well as - * reverse fallbacks (to Unicode). - * For details see ".ucm File Format" - * in the Conversion Data chapter of the ICU User Guide: - * http://www.icu-project.org/userguide/conversion-data.html#ucmformat - * - * @param cnv The converter to set the fallback mapping usage on. - * @param usesFallback TRUE if the user wants the converter to take advantage of the fallback - * mapping, FALSE otherwise. - * @stable ICU 2.0 - * @see ucnv_usesFallback - */ -U_STABLE void U_EXPORT2 -ucnv_setFallback(UConverter *cnv, UBool usesFallback); - -/** - * Determines if the converter uses fallback mappings or not. 
- * This flag has restrictions, see ucnv_setFallback(). - * - * @param cnv The converter to be tested - * @return TRUE if the converter uses fallback, FALSE otherwise. - * @stable ICU 2.0 - * @see ucnv_setFallback - */ -U_STABLE UBool U_EXPORT2 -ucnv_usesFallback(const UConverter *cnv); - -/** - * Detects Unicode signature byte sequences at the start of the byte stream - * and returns the charset name of the indicated Unicode charset. - * NULL is returned when no Unicode signature is recognized. - * The number of bytes in the signature is output as well. - * - * The caller can ucnv_open() a converter using the charset name. - * The first code unit (UChar) from the start of the stream will be U+FEFF - * (the Unicode BOM/signature character) and can usually be ignored. - * - * For most Unicode charsets it is also possible to ignore the indicated - * number of initial stream bytes and start converting after them. - * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which - * this will not work. Therefore, it is best to ignore the first output UChar - * instead of the input signature bytes. - *

- * Usage: - * \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature - * - * @param source The source string in which the signature should be detected. - * @param sourceLength Length of the input string, or -1 if terminated with a NUL byte. - * @param signatureLength A pointer to int32_t to receive the number of bytes that make up the signature - * of the detected UTF. 0 if not detected. - * Can be a NULL pointer. - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return The name of the encoding detected. NULL if encoding is not detected. - * @stable ICU 2.4 - */ -U_STABLE const char* U_EXPORT2 -ucnv_detectUnicodeSignature(const char* source, - int32_t sourceLength, - int32_t *signatureLength, - UErrorCode *pErrorCode); - -/** - * Returns the number of UChars held in the converter's internal state - * because more input is needed for completing the conversion. This function is - * useful for mapping semantics of ICU's converter interface to those of iconv, - * and this information is not needed for normal conversion. - * @param cnv The converter in which the input is held - * @param status ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return The number of UChars in the state. -1 if an error is encountered. - * @stable ICU 3.4 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status); - -/** - * Returns the number of chars held in the converter's internal state - * because more input is needed for completing the conversion. This function is - * useful for mapping semantics of ICU's converter interface to those of iconv, - * and this information is not needed for normal conversion. - * @param cnv The converter in which the input is held as internal state - * @param status ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return The number of chars in the state. 
-1 if an error is encountered. - * @stable ICU 3.4 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status); - -/** - * Returns whether or not the charset of the converter has a fixed number of bytes - * per charset character. - * An example of this are converters that are of the type UCNV_SBCS or UCNV_DBCS. - * Another example is UTF-32 which is always 4 bytes per character. - * A Unicode code point may be represented by more than one UTF-8 or UTF-16 code unit - * but a UTF-32 converter encodes each code point with 4 bytes. - * Note: This method is not intended to be used to determine whether the charset has a - * fixed ratio of bytes to Unicode codes units for any particular Unicode encoding form. - * FALSE is returned with the UErrorCode if error occurs or cnv is NULL. - * @param cnv The converter to be tested - * @param status ICU error code in/out paramter - * @return TRUE if the converter is fixed-width - * @stable ICU 4.8 - */ -U_STABLE UBool U_EXPORT2 -ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status); - -#endif - -#endif -/*_UCNV*/ diff --git a/deps/node/deps/icu-small/source/common/unicode/ucnv_cb.h b/deps/node/deps/icu-small/source/common/unicode/ucnv_cb.h deleted file mode 100644 index 632cc0b3..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ucnv_cb.h +++ /dev/null @@ -1,164 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2004, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** - * ucnv_cb.h: - * External APIs for the ICU's codeset conversion library - * Helena Shih - * - * Modification History: - * - * Date Name Description - */ - -/** - * \file - * \brief C UConverter functions to aid the writers of callbacks - * - *

Callback API for UConverter

- * - * These functions are provided here for the convenience of the callback - * writer. If you are just looking for callback functions to use, please - * see ucnv_err.h. DO NOT call these functions directly when you are - * working with converters, unless your code has been called as a callback - * via ucnv_setFromUCallback or ucnv_setToUCallback !! - * - * A note about error codes and overflow. Unlike other ICU functions, - * these functions do not expect the error status to be U_ZERO_ERROR. - * Callbacks must be much more careful about their error codes. - * The error codes used here are in/out parameters, which should be passed - * back in the callback's error parameter. - * - * For example, if you call ucnv_cbfromUWriteBytes to write data out - * to the output codepage, it may return U_BUFFER_OVERFLOW_ERROR if - * the data did not fit in the target. But this isn't a failing error, - * in fact, ucnv_cbfromUWriteBytes may be called AGAIN with the error - * status still U_BUFFER_OVERFLOW_ERROR to attempt to write further bytes, - * which will also go into the internal overflow buffers. - * - * Concerning offsets, the 'offset' parameters here are relative to the start - * of SOURCE. For example, Suppose the string "ABCD" was being converted - * from Unicode into a codepage which doesn't have a mapping for 'B'. - * 'A' will be written out correctly, but - * The FromU Callback will be called on an unassigned character for 'B'. - * At this point, this is the state of the world: - * Target: A [..] [points after A] - * Source: A B [C] D [points to C - B has been consumed] - * 0 1 2 3 - * codePoint = "B" [the unassigned codepoint] - * - * Now, suppose a callback wants to write the substitution character '?' to - * the target. It calls ucnv_cbFromUWriteBytes() to write the ?. - * It should pass ZERO as the offset, because the offset as far as the - * callback is concerned is relative to the SOURCE pointer [which points - * before 'C'.] 
If the callback goes into the args and consumes 'C' also, - * it would call FromUWriteBytes with an offset of 1 (and advance the source - * pointer). - * - */ - -#ifndef UCNV_CB_H -#define UCNV_CB_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/ucnv_err.h" - -/** - * ONLY used by FromU callback functions. - * Writes out the specified byte output bytes to the target byte buffer or to converter internal buffers. - * - * @param args callback fromUnicode arguments - * @param source source bytes to write - * @param length length of bytes to write - * @param offsetIndex the relative offset index from callback. - * @param err error status. If U_BUFFER_OVERFLOW is returned, then U_BUFFER_OVERFLOW must - * be returned to the user, because it means that not all data could be written into the target buffer, and some is - * in the converter error buffer. - * @see ucnv_cbFromUWriteSub - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args, - const char* source, - int32_t length, - int32_t offsetIndex, - UErrorCode * err); - -/** - * ONLY used by FromU callback functions. - * This function will write out the correct substitution character sequence - * to the target. - * - * @param args callback fromUnicode arguments - * @param offsetIndex the relative offset index from the current source pointer to be used - * @param err error status. If U_BUFFER_OVERFLOW is returned, then U_BUFFER_OVERFLOW must - * be returned to the user, because it means that not all data could be written into the target buffer, and some is - * in the converter error buffer. - * @see ucnv_cbFromUWriteBytes - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args, - int32_t offsetIndex, - UErrorCode * err); - -/** - * ONLY used by fromU callback functions. - * This function will write out the error character(s) to the target UChar buffer. 
- * - * @param args callback fromUnicode arguments - * @param source pointer to pointer to first UChar to write [on exit: 1 after last UChar processed] - * @param sourceLimit pointer after last UChar to write - * @param offsetIndex the relative offset index from callback which will be set - * @param err error status U_BUFFER_OVERFLOW - * @see ucnv_cbToUWriteSub - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args, - const UChar** source, - const UChar* sourceLimit, - int32_t offsetIndex, - UErrorCode * err); - -/** - * ONLY used by ToU callback functions. - * This function will write out the specified characters to the target - * UChar buffer. - * - * @param args callback toUnicode arguments - * @param source source string to write - * @param length the length of source string - * @param offsetIndex the relative offset index which will be written. - * @param err error status U_BUFFER_OVERFLOW - * @see ucnv_cbToUWriteSub - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args, - const UChar* source, - int32_t length, - int32_t offsetIndex, - UErrorCode * err); - -/** - * ONLY used by ToU callback functions. - * This function will write out the Unicode substitution character (U+FFFD). - * - * @param args callback fromUnicode arguments - * @param offsetIndex the relative offset index from callback. - * @param err error status U_BUFFER_OVERFLOW - * @see ucnv_cbToUWriteUChars - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args, - int32_t offsetIndex, - UErrorCode * err); -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ucnv_err.h b/deps/node/deps/icu-small/source/common/unicode/ucnv_err.h deleted file mode 100644 index 08c96c14..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ucnv_err.h +++ /dev/null @@ -1,465 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2009, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** - * - * - * ucnv_err.h: - */ - -/** - * \file - * \brief C UConverter predefined error callbacks - * - *

Error Behaviour Functions

- * Defines some error behaviour functions called by ucnv_{from,to}Unicode - * These are provided as part of ICU and many are stable, but they - * can also be considered only as an example of what can be done with - * callbacks. You may of course write your own. - * - * If you want to write your own, you may also find the functions from - * ucnv_cb.h useful when writing your own callbacks. - * - * These functions, although public, should NEVER be called directly. - * They should be used as parameters to the ucnv_setFromUCallback - * and ucnv_setToUCallback functions, to set the behaviour of a converter - * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. - * - * usage example: 'STOP' doesn't need any context, but newContext - * could be set to something other than 'NULL' if needed. The available - * contexts in this header can modify the default behavior of the callback. - * - * \code - * UErrorCode err = U_ZERO_ERROR; - * UConverter *myConverter = ucnv_open("ibm-949", &err); - * const void *oldContext; - * UConverterFromUCallback oldAction; - * - * - * if (U_SUCCESS(err)) - * { - * ucnv_setFromUCallBack(myConverter, - * UCNV_FROM_U_CALLBACK_STOP, - * NULL, - * &oldAction, - * &oldContext, - * &status); - * } - * \endcode - * - * The code above tells "myConverter" to stop when it encounters an - * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from - * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed, - * and ucnv_setToUCallBack would need to be called in order to change - * that behavior too. 
- * - * Here is an example with a context: - * - * \code - * UErrorCode err = U_ZERO_ERROR; - * UConverter *myConverter = ucnv_open("ibm-949", &err); - * const void *oldContext; - * UConverterFromUCallback oldAction; - * - * - * if (U_SUCCESS(err)) - * { - * ucnv_setToUCallBack(myConverter, - * UCNV_TO_U_CALLBACK_SUBSTITUTE, - * UCNV_SUB_STOP_ON_ILLEGAL, - * &oldAction, - * &oldContext, - * &status); - * } - * \endcode - * - * The code above tells "myConverter" to stop when it encounters an - * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from - * Codepage -> Unicode. Any unmapped and legal characters will be - * substituted to be the default substitution character. - */ - -#ifndef UCNV_ERR_H -#define UCNV_ERR_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -/** Forward declaring the UConverter structure. @stable ICU 2.0 */ -struct UConverter; - -/** @stable ICU 2.0 */ -typedef struct UConverter UConverter; - -/** - * FROM_U, TO_U context options for sub callback - * @stable ICU 2.0 - */ -#define UCNV_SUB_STOP_ON_ILLEGAL "i" - -/** - * FROM_U, TO_U context options for skip callback - * @stable ICU 2.0 - */ -#define UCNV_SKIP_STOP_ON_ILLEGAL "i" - -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) - * @stable ICU 2.0 - */ -#define UCNV_ESCAPE_ICU NULL -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX) - * @stable ICU 2.0 - */ -#define UCNV_ESCAPE_JAVA "J" -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) - * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX) - * @stable ICU 2.0 - */ -#define UCNV_ESCAPE_C "C" -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly - * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape 
\htmlonly(&#DDDD;)\endhtmlonly - * @stable ICU 2.0 - */ -#define UCNV_ESCAPE_XML_DEC "D" -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly - * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly - * @stable ICU 2.0 - */ -#define UCNV_ESCAPE_XML_HEX "X" -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) - * @stable ICU 2.0 - */ -#define UCNV_ESCAPE_UNICODE "U" - -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H, that is, - * a backslash, 1..6 hex digits, and a space) - * @stable ICU 4.0 - */ -#define UCNV_ESCAPE_CSS2 "S" - -/** - * The process condition code to be used with the callbacks. - * Codes which are greater than UCNV_IRREGULAR should be - * passed on to any chained callbacks. - * @stable ICU 2.0 - */ -typedef enum { - UCNV_UNASSIGNED = 0, /**< The code point is unassigned. - The error code U_INVALID_CHAR_FOUND will be set. */ - UCNV_ILLEGAL = 1, /**< The code point is illegal. For example, - \\x81\\x2E is illegal in SJIS because \\x2E - is not a valid trail byte for the \\x81 - lead byte. - Also, starting with Unicode 3.0.1, non-shortest byte sequences - in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061) - are also illegal, not just irregular. - The error code U_ILLEGAL_CHAR_FOUND will be set. */ - UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in - the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF - are irregular UTF-8 byte sequences for single surrogate - code points. - The error code U_INVALID_CHAR_FOUND will be set. */ - UCNV_RESET = 3, /**< The callback is called with this reason when a - 'reset' has occurred. Callback should reset all - state. */ - UCNV_CLOSE = 4, /**< Called when the converter is closed. 
The - callback should release any allocated memory.*/ - UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the - converter. the pointer available as the - 'context' is an alias to the original converters' - context pointer. If the context must be owned - by the new converter, the callback must clone - the data and call ucnv_setFromUCallback - (or setToUCallback) with the correct pointer. - @stable ICU 2.2 - */ -} UConverterCallbackReason; - - -/** - * The structure for the fromUnicode callback function parameter. - * @stable ICU 2.0 - */ -typedef struct { - uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ - UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ - UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ - const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ - const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ - char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ - const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ - int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ -} UConverterFromUnicodeArgs; - - -/** - * The structure for the toUnicode callback function parameter. - * @stable ICU 2.0 - */ -typedef struct { - uint16_t size; /**< The size of this struct @stable ICU 2.0 */ - UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ - UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ - const char *source; /**< Pointer to the source source buffer. 
@stable ICU 2.0 */ - const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ - UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ - const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ - int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ -} UConverterToUnicodeArgs; - - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, - * returning the error code back to the caller immediately. - * - * @param context Pointer to the callback's private data - * @param fromUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. - * @param reason Defines the reason the callback was invoked - * @param err This should always be set to a failure status prior to calling. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( - const void *context, - UConverterFromUnicodeArgs *fromUArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err); - - - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, - * returning the error code back to the caller immediately. - * - * @param context Pointer to the callback's private data - * @param toUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' bytes of the concerned codepage sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param reason Defines the reason the callback was invoked - * @param err This should always be set to a failure status prior to calling. 
- * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP ( - const void *context, - UConverterToUnicodeArgs *toUArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err); - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This From Unicode callback skips any ILLEGAL_SEQUENCE, or - * skips only UNASSINGED_SEQUENCE depending on the context parameter - * simply ignoring those characters. - * - * @param context The function currently recognizes the callback options: - * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, - * returning the error code back to the caller immediately. - * NULL: Skips any ILLEGAL_SEQUENCE - * @param fromUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. - * @param reason Defines the reason the callback was invoked - * @param err Return value will be set to success if the callback was handled, - * otherwise this value will be set to a failure status. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP ( - const void *context, - UConverterFromUnicodeArgs *fromUArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err); - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or - * UNASSIGNED_SEQUENCE depending on context parameter, with the - * current substitution string for the converter. This is the default - * callback. - * - * @param context The function currently recognizes the callback options: - * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, - * returning the error code back to the caller immediately. 
- * NULL: Substitutes any ILLEGAL_SEQUENCE - * @param fromUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. - * @param reason Defines the reason the callback was invoked - * @param err Return value will be set to success if the callback was handled, - * otherwise this value will be set to a failure status. - * @see ucnv_setSubstChars - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( - const void *context, - UConverterFromUnicodeArgs *fromUArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err); - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the - * hexadecimal representation of the illegal codepoints - * - * @param context The function currently recognizes the callback options: - *
    - *
  • UCNV_ESCAPE_ICU: Substitues the ILLEGAL SEQUENCE with the hexadecimal - * representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). - * In the Event the converter doesn't support the characters {%,U}[A-F][0-9], - * it will substitute the illegal sequence with the substitution characters. - * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as - * %UD84D%UDC56
  • - *
  • UCNV_ESCAPE_JAVA: Substitues the ILLEGAL SEQUENCE with the hexadecimal - * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). - * In the Event the converter doesn't support the characters {\,u}[A-F][0-9], - * it will substitute the illegal sequence with the substitution characters. - * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as - * \\uD84D\\uDC56
  • - *
  • UCNV_ESCAPE_C: Substitues the ILLEGAL SEQUENCE with the hexadecimal - * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). - * In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9], - * it will substitute the illegal sequence with the substitution characters. - * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as - * \\U00023456
  • - *
  • UCNV_ESCAPE_XML_DEC: Substitues the ILLEGAL SEQUENCE with the decimal - * representation in the format \htmlonly&#DDDDDDDD;, e.g. "&#65534;&#172;&#51454;")\endhtmlonly. - * In the Event the converter doesn't support the characters {&,#}[0-9], - * it will substitute the illegal sequence with the substitution characters. - * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as - * &#144470; and Zero padding is ignored.
  • - *
  • UCNV_ESCAPE_XML_HEX:Substitues the ILLEGAL SEQUENCE with the decimal - * representation in the format \htmlonly&#xXXXX; e.g. "&#xFFFE;&#x00AC;&#xC8FE;")\endhtmlonly. - * In the Event the converter doesn't support the characters {&,#,x}[0-9], - * it will substitute the illegal sequence with the substitution characters. - * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as - * \htmlonly&#x23456;\endhtmlonly
  • - *
- * @param fromUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. - * @param reason Defines the reason the callback was invoked - * @param err Return value will be set to success if the callback was handled, - * otherwise this value will be set to a failure status. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( - const void *context, - UConverterFromUnicodeArgs *fromUArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err); - - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This To Unicode callback skips any ILLEGAL_SEQUENCE, or - * skips only UNASSINGED_SEQUENCE depending on the context parameter - * simply ignoring those characters. - * - * @param context The function currently recognizes the callback options: - * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, - * returning the error code back to the caller immediately. - * NULL: Skips any ILLEGAL_SEQUENCE - * @param toUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' bytes of the concerned codepage sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param reason Defines the reason the callback was invoked - * @param err Return value will be set to success if the callback was handled, - * otherwise this value will be set to a failure status. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP ( - const void *context, - UConverterToUnicodeArgs *toUArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err); - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! 
- * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or - * UNASSIGNED_SEQUENCE depending on context parameter, with the - * Unicode substitution character, U+FFFD. - * - * @param context The function currently recognizes the callback options: - * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, - * returning the error code back to the caller immediately. - * NULL: Substitutes any ILLEGAL_SEQUENCE - * @param toUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' bytes of the concerned codepage sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param reason Defines the reason the callback was invoked - * @param err Return value will be set to success if the callback was handled, - * otherwise this value will be set to a failure status. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE ( - const void *context, - UConverterToUnicodeArgs *toUArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err); - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the - * hexadecimal representation of the illegal bytes - * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). - * - * @param context This function currently recognizes the callback options: - * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC, - * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE. - * @param toUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' bytes of the concerned codepage sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param reason Defines the reason the callback was invoked - * @param err Return value will be set to success if the callback was handled, - * otherwise this value will be set to a failure status. 
- * @stable ICU 2.0 - */ - -U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE ( - const void *context, - UConverterToUnicodeArgs *toUArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err); - -#endif - -#endif - -/*UCNV_ERR_H*/ diff --git a/deps/node/deps/icu-small/source/common/unicode/ucnvsel.h b/deps/node/deps/icu-small/source/common/unicode/ucnvsel.h deleted file mode 100644 index 5fee53f1..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ucnvsel.h +++ /dev/null @@ -1,189 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2008-2011, International Business Machines -* Corporation, Google and others. All Rights Reserved. -* -******************************************************************************* -*/ -/* - * Author : eldawy@google.com (Mohamed Eldawy) - * ucnvsel.h - * - * Purpose: To generate a list of encodings capable of handling - * a given Unicode text - * - * Started 09-April-2008 - */ - -#ifndef __ICU_UCNV_SEL_H__ -#define __ICU_UCNV_SEL_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/uset.h" -#include "unicode/utf16.h" -#include "unicode/uenum.h" -#include "unicode/ucnv.h" -#include "unicode/localpointer.h" - -/** - * \file - * - * A converter selector is built with a set of encoding/charset names - * and given an input string returns the set of names of the - * corresponding converters which can convert the string. - * - * A converter selector can be serialized into a buffer and reopened - * from the serialized form. - */ - -/** - * @{ - * The selector data structure - */ -struct UConverterSelector; -typedef struct UConverterSelector UConverterSelector; -/** @} */ - -/** - * Open a selector. - * If converterListSize is 0, build for all available converters. 
- * If excludedCodePoints is NULL, don't exclude any code points. - * - * @param converterList a pointer to encoding names needed to be involved. - * Can be NULL if converterListSize==0. - * The list and the names will be cloned, and the caller - * retains ownership of the original. - * @param converterListSize number of encodings in above list. - * If 0, builds a selector for all available converters. - * @param excludedCodePoints a set of code points to be excluded from consideration. - * That is, excluded code points in a string do not change - * the selection result. (They might be handled by a callback.) - * Use NULL to exclude nothing. - * @param whichSet what converter set to use? Use this to determine whether - * to consider only roundtrip mappings or also fallbacks. - * @param status an in/out ICU UErrorCode - * @return the new selector - * - * @stable ICU 4.2 - */ -U_STABLE UConverterSelector* U_EXPORT2 -ucnvsel_open(const char* const* converterList, int32_t converterListSize, - const USet* excludedCodePoints, - const UConverterUnicodeSet whichSet, UErrorCode* status); - -/** - * Closes a selector. - * If any Enumerations were returned by ucnv_select*, they become invalid. - * They can be closed before or after calling ucnv_closeSelector, - * but should never be used after the selector is closed. - * - * @see ucnv_selectForString - * @see ucnv_selectForUTF8 - * - * @param sel selector to close - * - * @stable ICU 4.2 - */ -U_STABLE void U_EXPORT2 -ucnvsel_close(UConverterSelector *sel); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUConverterSelectorPointer - * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close(). - * For most methods see the LocalPointerBase base class. 
- * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close); - -U_NAMESPACE_END - -#endif - -/** - * Open a selector from its serialized form. - * The buffer must remain valid and unchanged for the lifetime of the selector. - * This is much faster than creating a selector from scratch. - * Using a serialized form from a different machine (endianness/charset) is supported. - * - * @param buffer pointer to the serialized form of a converter selector; - * must be 32-bit-aligned - * @param length the capacity of this buffer (can be equal to or larger than - * the actual data length) - * @param status an in/out ICU UErrorCode - * @return the new selector - * - * @stable ICU 4.2 - */ -U_STABLE UConverterSelector* U_EXPORT2 -ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status); - -/** - * Serialize a selector into a linear buffer. - * The serialized form is portable to different machines. - * - * @param sel selector to consider - * @param buffer pointer to 32-bit-aligned memory to be filled with the - * serialized form of this converter selector - * @param bufferCapacity the capacity of this buffer - * @param status an in/out ICU UErrorCode - * @return the required buffer capacity to hold serialize data (even if the call fails - * with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity) - * - * @stable ICU 4.2 - */ -U_STABLE int32_t U_EXPORT2 -ucnvsel_serialize(const UConverterSelector* sel, - void* buffer, int32_t bufferCapacity, UErrorCode* status); - -/** - * Select converters that can map all characters in a UTF-16 string, - * ignoring the excluded code points. - * - * @param sel a selector - * @param s UTF-16 string - * @param length length of the string, or -1 if NUL-terminated - * @param status an in/out ICU UErrorCode - * @return an enumeration containing encoding names. 
- * The returned encoding names and their order will be the same as - * supplied when building the selector. - * - * @stable ICU 4.2 - */ -U_STABLE UEnumeration * U_EXPORT2 -ucnvsel_selectForString(const UConverterSelector* sel, - const UChar *s, int32_t length, UErrorCode *status); - -/** - * Select converters that can map all characters in a UTF-8 string, - * ignoring the excluded code points. - * - * @param sel a selector - * @param s UTF-8 string - * @param length length of the string, or -1 if NUL-terminated - * @param status an in/out ICU UErrorCode - * @return an enumeration containing encoding names. - * The returned encoding names and their order will be the same as - * supplied when building the selector. - * - * @stable ICU 4.2 - */ -U_STABLE UEnumeration * U_EXPORT2 -ucnvsel_selectForUTF8(const UConverterSelector* sel, - const char *s, int32_t length, UErrorCode *status); - -#endif /* !UCONFIG_NO_CONVERSION */ - -#endif /* __ICU_UCNV_SEL_H__ */ diff --git a/deps/node/deps/icu-small/source/common/unicode/uconfig.h b/deps/node/deps/icu-small/source/common/unicode/uconfig.h deleted file mode 100644 index 3a7d2db9..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uconfig.h +++ /dev/null @@ -1,444 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: uconfig.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002sep19 -* created by: Markus W. Scherer -*/ - -#ifndef __UCONFIG_H__ -#define __UCONFIG_H__ - - -/*! - * \file - * \brief User-configurable settings - * - * Miscellaneous switches: - * - * A number of macros affect a variety of minor aspects of ICU. 
- * Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h) - * and moved here to make them easier to find. - * - * Switches for excluding parts of ICU library code modules: - * - * Changing these macros allows building partial, smaller libraries for special purposes. - * By default, all modules are built. - * The switches are fairly coarse, controlling large modules. - * Basic services cannot be turned off. - * - * Building with any of these options does not guarantee that the - * ICU build process will completely work. It is recommended that - * the ICU libraries and data be built using the normal build. - * At that time you should remove the data used by those services. - * After building the ICU data library, you should rebuild the ICU - * libraries with these switches customized to your needs. - * - * @stable ICU 2.4 - */ - -/** - * If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h" - * prior to determining default settings for uconfig variables. - * - * @internal ICU 4.0 - */ -#if defined(UCONFIG_USE_LOCAL) -#include "uconfig_local.h" -#endif - -/** - * \def U_DEBUG - * Determines whether to include debugging code. - * Automatically set on Windows, but most compilers do not have - * related predefined macros. - * @internal - */ -#ifdef U_DEBUG - /* Use the predefined value. */ -#elif defined(_DEBUG) - /* - * _DEBUG is defined by Visual Studio debug compilation. - * Do *not* test for its NDEBUG macro: It is an orthogonal macro - * which disables assert(). - */ -# define U_DEBUG 1 -# else -# define U_DEBUG 0 -#endif - -/** - * Determines whether to enable auto cleanup of libraries. - * @internal - */ -#ifndef UCLN_NO_AUTO_CLEANUP -#define UCLN_NO_AUTO_CLEANUP 1 -#endif - -/** - * \def U_DISABLE_RENAMING - * Determines whether to disable renaming or not. 
- * @internal - */ -#ifndef U_DISABLE_RENAMING -#define U_DISABLE_RENAMING 0 -#endif - -/** - * \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS - * Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h. - * utypes.h includes those headers if this macro is defined to 0. - * Otherwise, each those headers must be included explicitly when using one of their macros. - * Defaults to 0 for backward compatibility, except inside ICU. - * @stable ICU 49 - */ -#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS - /* Use the predefined value. */ -#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \ - defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \ - defined(U_TOOLUTIL_IMPLEMENTATION) -# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1 -#else -# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0 -#endif - -/** - * \def U_OVERRIDE_CXX_ALLOCATION - * Determines whether to override new and delete. - * ICU is normally built such that all of its C++ classes, via their UMemory base, - * override operators new and delete to use its internal, customizable, - * non-exception-throwing memory allocation functions. (Default value 1 for this macro.) - * - * This is especially important when the application and its libraries use multiple heaps. - * For example, on Windows, this allows the ICU DLL to be used by - * applications that statically link the C Runtime library. - * - * @stable ICU 2.2 - */ -#ifndef U_OVERRIDE_CXX_ALLOCATION -#define U_OVERRIDE_CXX_ALLOCATION 1 -#endif - -/** - * \def U_ENABLE_TRACING - * Determines whether to enable tracing. - * @internal - */ -#ifndef U_ENABLE_TRACING -#define U_ENABLE_TRACING 0 -#endif - -/** - * \def UCONFIG_ENABLE_PLUGINS - * Determines whether to enable ICU plugins. - * @internal - */ -#ifndef UCONFIG_ENABLE_PLUGINS -#define UCONFIG_ENABLE_PLUGINS 0 -#endif - -/** - * \def U_ENABLE_DYLOAD - * Whether to enable Dynamic loading in ICU. 
- * @internal - */ -#ifndef U_ENABLE_DYLOAD -#define U_ENABLE_DYLOAD 1 -#endif - -/** - * \def U_CHECK_DYLOAD - * Whether to test Dynamic loading as an OS capability. - * @internal - */ -#ifndef U_CHECK_DYLOAD -#define U_CHECK_DYLOAD 1 -#endif - -/** - * \def U_DEFAULT_SHOW_DRAFT - * Do we allow ICU users to use the draft APIs by default? - * @internal - */ -#ifndef U_DEFAULT_SHOW_DRAFT -#define U_DEFAULT_SHOW_DRAFT 1 -#endif - -/*===========================================================================*/ -/* Custom icu entry point renaming */ -/*===========================================================================*/ - -/** - * \def U_HAVE_LIB_SUFFIX - * 1 if a custom library suffix is set. - * @internal - */ -#ifdef U_HAVE_LIB_SUFFIX - /* Use the predefined value. */ -#elif defined(U_LIB_SUFFIX_C_NAME) || defined(U_IN_DOXYGEN) -# define U_HAVE_LIB_SUFFIX 1 -#endif - -/** - * \def U_LIB_SUFFIX_C_NAME_STRING - * Defines the library suffix as a string with C syntax. - * @internal - */ -#ifdef U_LIB_SUFFIX_C_NAME_STRING - /* Use the predefined value. */ -#elif defined(U_LIB_SUFFIX_C_NAME) -# define CONVERT_TO_STRING(s) #s -# define U_LIB_SUFFIX_C_NAME_STRING CONVERT_TO_STRING(U_LIB_SUFFIX_C_NAME) -#else -# define U_LIB_SUFFIX_C_NAME_STRING "" -#endif - -/* common/i18n library switches --------------------------------------------- */ - -/** - * \def UCONFIG_ONLY_COLLATION - * This switch turns off modules that are not needed for collation. - * - * It does not turn off legacy conversion because that is necessary - * for ICU to work on EBCDIC platforms (for the default converter). - * If you want "only collation" and do not build for EBCDIC, - * then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well. 
- * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_ONLY_COLLATION -# define UCONFIG_ONLY_COLLATION 0 -#endif - -#if UCONFIG_ONLY_COLLATION - /* common library */ -# define UCONFIG_NO_BREAK_ITERATION 1 -# define UCONFIG_NO_IDNA 1 - - /* i18n library */ -# if UCONFIG_NO_COLLATION -# error Contradictory collation switches in uconfig.h. -# endif -# define UCONFIG_NO_FORMATTING 1 -# define UCONFIG_NO_TRANSLITERATION 1 -# define UCONFIG_NO_REGULAR_EXPRESSIONS 1 -#endif - -/* common library switches -------------------------------------------------- */ - -/** - * \def UCONFIG_NO_FILE_IO - * This switch turns off all file access in the common library - * where file access is only used for data loading. - * ICU data must then be provided in the form of a data DLL (or with an - * equivalent way to link to the data residing in an executable, - * as in building a combined library with both the common library's code and - * the data), or via udata_setCommonData(). - * Application data must be provided via udata_setAppData() or by using - * "open" functions that take pointers to data, for example ucol_openBinary(). - * - * File access is not used at all in the i18n library. - * - * File access cannot be turned off for the icuio library or for the ICU - * test suites and ICU tools. - * - * @stable ICU 3.6 - */ -#ifndef UCONFIG_NO_FILE_IO -# define UCONFIG_NO_FILE_IO 0 -#endif - -#if UCONFIG_NO_FILE_IO && defined(U_TIMEZONE_FILES_DIR) -# error Contradictory file io switches in uconfig.h. -#endif - -/** - * \def UCONFIG_NO_CONVERSION - * ICU will not completely build (compiling the tools fails) with this - * switch turned on. - * This switch turns off all converters. - * - * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1 - * in utypes.h if char* strings in your environment are always in UTF-8. 
- * - * @stable ICU 3.2 - * @see U_CHARSET_IS_UTF8 - */ -#ifndef UCONFIG_NO_CONVERSION -# define UCONFIG_NO_CONVERSION 0 -#endif - -#if UCONFIG_NO_CONVERSION -# define UCONFIG_NO_LEGACY_CONVERSION 1 -#endif - -/** - * \def UCONFIG_ONLY_HTML_CONVERSION - * This switch turns off all of the converters NOT listed in - * the HTML encoding standard: - * http://www.w3.org/TR/encoding/#names-and-labels - * - * This is not possible on EBCDIC platforms - * because they need ibm-37 or ibm-1047 default converters. - * - * @stable ICU 55 - */ -#ifndef UCONFIG_ONLY_HTML_CONVERSION -# define UCONFIG_ONLY_HTML_CONVERSION 0 -#endif - -/** - * \def UCONFIG_NO_LEGACY_CONVERSION - * This switch turns off all converters except for - * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) - * - US-ASCII - * - ISO-8859-1 - * - * Turning off legacy conversion is not possible on EBCDIC platforms - * because they need ibm-37 or ibm-1047 default converters. - * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_NO_LEGACY_CONVERSION -# define UCONFIG_NO_LEGACY_CONVERSION 0 -#endif - -/** - * \def UCONFIG_NO_NORMALIZATION - * This switch turns off normalization. - * It implies turning off several other services as well, for example - * collation and IDNA. - * - * @stable ICU 2.6 - */ -#ifndef UCONFIG_NO_NORMALIZATION -# define UCONFIG_NO_NORMALIZATION 0 -#endif - -#if UCONFIG_NO_NORMALIZATION - /* common library */ - /* ICU 50 CJK dictionary BreakIterator uses normalization */ -# define UCONFIG_NO_BREAK_ITERATION 1 - /* IDNA (UTS #46) is implemented via normalization */ -# define UCONFIG_NO_IDNA 1 - - /* i18n library */ -# if UCONFIG_ONLY_COLLATION -# error Contradictory collation switches in uconfig.h. -# endif -# define UCONFIG_NO_COLLATION 1 -# define UCONFIG_NO_TRANSLITERATION 1 -#endif - -/** - * \def UCONFIG_NO_BREAK_ITERATION - * This switch turns off break iteration. 
- * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_NO_BREAK_ITERATION -# define UCONFIG_NO_BREAK_ITERATION 0 -#endif - -/** - * \def UCONFIG_NO_IDNA - * This switch turns off IDNA. - * - * @stable ICU 2.6 - */ -#ifndef UCONFIG_NO_IDNA -# define UCONFIG_NO_IDNA 0 -#endif - -/** - * \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE - * Determines the default UMessagePatternApostropheMode. - * See the documentation for that enum. - * - * @stable ICU 4.8 - */ -#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE -# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL -#endif - -/* i18n library switches ---------------------------------------------------- */ - -/** - * \def UCONFIG_NO_COLLATION - * This switch turns off collation and collation-based string search. - * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_NO_COLLATION -# define UCONFIG_NO_COLLATION 0 -#endif - -/** - * \def UCONFIG_NO_FORMATTING - * This switch turns off formatting and calendar/timezone services. - * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_NO_FORMATTING -# define UCONFIG_NO_FORMATTING 0 -#endif - -/** - * \def UCONFIG_NO_TRANSLITERATION - * This switch turns off transliteration. - * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_NO_TRANSLITERATION -# define UCONFIG_NO_TRANSLITERATION 0 -#endif - -/** - * \def UCONFIG_NO_REGULAR_EXPRESSIONS - * This switch turns off regular expressions. - * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS -# define UCONFIG_NO_REGULAR_EXPRESSIONS 0 -#endif - -/** - * \def UCONFIG_NO_SERVICE - * This switch turns off service registration. - * - * @stable ICU 3.2 - */ -#ifndef UCONFIG_NO_SERVICE -# define UCONFIG_NO_SERVICE 0 -#endif - -/** - * \def UCONFIG_HAVE_PARSEALLINPUT - * This switch turns on the "parse all input" attribute. Binary incompatible. 
- * - * @internal - */ -#ifndef UCONFIG_HAVE_PARSEALLINPUT -# define UCONFIG_HAVE_PARSEALLINPUT 1 -#endif - -/** - * \def UCONFIG_NO_FILTERED_BREAK_ITERATION - * This switch turns off filtered break iteration code. - * - * @internal - */ -#ifndef UCONFIG_NO_FILTERED_BREAK_ITERATION -# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0 -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ucpmap.h b/deps/node/deps/icu-small/source/common/unicode/ucpmap.h deleted file mode 100644 index f2c42b6b..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ucpmap.h +++ /dev/null @@ -1,162 +0,0 @@ -// © 2018 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// ucpmap.h -// created: 2018sep03 Markus W. Scherer - -#ifndef __UCPMAP_H__ -#define __UCPMAP_H__ - -#include "unicode/utypes.h" - -#ifndef U_HIDE_DRAFT_API - -U_CDECL_BEGIN - -/** - * \file - * - * This file defines an abstract map from Unicode code points to integer values. - * - * @see UCPMap - * @see UCPTrie - * @see UMutableCPTrie - */ - -/** - * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values. - * - * @see UCPTrie - * @see UMutableCPTrie - * @draft ICU 63 - */ -typedef struct UCPMap UCPMap; - -/** - * Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates. - * Most users should use UCPMAP_RANGE_NORMAL. - * - * @see ucpmap_getRange - * @see ucptrie_getRange - * @see umutablecptrie_getRange - * @draft ICU 63 - */ -enum UCPMapRangeOption { - /** - * ucpmap_getRange() enumerates all same-value ranges as stored in the map. - * Most users should use this option. - * @draft ICU 63 - */ - UCPMAP_RANGE_NORMAL, - /** - * ucpmap_getRange() enumerates all same-value ranges as stored in the map, - * except that lead surrogates (U+D800..U+DBFF) are treated as having the - * surrogateValue, which is passed to getRange() as a separate parameter. 
- * The surrogateValue is not transformed via filter(). - * See U_IS_LEAD(c). - * - * Most users should use UCPMAP_RANGE_NORMAL instead. - * - * This option is useful for maps that map surrogate code *units* to - * special values optimized for UTF-16 string processing - * or for special error behavior for unpaired surrogates, - * but those values are not to be associated with the lead surrogate code *points*. - * @draft ICU 63 - */ - UCPMAP_RANGE_FIXED_LEAD_SURROGATES, - /** - * ucpmap_getRange() enumerates all same-value ranges as stored in the map, - * except that all surrogates (U+D800..U+DFFF) are treated as having the - * surrogateValue, which is passed to getRange() as a separate parameter. - * The surrogateValue is not transformed via filter(). - * See U_IS_SURROGATE(c). - * - * Most users should use UCPMAP_RANGE_NORMAL instead. - * - * This option is useful for maps that map surrogate code *units* to - * special values optimized for UTF-16 string processing - * or for special error behavior for unpaired surrogates, - * but those values are not to be associated with the lead surrogate code *points*. - * @draft ICU 63 - */ - UCPMAP_RANGE_FIXED_ALL_SURROGATES -}; -#ifndef U_IN_DOXYGEN -typedef enum UCPMapRangeOption UCPMapRangeOption; -#endif - -/** - * Returns the value for a code point as stored in the map, with range checking. - * Returns an implementation-defined error value if c is not in the range 0..U+10FFFF. - * - * @param map the map - * @param c the code point - * @return the map value, - * or an implementation-defined error value if the code point is not in the range 0..U+10FFFF - * @draft ICU 63 - */ -U_CAPI uint32_t U_EXPORT2 -ucpmap_get(const UCPMap *map, UChar32 c); - -/** - * Callback function type: Modifies a map value. - * Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange(). - * The modified value will be returned by the getRange function. 
- * - * Can be used to ignore some of the value bits, - * make a filter for one of several values, - * return a value index computed from the map value, etc. - * - * @param context an opaque pointer, as passed into the getRange function - * @param value a value from the map - * @return the modified value - * @draft ICU 63 - */ -typedef uint32_t U_CALLCONV -UCPMapValueFilter(const void *context, uint32_t value); - -/** - * Returns the last code point such that all those from start to there have the same value. - * Can be used to efficiently iterate over all same-value ranges in a map. - * (This is normally faster than iterating over code points and get()ting each value, - * but much slower than a data structure that stores ranges directly.) - * - * If the UCPMapValueFilter function pointer is not NULL, then - * the value to be delivered is passed through that function, and the return value is the end - * of the range where all values are modified to the same actual value. - * The value is unchanged if that function pointer is NULL. - * - * Example: - * \code - * UChar32 start = 0, end; - * uint32_t value; - * while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0, - * NULL, NULL, &value)) >= 0) { - * // Work with the range start..end and its value. 
- * start = end + 1; - * } - * \endcode - * - * @param map the map - * @param start range start - * @param option defines whether surrogates are treated normally, - * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL - * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL - * @param filter a pointer to a function that may modify the map data value, - * or NULL if the values from the map are to be used unmodified - * @param context an opaque pointer that is passed on to the filter function - * @param pValue if not NULL, receives the value that every code point start..end has; - * may have been modified by filter(context, map value) - * if that function pointer is not NULL - * @return the range end code point, or -1 if start is not a valid code point - * @draft ICU 63 - */ -U_CAPI UChar32 U_EXPORT2 -ucpmap_getRange(const UCPMap *map, UChar32 start, - UCPMapRangeOption option, uint32_t surrogateValue, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue); - -U_CDECL_END - -#endif // U_HIDE_DRAFT_API -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ucptrie.h b/deps/node/deps/icu-small/source/common/unicode/ucptrie.h deleted file mode 100644 index 2718c984..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ucptrie.h +++ /dev/null @@ -1,646 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// ucptrie.h (modified from utrie2.h) -// created: 2017dec29 Markus W. Scherer - -#ifndef __UCPTRIE_H__ -#define __UCPTRIE_H__ - -#include "unicode/utypes.h" - -#ifndef U_HIDE_DRAFT_API - -#include "unicode/localpointer.h" -#include "unicode/ucpmap.h" -#include "unicode/utf8.h" - -U_CDECL_BEGIN - -/** - * \file - * - * This file defines an immutable Unicode code point trie. 
- * - * @see UCPTrie - * @see UMutableCPTrie - */ - -#ifndef U_IN_DOXYGEN -/** @internal */ -typedef union UCPTrieData { - /** @internal */ - const void *ptr0; - /** @internal */ - const uint16_t *ptr16; - /** @internal */ - const uint32_t *ptr32; - /** @internal */ - const uint8_t *ptr8; -} UCPTrieData; -#endif - -/** - * Immutable Unicode code point trie structure. - * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values. - * For details see http://site.icu-project.org/design/struct/utrie - * - * Do not access UCPTrie fields directly; use public functions and macros. - * Functions are easy to use: They support all trie types and value widths. - * - * When performance is really important, macros provide faster access. - * Most macros are specific to either "fast" or "small" tries, see UCPTrieType. - * There are "fast" macros for special optimized use cases. - * - * The macros will return bogus values, or may crash, if used on the wrong type or value width. - * - * @see UMutableCPTrie - * @draft ICU 63 - */ -struct UCPTrie { -#ifndef U_IN_DOXYGEN - /** @internal */ - const uint16_t *index; - /** @internal */ - UCPTrieData data; - - /** @internal */ - int32_t indexLength; - /** @internal */ - int32_t dataLength; - /** Start of the last range which ends at U+10FFFF. @internal */ - UChar32 highStart; - /** highStart>>12 @internal */ - uint16_t shifted12HighStart; - - /** @internal */ - int8_t type; // UCPTrieType - /** @internal */ - int8_t valueWidth; // UCPTrieValueWidth - - /** padding/reserved @internal */ - uint32_t reserved32; - /** padding/reserved @internal */ - uint16_t reserved16; - - /** - * Internal index-3 null block offset. - * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block. - * @internal - */ - uint16_t index3NullOffset; - /** - * Internal data null block offset, not shifted. - * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block. 
- * @internal - */ - int32_t dataNullOffset; - /** @internal */ - uint32_t nullValue; - -#ifdef UCPTRIE_DEBUG - /** @internal */ - const char *name; -#endif -#endif -}; -#ifndef U_IN_DOXYGEN -typedef struct UCPTrie UCPTrie; -#endif - -/** - * Selectors for the type of a UCPTrie. - * Different trade-offs for size vs. speed. - * - * @see umutablecptrie_buildImmutable - * @see ucptrie_openFromBinary - * @see ucptrie_getType - * @draft ICU 63 - */ -enum UCPTrieType { - /** - * For ucptrie_openFromBinary() to accept any type. - * ucptrie_getType() will return the actual type. - * @draft ICU 63 - */ - UCPTRIE_TYPE_ANY = -1, - /** - * Fast/simple/larger BMP data structure. Use functions and "fast" macros. - * @draft ICU 63 - */ - UCPTRIE_TYPE_FAST, - /** - * Small/slower BMP data structure. Use functions and "small" macros. - * @draft ICU 63 - */ - UCPTRIE_TYPE_SMALL -}; -#ifndef U_IN_DOXYGEN -typedef enum UCPTrieType UCPTrieType; -#endif - -/** - * Selectors for the number of bits in a UCPTrie data value. - * - * @see umutablecptrie_buildImmutable - * @see ucptrie_openFromBinary - * @see ucptrie_getValueWidth - * @draft ICU 63 - */ -enum UCPTrieValueWidth { - /** - * For ucptrie_openFromBinary() to accept any data value width. - * ucptrie_getValueWidth() will return the actual data value width. - * @draft ICU 63 - */ - UCPTRIE_VALUE_BITS_ANY = -1, - /** - * The trie stores 16 bits per data value. - * It returns them as unsigned values 0..0xffff=65535. - * @draft ICU 63 - */ - UCPTRIE_VALUE_BITS_16, - /** - * The trie stores 32 bits per data value. - * @draft ICU 63 - */ - UCPTRIE_VALUE_BITS_32, - /** - * The trie stores 8 bits per data value. - * It returns them as unsigned values 0..0xff=255. - * @draft ICU 63 - */ - UCPTRIE_VALUE_BITS_8 -}; -#ifndef U_IN_DOXYGEN -typedef enum UCPTrieValueWidth UCPTrieValueWidth; -#endif - -/** - * Opens a trie from its binary form, stored in 32-bit-aligned memory. - * Inverse of ucptrie_toBinary(). 
- * - * The memory must remain valid and unchanged as long as the trie is used. - * You must ucptrie_close() the trie once you are done using it. - * - * @param type selects the trie type; results in an - * U_INVALID_FORMAT_ERROR if it does not match the binary data; - * use UCPTRIE_TYPE_ANY to accept any type - * @param valueWidth selects the number of bits in a data value; results in an - * U_INVALID_FORMAT_ERROR if it does not match the binary data; - * use UCPTRIE_VALUE_BITS_ANY to accept any data value width - * @param data a pointer to 32-bit-aligned memory containing the binary data of a UCPTrie - * @param length the number of bytes available at data; - * can be more than necessary - * @param pActualLength receives the actual number of bytes at data taken up by the trie data; - * can be NULL - * @param pErrorCode an in/out ICU UErrorCode - * @return the trie - * - * @see umutablecptrie_open - * @see umutablecptrie_buildImmutable - * @see ucptrie_toBinary - * @draft ICU 63 - */ -U_CAPI UCPTrie * U_EXPORT2 -ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, - const void *data, int32_t length, int32_t *pActualLength, - UErrorCode *pErrorCode); - -/** - * Closes a trie and releases associated memory. - * - * @param trie the trie - * @draft ICU 63 - */ -U_CAPI void U_EXPORT2 -ucptrie_close(UCPTrie *trie); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUCPTriePointer - * "Smart pointer" class, closes a UCPTrie via ucptrie_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @draft ICU 63 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close); - -U_NAMESPACE_END - -#endif - -/** - * Returns the trie type. 
- * - * @param trie the trie - * @return the trie type - * @see ucptrie_openFromBinary - * @see UCPTRIE_TYPE_ANY - * @draft ICU 63 - */ -U_CAPI UCPTrieType U_EXPORT2 -ucptrie_getType(const UCPTrie *trie); - -/** - * Returns the number of bits in a trie data value. - * - * @param trie the trie - * @return the number of bits in a trie data value - * @see ucptrie_openFromBinary - * @see UCPTRIE_VALUE_BITS_ANY - * @draft ICU 63 - */ -U_CAPI UCPTrieValueWidth U_EXPORT2 -ucptrie_getValueWidth(const UCPTrie *trie); - -/** - * Returns the value for a code point as stored in the trie, with range checking. - * Returns the trie error value if c is not in the range 0..U+10FFFF. - * - * Easier to use than UCPTRIE_FAST_GET() and similar macros but slower. - * Easier to use because, unlike the macros, this function works on all UCPTrie - * objects, for all types and value widths. - * - * @param trie the trie - * @param c the code point - * @return the trie value, - * or the trie error value if the code point is not in the range 0..U+10FFFF - * @draft ICU 63 - */ -U_CAPI uint32_t U_EXPORT2 -ucptrie_get(const UCPTrie *trie, UChar32 c); - -/** - * Returns the last code point such that all those from start to there have the same value. - * Can be used to efficiently iterate over all same-value ranges in a trie. - * (This is normally faster than iterating over code points and get()ting each value, - * but much slower than a data structure that stores ranges directly.) - * - * If the UCPMapValueFilter function pointer is not NULL, then - * the value to be delivered is passed through that function, and the return value is the end - * of the range where all values are modified to the same actual value. - * The value is unchanged if that function pointer is NULL. 
- * - * Example: - * \code - * UChar32 start = 0, end; - * uint32_t value; - * while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, - * NULL, NULL, &value)) >= 0) { - * // Work with the range start..end and its value. - * start = end + 1; - * } - * \endcode - * - * @param trie the trie - * @param start range start - * @param option defines whether surrogates are treated normally, - * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL - * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL - * @param filter a pointer to a function that may modify the trie data value, - * or NULL if the values from the trie are to be used unmodified - * @param context an opaque pointer that is passed on to the filter function - * @param pValue if not NULL, receives the value that every code point start..end has; - * may have been modified by filter(context, trie value) - * if that function pointer is not NULL - * @return the range end code point, or -1 if start is not a valid code point - * @draft ICU 63 - */ -U_CAPI UChar32 U_EXPORT2 -ucptrie_getRange(const UCPTrie *trie, UChar32 start, - UCPMapRangeOption option, uint32_t surrogateValue, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue); - -/** - * Writes a memory-mappable form of the trie into 32-bit aligned memory. - * Inverse of ucptrie_openFromBinary(). 
- * - * @param trie the trie - * @param data a pointer to 32-bit-aligned memory to be filled with the trie data; - * can be NULL if capacity==0 - * @param capacity the number of bytes available at data, or 0 for pure preflighting - * @param pErrorCode an in/out ICU UErrorCode; - * U_BUFFER_OVERFLOW_ERROR if the capacity is too small - * @return the number of bytes written or (if buffer overflow) needed for the trie - * - * @see ucptrie_openFromBinary() - * @draft ICU 63 - */ -U_CAPI int32_t U_EXPORT2 -ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode); - -/** - * Macro parameter value for a trie with 16-bit data values. - * Use the name of this macro as a "dataAccess" parameter in other macros. - * Do not use this macro in any other way. - * - * @see UCPTRIE_VALUE_BITS_16 - * @draft ICU 63 - */ -#define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i]) - -/** - * Macro parameter value for a trie with 32-bit data values. - * Use the name of this macro as a "dataAccess" parameter in other macros. - * Do not use this macro in any other way. - * - * @see UCPTRIE_VALUE_BITS_32 - * @draft ICU 63 - */ -#define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i]) - -/** - * Macro parameter value for a trie with 8-bit data values. - * Use the name of this macro as a "dataAccess" parameter in other macros. - * Do not use this macro in any other way. - * - * @see UCPTRIE_VALUE_BITS_8 - * @draft ICU 63 - */ -#define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i]) - -/** - * Returns a trie value for a code point, with range checking. - * Returns the trie error value if c is not in the range 0..U+10FFFF. - * - * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST - * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width - * @param c (UChar32, in) the input code point - * @return The code point's trie value. 
- * @draft ICU 63 - */ -#define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c)) - -/** - * Returns a 16-bit trie value for a code point, with range checking. - * Returns the trie error value if c is not in the range U+0000..U+10FFFF. - * - * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL - * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width - * @param c (UChar32, in) the input code point - * @return The code point's trie value. - * @draft ICU 63 - */ -#define UCPTRIE_SMALL_GET(trie, dataAccess, c) \ - dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c)) - -/** - * UTF-16: Reads the next code point (UChar32 c, out), post-increments src, - * and gets a value from the trie. - * Sets the trie error value if c is an unpaired surrogate. - * - * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST - * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width - * @param src (const UChar *, in/out) the source text pointer - * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated - * @param c (UChar32, out) variable for the code point - * @param result (out) variable for the trie lookup result - * @draft ICU 63 - */ -#define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) { \ - (c) = *(src)++; \ - int32_t __index; \ - if (!U16_IS_SURROGATE(c)) { \ - __index = _UCPTRIE_FAST_INDEX(trie, c); \ - } else { \ - uint16_t __c2; \ - if (U16_IS_SURROGATE_LEAD(c) && (src) != (limit) && U16_IS_TRAIL(__c2 = *(src))) { \ - ++(src); \ - (c) = U16_GET_SUPPLEMENTARY((c), __c2); \ - __index = _UCPTRIE_SMALL_INDEX(trie, c); \ - } else { \ - __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \ - } \ - } \ - (result) = dataAccess(trie, __index); \ -} - -/** - * UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src, - * and gets 
a value from the trie. - * Sets the trie error value if c is an unpaired surrogate. - * - * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST - * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width - * @param start (const UChar *, in) the start pointer for the text - * @param src (const UChar *, in/out) the source text pointer - * @param c (UChar32, out) variable for the code point - * @param result (out) variable for the trie lookup result - * @draft ICU 63 - */ -#define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) { \ - (c) = *--(src); \ - int32_t __index; \ - if (!U16_IS_SURROGATE(c)) { \ - __index = _UCPTRIE_FAST_INDEX(trie, c); \ - } else { \ - uint16_t __c2; \ - if (U16_IS_SURROGATE_TRAIL(c) && (src) != (start) && U16_IS_LEAD(__c2 = *((src) - 1))) { \ - --(src); \ - (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \ - __index = _UCPTRIE_SMALL_INDEX(trie, c); \ - } else { \ - __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \ - } \ - } \ - (result) = dataAccess(trie, __index); \ -} - -/** - * UTF-8: Post-increments src and gets a value from the trie. - * Sets the trie error value for an ill-formed byte sequence. - * - * Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point - * because it would be more work to do so and is often not needed. - * If the trie value differs from the error value, then the byte sequence is well-formed, - * and the code point can be assembled without revalidation. 
- * - * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST - * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width - * @param src (const char *, in/out) the source text pointer - * @param limit (const char *, in) the limit pointer for the text (must not be NULL) - * @param result (out) variable for the trie lookup result - * @draft ICU 63 - */ -#define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) { \ - int32_t __lead = (uint8_t)*(src)++; \ - if (!U8_IS_SINGLE(__lead)) { \ - uint8_t __t1, __t2, __t3; \ - if ((src) != (limit) && \ - (__lead >= 0xe0 ? \ - __lead < 0xf0 ? /* U+0800..U+FFFF except surrogates */ \ - U8_LEAD3_T1_BITS[__lead &= 0xf] & (1 << ((__t1 = *(src)) >> 5)) && \ - ++(src) != (limit) && (__t2 = *(src) - 0x80) <= 0x3f && \ - (__lead = ((int32_t)(trie)->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) \ - : /* U+10000..U+10FFFF */ \ - (__lead -= 0xf0) <= 4 && \ - U8_LEAD4_T1_BITS[(__t1 = *(src)) >> 4] & (1 << __lead) && \ - (__lead = (__lead << 6) | (__t1 & 0x3f), ++(src) != (limit)) && \ - (__t2 = *(src) - 0x80) <= 0x3f && \ - ++(src) != (limit) && (__t3 = *(src) - 0x80) <= 0x3f && \ - (__lead = __lead >= (trie)->shifted12HighStart ? \ - (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \ - ucptrie_internalSmallU8Index((trie), __lead, __t2, __t3), 1) \ - : /* U+0080..U+07FF */ \ - __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && \ - (__lead = (int32_t)(trie)->index[__lead & 0x1f] + __t1, 1))) { \ - ++(src); \ - } else { \ - __lead = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; /* ill-formed*/ \ - } \ - } \ - (result) = dataAccess(trie, __lead); \ -} - -/** - * UTF-8: Pre-decrements src and gets a value from the trie. - * Sets the trie error value for an ill-formed byte sequence. - * - * Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point - * because it would be more work to do so and is often not needed. 
- * If the trie value differs from the error value, then the byte sequence is well-formed, - * and the code point can be assembled without revalidation. - * - * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST - * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width - * @param start (const char *, in) the start pointer for the text - * @param src (const char *, in/out) the source text pointer - * @param result (out) variable for the trie lookup result - * @draft ICU 63 - */ -#define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) { \ - int32_t __index = (uint8_t)*--(src); \ - if (!U8_IS_SINGLE(__index)) { \ - __index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \ - (const uint8_t *)(src)); \ - (src) -= __index & 7; \ - __index >>= 3; \ - } \ - (result) = dataAccess(trie, __index); \ -} - -/** - * Returns a trie value for an ASCII code point, without range checking. - * - * @param trie (const UCPTrie *, in) the trie (of either fast or small type) - * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width - * @param c (UChar32, in) the input code point; must be U+0000..U+007F - * @return The ASCII code point's trie value. - * @draft ICU 63 - */ -#define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c) - -/** - * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking. - * Can be used to look up a value for a UTF-16 code unit if other parts of - * the string processing check for surrogates. - * - * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST - * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width - * @param c (UChar32, in) the input code point, must be U+0000..U+FFFF - * @return The BMP code point's trie value. 
- * @draft ICU 63 - */ -#define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c)) - -/** - * Returns a trie value for a supplementary code point (U+10000..U+10FFFF), - * without range checking. - * - * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST - * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width - * @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF - * @return The supplementary code point's trie value. - * @draft ICU 63 - */ -#define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c)) - -/* Internal definitions ----------------------------------------------------- */ - -#ifndef U_IN_DOXYGEN - -/** - * Internal implementation constants. - * These are needed for the API macros, but users should not use these directly. - * @internal - */ -enum { - /** @internal */ - UCPTRIE_FAST_SHIFT = 6, - - /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */ - UCPTRIE_FAST_DATA_BLOCK_LENGTH = 1 << UCPTRIE_FAST_SHIFT, - - /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */ - UCPTRIE_FAST_DATA_MASK = UCPTRIE_FAST_DATA_BLOCK_LENGTH - 1, - - /** @internal */ - UCPTRIE_SMALL_MAX = 0xfff, - - /** - * Offset from dataLength (to be subtracted) for fetching the - * value returned for out-of-range code points and ill-formed UTF-8/16. - * @internal - */ - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET = 1, - /** - * Offset from dataLength (to be subtracted) for fetching the - * value returned for code points highStart..U+10FFFF. 
- * @internal - */ - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET = 2 -}; - -/* Internal functions and macros -------------------------------------------- */ -// Do not conditionalize with #ifndef U_HIDE_INTERNAL_API, needed for public API - -/** @internal */ -U_INTERNAL int32_t U_EXPORT2 -ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c); - -/** @internal */ -U_INTERNAL int32_t U_EXPORT2 -ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3); - -/** - * Internal function for part of the UCPTRIE_FAST_U8_PREVxx() macro implementations. - * Do not call directly. - * @internal - */ -U_INTERNAL int32_t U_EXPORT2 -ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c, - const uint8_t *start, const uint8_t *src); - -/** Internal trie getter for a code point below the fast limit. Returns the data index. @internal */ -#define _UCPTRIE_FAST_INDEX(trie, c) \ - ((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)) - -/** Internal trie getter for a code point at or above the fast limit. Returns the data index. @internal */ -#define _UCPTRIE_SMALL_INDEX(trie, c) \ - ((c) >= (trie)->highStart ? \ - (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \ - ucptrie_internalSmallIndex(trie, c)) - -/** - * Internal trie getter for a code point, with checking that c is in U+0000..10FFFF. - * Returns the data index. - * @internal - */ -#define _UCPTRIE_CP_INDEX(trie, fastMax, c) \ - ((uint32_t)(c) <= (uint32_t)(fastMax) ? \ - _UCPTRIE_FAST_INDEX(trie, c) : \ - (uint32_t)(c) <= 0x10ffff ? 
\ - _UCPTRIE_SMALL_INDEX(trie, c) : \ - (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET) - -U_CDECL_END - -#endif // U_IN_DOXYGEN -#endif // U_HIDE_DRAFT_API -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ucurr.h b/deps/node/deps/icu-small/source/common/unicode/ucurr.h deleted file mode 100644 index 1021adc8..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ucurr.h +++ /dev/null @@ -1,434 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ -#ifndef _UCURR_H_ -#define _UCURR_H_ - -#include "unicode/utypes.h" -#include "unicode/uenum.h" - -/** - * \file - * \brief C API: Encapsulates information about a currency. - * - * The ucurr API encapsulates information about a currency, as defined by - * ISO 4217. A currency is represented by a 3-character string - * containing its ISO 4217 code. This API can return various data - * necessary the proper display of a currency: - * - *
  • A display symbol, for a specific locale - *
  • The number of fraction digits to display - *
  • A rounding increment - *
- * - * The DecimalFormat class uses these data to display - * currencies. - * @author Alan Liu - * @since ICU 2.2 - */ - -#if !UCONFIG_NO_FORMATTING - -/** - * Currency Usage used for Decimal Format - * @stable ICU 54 - */ -enum UCurrencyUsage { - /** - * a setting to specify currency usage which determines currency digit - * and rounding for standard usage, for example: "50.00 NT$" - * used as DEFAULT value - * @stable ICU 54 - */ - UCURR_USAGE_STANDARD=0, - /** - * a setting to specify currency usage which determines currency digit - * and rounding for cash usage, for example: "50 NT$" - * @stable ICU 54 - */ - UCURR_USAGE_CASH=1, -#ifndef U_HIDE_DEPRECATED_API - /** - * One higher than the last enum UCurrencyUsage constant. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UCURR_USAGE_COUNT=2 -#endif // U_HIDE_DEPRECATED_API -}; -/** Currency Usage used for Decimal Format */ -typedef enum UCurrencyUsage UCurrencyUsage; - -/** - * Finds a currency code for the given locale. - * @param locale the locale for which to retrieve a currency code. - * Currency can be specified by the "currency" keyword - * in which case it overrides the default currency code - * @param buff fill in buffer. Can be NULL for preflighting. - * @param buffCapacity capacity of the fill in buffer. Can be 0 for - * preflighting. If it is non-zero, the buff parameter - * must not be NULL. - * @param ec error code - * @return length of the currency string. It should always be 3. If 0, - * currency couldn't be found or the input values are - * invalid. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -ucurr_forLocale(const char* locale, - UChar* buff, - int32_t buffCapacity, - UErrorCode* ec); - -/** - * Selector constants for ucurr_getName(). - * - * @see ucurr_getName - * @stable ICU 2.6 - */ -typedef enum UCurrNameStyle { - /** - * Selector for ucurr_getName indicating a symbolic name for a - * currency, such as "$" for USD. 
- * @stable ICU 2.6 - */ - UCURR_SYMBOL_NAME, - - /** - * Selector for ucurr_getName indicating the long name for a - * currency, such as "US Dollar" for USD. - * @stable ICU 2.6 - */ - UCURR_LONG_NAME - -#ifndef U_HIDE_DRAFT_API - , - /** - * Selector for getName() indicating the narrow currency symbol. - * The narrow currency symbol is similar to the regular currency - * symbol, but it always takes the shortest form: for example, - * "$" instead of "US$" for USD in en-CA. - * - * @draft ICU 61 - */ - UCURR_NARROW_SYMBOL_NAME -#endif // U_HIDE_DRAFT_API -} UCurrNameStyle; - -#if !UCONFIG_NO_SERVICE -/** - * @stable ICU 2.6 - */ -typedef const void* UCurrRegistryKey; - -/** - * Register an (existing) ISO 4217 currency code for the given locale. - * Only the country code and the two variants EURO and PRE_EURO are - * recognized. - * @param isoCode the three-letter ISO 4217 currency code - * @param locale the locale for which to register this currency code - * @param status the in/out status code - * @return a registry key that can be used to unregister this currency code, or NULL - * if there was an error. - * @stable ICU 2.6 - */ -U_STABLE UCurrRegistryKey U_EXPORT2 -ucurr_register(const UChar* isoCode, - const char* locale, - UErrorCode* status); -/** - * Unregister the previously-registered currency definitions using the - * URegistryKey returned from ucurr_register. Key becomes invalid after - * a successful call and should not be used again. Any currency - * that might have been hidden by the original ucurr_register call is - * restored. 
- * @param key the registry key returned by a previous call to ucurr_register - * @param status the in/out status code, no special meanings are assigned - * @return TRUE if the currency for this key was successfully unregistered - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -ucurr_unregister(UCurrRegistryKey key, UErrorCode* status); -#endif /* UCONFIG_NO_SERVICE */ - -/** - * Returns the display name for the given currency in the - * given locale. For example, the display name for the USD - * currency object in the en_US locale is "$". - * @param currency null-terminated 3-letter ISO 4217 code - * @param locale locale in which to display currency - * @param nameStyle selector for which kind of name to return - * @param isChoiceFormat fill-in set to TRUE if the returned value - * is a ChoiceFormat pattern; otherwise it is a static string - * @param len fill-in parameter to receive length of result - * @param ec error code - * @return pointer to display string of 'len' UChars. If the resource - * data contains no entry for 'currency', then 'currency' itself is - * returned. If *isChoiceFormat is TRUE, then the result is a - * ChoiceFormat pattern. Otherwise it is a static string. - * @stable ICU 2.6 - */ -U_STABLE const UChar* U_EXPORT2 -ucurr_getName(const UChar* currency, - const char* locale, - UCurrNameStyle nameStyle, - UBool* isChoiceFormat, - int32_t* len, - UErrorCode* ec); - -/** - * Returns the plural name for the given currency in the - * given locale. For example, the plural name for the USD - * currency object in the en_US locale is "US dollar" or "US dollars". 
- * @param currency null-terminated 3-letter ISO 4217 code - * @param locale locale in which to display currency - * @param isChoiceFormat fill-in set to TRUE if the returned value - * is a ChoiceFormat pattern; otherwise it is a static string - * @param pluralCount plural count - * @param len fill-in parameter to receive length of result - * @param ec error code - * @return pointer to display string of 'len' UChars. If the resource - * data contains no entry for 'currency', then 'currency' itself is - * returned. - * @stable ICU 4.2 - */ -U_STABLE const UChar* U_EXPORT2 -ucurr_getPluralName(const UChar* currency, - const char* locale, - UBool* isChoiceFormat, - const char* pluralCount, - int32_t* len, - UErrorCode* ec); - -/** - * Returns the number of the number of fraction digits that should - * be displayed for the given currency. - * This is equivalent to ucurr_getDefaultFractionDigitsForUsage(currency,UCURR_USAGE_STANDARD,ec); - * @param currency null-terminated 3-letter ISO 4217 code - * @param ec input-output error code - * @return a non-negative number of fraction digits to be - * displayed, or 0 if there is an error - * @stable ICU 3.0 - */ -U_STABLE int32_t U_EXPORT2 -ucurr_getDefaultFractionDigits(const UChar* currency, - UErrorCode* ec); - -/** - * Returns the number of the number of fraction digits that should - * be displayed for the given currency with usage. - * @param currency null-terminated 3-letter ISO 4217 code - * @param usage enum usage for the currency - * @param ec input-output error code - * @return a non-negative number of fraction digits to be - * displayed, or 0 if there is an error - * @stable ICU 54 - */ -U_STABLE int32_t U_EXPORT2 -ucurr_getDefaultFractionDigitsForUsage(const UChar* currency, - const UCurrencyUsage usage, - UErrorCode* ec); - -/** - * Returns the rounding increment for the given currency, or 0.0 if no - * rounding is done by the currency. 
- * This is equivalent to ucurr_getRoundingIncrementForUsage(currency,UCURR_USAGE_STANDARD,ec); - * @param currency null-terminated 3-letter ISO 4217 code - * @param ec input-output error code - * @return the non-negative rounding increment, or 0.0 if none, - * or 0.0 if there is an error - * @stable ICU 3.0 - */ -U_STABLE double U_EXPORT2 -ucurr_getRoundingIncrement(const UChar* currency, - UErrorCode* ec); - -/** - * Returns the rounding increment for the given currency, or 0.0 if no - * rounding is done by the currency given usage. - * @param currency null-terminated 3-letter ISO 4217 code - * @param usage enum usage for the currency - * @param ec input-output error code - * @return the non-negative rounding increment, or 0.0 if none, - * or 0.0 if there is an error - * @stable ICU 54 - */ -U_STABLE double U_EXPORT2 -ucurr_getRoundingIncrementForUsage(const UChar* currency, - const UCurrencyUsage usage, - UErrorCode* ec); - -/** - * Selector constants for ucurr_openCurrencies(). - * - * @see ucurr_openCurrencies - * @stable ICU 3.2 - */ -typedef enum UCurrCurrencyType { - /** - * Select all ISO-4217 currency codes. - * @stable ICU 3.2 - */ - UCURR_ALL = INT32_MAX, - /** - * Select only ISO-4217 commonly used currency codes. - * These currencies can be found in common use, and they usually have - * bank notes or coins associated with the currency code. - * This does not include fund codes, precious metals and other - * various ISO-4217 codes limited to special financial products. - * @stable ICU 3.2 - */ - UCURR_COMMON = 1, - /** - * Select ISO-4217 uncommon currency codes. - * These codes respresent fund codes, precious metals and other - * various ISO-4217 codes limited to special financial products. - * A fund code is a monetary resource associated with a currency. - * @stable ICU 3.2 - */ - UCURR_UNCOMMON = 2, - /** - * Select only deprecated ISO-4217 codes. - * These codes are no longer in general public use. 
- * @stable ICU 3.2 - */ - UCURR_DEPRECATED = 4, - /** - * Select only non-deprecated ISO-4217 codes. - * These codes are in general public use. - * @stable ICU 3.2 - */ - UCURR_NON_DEPRECATED = 8 -} UCurrCurrencyType; - -/** - * Provides a UEnumeration object for listing ISO-4217 codes. - * @param currType You can use one of several UCurrCurrencyType values for this - * variable. You can also | (or) them together to get a specific list of - * currencies. Most people will want to use the (UCURR_CURRENCY|UCURR_NON_DEPRECATED) value to - * get a list of current currencies. - * @param pErrorCode Error code - * @stable ICU 3.2 - */ -U_STABLE UEnumeration * U_EXPORT2 -ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode); - -/** - * Queries if the given ISO 4217 3-letter code is available on the specified date range. - * - * Note: For checking availability of a currency on a specific date, specify the date on both 'from' and 'to' - * - * When 'from' is U_DATE_MIN and 'to' is U_DATE_MAX, this method checks if the specified currency is available any time. - * If 'from' and 'to' are same UDate value, this method checks if the specified currency is available on that date. - * - * @param isoCode - * The ISO 4217 3-letter code. - * - * @param from - * The lower bound of the date range, inclusive. When 'from' is U_DATE_MIN, check the availability - * of the currency any date before 'to' - * - * @param to - * The upper bound of the date range, inclusive. When 'to' is U_DATE_MAX, check the availability of - * the currency any date after 'from' - * - * @param errorCode - * ICU error code - * - * @return TRUE if the given ISO 4217 3-letter code is supported on the specified date range. - * - * @stable ICU 4.8 - */ -U_STABLE UBool U_EXPORT2 -ucurr_isAvailable(const UChar* isoCode, - UDate from, - UDate to, - UErrorCode* errorCode); - -/** - * Finds the number of valid currency codes for the - * given locale and date. 
- * @param locale the locale for which to retrieve the - * currency count. - * @param date the date for which to retrieve the - * currency count for the given locale. - * @param ec error code - * @return the number of currency codes for the - * given locale and date. If 0, currency - * codes couldn't be found for the input - * values are invalid. - * @stable ICU 4.0 - */ -U_STABLE int32_t U_EXPORT2 -ucurr_countCurrencies(const char* locale, - UDate date, - UErrorCode* ec); - -/** - * Finds a currency code for the given locale and date - * @param locale the locale for which to retrieve a currency code. - * Currency can be specified by the "currency" keyword - * in which case it overrides the default currency code - * @param date the date for which to retrieve a currency code for - * the given locale. - * @param index the index within the available list of currency codes - * for the given locale on the given date. - * @param buff fill in buffer. Can be NULL for preflighting. - * @param buffCapacity capacity of the fill in buffer. Can be 0 for - * preflighting. If it is non-zero, the buff parameter - * must not be NULL. - * @param ec error code - * @return length of the currency string. It should always be 3. - * If 0, currency couldn't be found or the input values are - * invalid. - * @stable ICU 4.0 - */ -U_STABLE int32_t U_EXPORT2 -ucurr_forLocaleAndDate(const char* locale, - UDate date, - int32_t index, - UChar* buff, - int32_t buffCapacity, - UErrorCode* ec); - -/** - * Given a key and a locale, returns an array of string values in a preferred - * order that would make a difference. These are all and only those values where - * the open (creation) of the service with the locale formed from the input locale - * plus input keyword and that value has different behavior than creation with the - * input locale alone. - * @param key one of the keys supported by this service. For now, only - * "currency" is supported. 
- * @param locale the locale - * @param commonlyUsed if set to true it will return only commonly used values - * with the given locale in preferred order. Otherwise, - * it will return all the available values for the locale. - * @param status error status - * @return a string enumeration over keyword values for the given key and the locale. - * @stable ICU 4.2 - */ -U_STABLE UEnumeration* U_EXPORT2 -ucurr_getKeywordValuesForLocale(const char* key, - const char* locale, - UBool commonlyUsed, - UErrorCode* status); - -/** - * Returns the ISO 4217 numeric code for the currency. - *

Note: If the ISO 4217 numeric code is not assigned for the currency or - * the currency is unknown, this function returns 0. - * - * @param currency null-terminated 3-letter ISO 4217 code - * @return The ISO 4217 numeric code of the currency - * @stable ICU 49 - */ -U_STABLE int32_t U_EXPORT2 -ucurr_getNumericCode(const UChar* currency); - -#endif /* #if !UCONFIG_NO_FORMATTING */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/udata.h b/deps/node/deps/icu-small/source/common/unicode/udata.h deleted file mode 100644 index 6419c359..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/udata.h +++ /dev/null @@ -1,437 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: udata.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999oct25 -* created by: Markus W. Scherer -*/ - -#ifndef __UDATA_H__ -#define __UDATA_H__ - -#include "unicode/utypes.h" -#include "unicode/localpointer.h" - -U_CDECL_BEGIN - -/** - * \file - * \brief C API: Data loading interface - * - *

Information about data loading interface

- * - * This API is used to find and efficiently load data for ICU and applications - * using ICU. It provides an abstract interface that specifies a data type and - * name to find and load the data. Normally this API is used by other ICU APIs - * to load required data out of the ICU data library, but it can be used to - * load data out of other places. - * - * See the User Guide Data Management chapter. - */ - -#ifndef U_HIDE_INTERNAL_API -/** - * Character used to separate package names from tree names - * @internal ICU 3.0 - */ -#define U_TREE_SEPARATOR '-' - -/** - * String used to separate package names from tree names - * @internal ICU 3.0 - */ -#define U_TREE_SEPARATOR_STRING "-" - -/** - * Character used to separate parts of entry names - * @internal ICU 3.0 - */ -#define U_TREE_ENTRY_SEP_CHAR '/' - -/** - * String used to separate parts of entry names - * @internal ICU 3.0 - */ -#define U_TREE_ENTRY_SEP_STRING "/" - -/** - * Alias for standard ICU data - * @internal ICU 3.0 - */ -#define U_ICUDATA_ALIAS "ICUDATA" - -#endif /* U_HIDE_INTERNAL_API */ - -/** - * UDataInfo contains the properties about the requested data. - * This is meta data. - * - *

This structure may grow in the future, indicated by the - * size field.

- * - *

ICU data must be at least 8-aligned, and should be 16-aligned. - * The UDataInfo struct begins 4 bytes after the start of the data item, - * so it is 4-aligned. - * - *

The platform data property fields help determine if a data - * file can be efficiently used on a given machine. - * The particular fields are of importance only if the data - * is affected by the properties - if there is integer data - * with word sizes > 1 byte, char* text, or UChar* text.

- * - *

The implementation for the udata_open[Choice]() - * functions may reject data based on the value in isBigEndian. - * No other field is used by the udata API implementation.

- * - *

The dataFormat may be used to identify - * the kind of data, e.g. a converter table.

- * - *

The formatVersion field should be used to - * make sure that the format can be interpreted. - * It may be a good idea to check only for the one or two highest - * of the version elements to allow the data memory to - * get more or somewhat rearranged contents, for as long - * as the using code can still interpret the older contents.

- * - *

The dataVersion field is intended to be a - * common place to store the source version of the data; - * for data from the Unicode character database, this could - * reflect the Unicode version.

- * - * @stable ICU 2.0 - */ -typedef struct { - /** sizeof(UDataInfo) - * @stable ICU 2.0 */ - uint16_t size; - - /** unused, set to 0 - * @stable ICU 2.0*/ - uint16_t reservedWord; - - /* platform data properties */ - /** 0 for little-endian machine, 1 for big-endian - * @stable ICU 2.0 */ - uint8_t isBigEndian; - - /** see U_CHARSET_FAMILY values in utypes.h - * @stable ICU 2.0*/ - uint8_t charsetFamily; - - /** sizeof(UChar), one of { 1, 2, 4 } - * @stable ICU 2.0*/ - uint8_t sizeofUChar; - - /** unused, set to 0 - * @stable ICU 2.0*/ - uint8_t reservedByte; - - /** data format identifier - * @stable ICU 2.0*/ - uint8_t dataFormat[4]; - - /** versions: [0] major [1] minor [2] milli [3] micro - * @stable ICU 2.0*/ - uint8_t formatVersion[4]; - - /** versions: [0] major [1] minor [2] milli [3] micro - * @stable ICU 2.0*/ - uint8_t dataVersion[4]; -} UDataInfo; - -/* API for reading data -----------------------------------------------------*/ - -/** - * Forward declaration of the data memory type. - * @stable ICU 2.0 - */ -typedef struct UDataMemory UDataMemory; - -/** - * Callback function for udata_openChoice(). - * @param context parameter passed into udata_openChoice(). - * @param type The type of the data as passed into udata_openChoice(). - * It may be NULL. - * @param name The name of the data as passed into udata_openChoice(). - * @param pInfo A pointer to the UDataInfo structure - * of data that has been loaded and will be returned - * by udata_openChoice() if this function - * returns TRUE. - * @return TRUE if the current data memory is acceptable - * @stable ICU 2.0 - */ -typedef UBool U_CALLCONV -UDataMemoryIsAcceptable(void *context, - const char *type, const char *name, - const UDataInfo *pInfo); - - -/** - * Convenience function. - * This function works the same as udata_openChoice - * except that any data that matches the type and name - * is assumed to be acceptable. 
- * @param path Specifies an absolute path and/or a basename for the - * finding of the data in the file system. - * NULL for ICU data. - * @param type A string that specifies the type of data to be loaded. - * For example, resource bundles are loaded with type "res", - * conversion tables with type "cnv". - * This may be NULL or empty. - * @param name A string that specifies the name of the data. - * @param pErrorCode An ICU UErrorCode parameter. It must not be NULL. - * @return A pointer (handle) to a data memory object, or NULL - * if an error occurs. Call udata_getMemory() - * to get a pointer to the actual data. - * - * @see udata_openChoice - * @stable ICU 2.0 - */ -U_STABLE UDataMemory * U_EXPORT2 -udata_open(const char *path, const char *type, const char *name, - UErrorCode *pErrorCode); - -/** - * Data loading function. - * This function is used to find and load efficiently data for - * ICU and applications using ICU. - * It provides an abstract interface that allows to specify a data - * type and name to find and load the data. - * - *

The implementation depends on platform properties and user preferences - * and may involve loading shared libraries (DLLs), mapping - * files into memory, or fopen()/fread() files. - * It may also involve using static memory or database queries etc. - * Several or all data items may be combined into one entity - * (DLL, memory-mappable file).

- * - *

The data is always preceded by a header that includes - * a UDataInfo structure. - * The caller's isAcceptable() function is called to make - * sure that the data is useful. It may be called several times if it - * rejects the data and there is more than one location with data - * matching the type and name.

- * - *

If path==NULL, then ICU data is loaded. - * Otherwise, it is separated into a basename and a basename-less directory string. - * The basename is used as the data package name, and the directory is - * logically prepended to the ICU data directory string.

- * - *

For details about ICU data loading see the User Guide - * Data Management chapter. (http://icu-project.org/userguide/icudata.html)

- * - * @param path Specifies an absolute path and/or a basename for the - * finding of the data in the file system. - * NULL for ICU data. - * @param type A string that specifies the type of data to be loaded. - * For example, resource bundles are loaded with type "res", - * conversion tables with type "cnv". - * This may be NULL or empty. - * @param name A string that specifies the name of the data. - * @param isAcceptable This function is called to verify that loaded data - * is useful for the client code. If it returns FALSE - * for all data items, then udata_openChoice() - * will return with an error. - * @param context Arbitrary parameter to be passed into isAcceptable. - * @param pErrorCode An ICU UErrorCode parameter. It must not be NULL. - * @return A pointer (handle) to a data memory object, or NULL - * if an error occurs. Call udata_getMemory() - * to get a pointer to the actual data. - * @stable ICU 2.0 - */ -U_STABLE UDataMemory * U_EXPORT2 -udata_openChoice(const char *path, const char *type, const char *name, - UDataMemoryIsAcceptable *isAcceptable, void *context, - UErrorCode *pErrorCode); - -/** - * Close the data memory. - * This function must be called to allow the system to - * release resources associated with this data memory. - * @param pData The pointer to data memory object - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -udata_close(UDataMemory *pData); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUDataMemoryPointer - * "Smart pointer" class, closes a UDataMemory via udata_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close); - -U_NAMESPACE_END - -#endif - -/** - * Get the pointer to the actual data inside the data memory. - * The data is read-only. - * - * ICU data must be at least 8-aligned, and should be 16-aligned. 
- * - * @param pData The pointer to data memory object - * @stable ICU 2.0 - */ -U_STABLE const void * U_EXPORT2 -udata_getMemory(UDataMemory *pData); - -/** - * Get the information from the data memory header. - * This allows to get access to the header containing - * platform data properties etc. which is not part of - * the data itself and can therefore not be accessed - * via the pointer that udata_getMemory() returns. - * - * @param pData pointer to the data memory object - * @param pInfo pointer to a UDataInfo object; - * its size field must be set correctly, - * typically to sizeof(UDataInfo). - * - * *pInfo will be filled with the UDataInfo structure - * in the data memory object. If this structure is smaller than - * pInfo->size, then the size will be - * adjusted and only part of the structure will be filled. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -udata_getInfo(UDataMemory *pData, UDataInfo *pInfo); - -/** - * This function bypasses the normal ICU data loading process and - * allows you to force ICU's system data to come out of a user-specified - * area in memory. - * - * ICU data must be at least 8-aligned, and should be 16-aligned. - * See http://userguide.icu-project.org/icudata - * - * The format of this data is that of the icu common data file, as is - * generated by the pkgdata tool with mode=common or mode=dll. - * You can read in a whole common mode file and pass the address to the start of the - * data, or (with the appropriate link options) pass in the pointer to - * the data that has been loaded from a dll by the operating system, - * as shown in this code: - * - * extern const char U_IMPORT U_ICUDATA_ENTRY_POINT []; - * // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool - * UErrorCode status = U_ZERO_ERROR; - * - * udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status); - * - * It is important that the declaration be as above. The entry point - * must not be declared as an extern void*. 
- * - * Starting with ICU 4.4, it is possible to set several data packages, - * one per call to this function. - * udata_open() will look for data in the multiple data packages in the order - * in which they were set. - * The position of the linked-in or default-name ICU .data package in the - * search list depends on when the first data item is loaded that is not contained - * in the already explicitly set packages. - * If data was loaded implicitly before the first call to this function - * (for example, via opening a converter, constructing a UnicodeString - * from default-codepage data, using formatting or collation APIs, etc.), - * then the default data will be first in the list. - * - * This function has no effect on application (non ICU) data. See udata_setAppData() - * for similar functionality for application data. - * - * @param data pointer to ICU common data - * @param err outgoing error status U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -udata_setCommonData(const void *data, UErrorCode *err); - - -/** - * This function bypasses the normal ICU data loading process for application-specific - * data and allows you to force the it to come out of a user-specified - * pointer. - * - * ICU data must be at least 8-aligned, and should be 16-aligned. - * See http://userguide.icu-project.org/icudata - * - * The format of this data is that of the icu common data file, like 'icudt26l.dat' - * or the corresponding shared library (DLL) file. - * The application must read in or otherwise construct an image of the data and then - * pass the address of it to this function. - * - * - * Warning: setAppData will set a U_USING_DEFAULT_WARNING code if - * data with the specifed path that has already been opened, or - * if setAppData with the same path has already been called. - * Any such calls to setAppData will have no effect. 
- * - * - * @param packageName the package name by which the application will refer - * to (open) this data - * @param data pointer to the data - * @param err outgoing error status U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR - * @see udata_setCommonData - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -udata_setAppData(const char *packageName, const void *data, UErrorCode *err); - -/** - * Possible settings for udata_setFileAccess() - * @see udata_setFileAccess - * @stable ICU 3.4 - */ -typedef enum UDataFileAccess { - /** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */ - UDATA_FILES_FIRST, - /** An alias for the default access mode. @stable ICU 3.4 */ - UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST, - /** ICU only loads data from packages, not from single files. @stable ICU 3.4 */ - UDATA_ONLY_PACKAGES, - /** ICU loads data from packages first, and only from single files - if the data cannot be found in a package. @stable ICU 3.4 */ - UDATA_PACKAGES_FIRST, - /** ICU does not access the file system for data loading. @stable ICU 3.4 */ - UDATA_NO_FILES, -#ifndef U_HIDE_DEPRECATED_API - /** - * Number of real UDataFileAccess values. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UDATA_FILE_ACCESS_COUNT -#endif // U_HIDE_DEPRECATED_API -} UDataFileAccess; - -/** - * This function may be called to control how ICU loads data. It must be called - * before any ICU data is loaded, including application data loaded with - * ures/ResourceBundle or udata APIs. This function is not multithread safe. - * The results of calling it while other threads are loading data are undefined. - * @param access The type of file access to be used - * @param status Error code. 
- * @see UDataFileAccess - * @stable ICU 3.4 - */ -U_STABLE void U_EXPORT2 -udata_setFileAccess(UDataFileAccess access, UErrorCode *status); - -U_CDECL_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/udisplaycontext.h b/deps/node/deps/icu-small/source/common/unicode/udisplaycontext.h deleted file mode 100644 index 398481c6..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/udisplaycontext.h +++ /dev/null @@ -1,172 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -***************************************************************************************** -* Copyright (C) 2014-2016, International Business Machines -* Corporation and others. All Rights Reserved. -***************************************************************************************** -*/ - -#ifndef UDISPLAYCONTEXT_H -#define UDISPLAYCONTEXT_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -/** - * \file - * \brief C API: Display context types (enum values) - */ - -/** - * Display context types, for getting values of a particular setting. - * Note, the specific numeric values are internal and may change. - * @stable ICU 51 - */ -enum UDisplayContextType { - /** - * Type to retrieve the dialect handling setting, e.g. - * UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES. - * @stable ICU 51 - */ - UDISPCTX_TYPE_DIALECT_HANDLING = 0, - /** - * Type to retrieve the capitalization context setting, e.g. - * UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, - * UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, etc. - * @stable ICU 51 - */ - UDISPCTX_TYPE_CAPITALIZATION = 1, - /** - * Type to retrieve the display length setting, e.g. - * UDISPCTX_LENGTH_FULL, UDISPCTX_LENGTH_SHORT. - * @stable ICU 54 - */ - UDISPCTX_TYPE_DISPLAY_LENGTH = 2, - /** - * Type to retrieve the substitute handling setting, e.g. - * UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE. 
- * @stable ICU 58 - */ - UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3 -}; -/** -* @stable ICU 51 -*/ -typedef enum UDisplayContextType UDisplayContextType; - -/** - * Display context settings. - * Note, the specific numeric values are internal and may change. - * @stable ICU 51 - */ -enum UDisplayContext { - /** - * ================================ - * DIALECT_HANDLING can be set to one of UDISPCTX_STANDARD_NAMES or - * UDISPCTX_DIALECT_NAMES. Use UDisplayContextType UDISPCTX_TYPE_DIALECT_HANDLING - * to get the value. - */ - /** - * A possible setting for DIALECT_HANDLING: - * use standard names when generating a locale name, - * e.g. en_GB displays as 'English (United Kingdom)'. - * @stable ICU 51 - */ - UDISPCTX_STANDARD_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 0, - /** - * A possible setting for DIALECT_HANDLING: - * use dialect names, when generating a locale name, - * e.g. en_GB displays as 'British English'. - * @stable ICU 51 - */ - UDISPCTX_DIALECT_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 1, - /** - * ================================ - * CAPITALIZATION can be set to one of UDISPCTX_CAPITALIZATION_NONE, - * UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, - * UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, - * UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, or - * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. - * Use UDisplayContextType UDISPCTX_TYPE_CAPITALIZATION to get the value. - */ - /** - * The capitalization context to be used is unknown (this is the default value). - * @stable ICU 51 - */ - UDISPCTX_CAPITALIZATION_NONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 0, - /** - * The capitalization context if a date, date symbol or display name is to be - * formatted with capitalization appropriate for the middle of a sentence. 
- * @stable ICU 51 - */ - UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 1, - /** - * The capitalization context if a date, date symbol or display name is to be - * formatted with capitalization appropriate for the beginning of a sentence. - * @stable ICU 51 - */ - UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 2, - /** - * The capitalization context if a date, date symbol or display name is to be - * formatted with capitalization appropriate for a user-interface list or menu item. - * @stable ICU 51 - */ - UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 3, - /** - * The capitalization context if a date, date symbol or display name is to be - * formatted with capitalization appropriate for stand-alone usage such as an - * isolated name on a calendar page. - * @stable ICU 51 - */ - UDISPCTX_CAPITALIZATION_FOR_STANDALONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 4, - /** - * ================================ - * DISPLAY_LENGTH can be set to one of UDISPCTX_LENGTH_FULL or - * UDISPCTX_LENGTH_SHORT. Use UDisplayContextType UDISPCTX_TYPE_DISPLAY_LENGTH - * to get the value. - */ - /** - * A possible setting for DISPLAY_LENGTH: - * use full names when generating a locale name, - * e.g. "United States" for US. - * @stable ICU 54 - */ - UDISPCTX_LENGTH_FULL = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 0, - /** - * A possible setting for DISPLAY_LENGTH: - * use short names when generating a locale name, - * e.g. "U.S." for US. - * @stable ICU 54 - */ - UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1, - /** - * ================================ - * SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or - * UDISPCTX_NO_SUBSTITUTE. Use UDisplayContextType UDISPCTX_TYPE_SUBSTITUTE_HANDLING - * to get the value. - */ - /** - * A possible setting for SUBSTITUTE_HANDLING: - * Returns a fallback value (e.g., the input code) when no data is available. 
- * This is the default value. - * @stable ICU 58 - */ - UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0, - /** - * A possible setting for SUBSTITUTE_HANDLING: - * Returns a null value when no data is available. - * @stable ICU 58 - */ - UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1 - -}; -/** -* @stable ICU 51 -*/ -typedef enum UDisplayContext UDisplayContext; - -#endif /* #if !UCONFIG_NO_FORMATTING */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/uenum.h b/deps/node/deps/icu-small/source/common/unicode/uenum.h deleted file mode 100644 index eb8ecdf8..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uenum.h +++ /dev/null @@ -1,208 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uenum.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:2 -* -* created on: 2002jul08 -* created by: Vladimir Weinstein -*/ - -#ifndef __UENUM_H -#define __UENUM_H - -#include "unicode/utypes.h" -#include "unicode/localpointer.h" - -#if U_SHOW_CPLUSPLUS_API -U_NAMESPACE_BEGIN -class StringEnumeration; -U_NAMESPACE_END -#endif - -/** - * \file - * \brief C API: String Enumeration - */ - -/** - * An enumeration object. - * For usage in C programs. - * @stable ICU 2.2 - */ -struct UEnumeration; -/** structure representing an enumeration object instance @stable ICU 2.2 */ -typedef struct UEnumeration UEnumeration; - -/** - * Disposes of resources in use by the iterator. If en is NULL, - * does nothing. After this call, any char* or UChar* pointer - * returned by uenum_unext() or uenum_next() is invalid. 
- * @param en UEnumeration structure pointer - * @stable ICU 2.2 - */ -U_STABLE void U_EXPORT2 -uenum_close(UEnumeration* en); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUEnumerationPointer - * "Smart pointer" class, closes a UEnumeration via uenum_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close); - -U_NAMESPACE_END - -#endif - -/** - * Returns the number of elements that the iterator traverses. If - * the iterator is out-of-sync with its service, status is set to - * U_ENUM_OUT_OF_SYNC_ERROR. - * This is a convenience function. It can end up being very - * expensive as all the items might have to be pre-fetched (depending - * on the type of data being traversed). Use with caution and only - * when necessary. - * @param en UEnumeration structure pointer - * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the - * iterator is out of sync. - * @return number of elements in the iterator - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -uenum_count(UEnumeration* en, UErrorCode* status); - -/** - * Returns the next element in the iterator's list. If there are - * no more elements, returns NULL. If the iterator is out-of-sync - * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and - * NULL is returned. If the native service string is a char* string, - * it is converted to UChar* with the invariant converter. - * The result is terminated by (UChar)0. - * @param en the iterator object - * @param resultLength pointer to receive the length of the result - * (not including the terminating \\0). - * If the pointer is NULL it is ignored. - * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if - * the iterator is out of sync with its service. - * @return a pointer to the string. The string will be - * zero-terminated. 
The return pointer is owned by this iterator - * and must not be deleted by the caller. The pointer is valid - * until the next call to any uenum_... method, including - * uenum_next() or uenum_unext(). When all strings have been - * traversed, returns NULL. - * @stable ICU 2.2 - */ -U_STABLE const UChar* U_EXPORT2 -uenum_unext(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status); - -/** - * Returns the next element in the iterator's list. If there are - * no more elements, returns NULL. If the iterator is out-of-sync - * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and - * NULL is returned. If the native service string is a UChar* - * string, it is converted to char* with the invariant converter. - * The result is terminated by (char)0. If the conversion fails - * (because a character cannot be converted) then status is set to - * U_INVARIANT_CONVERSION_ERROR and the return value is undefined - * (but non-NULL). - * @param en the iterator object - * @param resultLength pointer to receive the length of the result - * (not including the terminating \\0). - * If the pointer is NULL it is ignored. - * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if - * the iterator is out of sync with its service. Set to - * U_INVARIANT_CONVERSION_ERROR if the underlying native string is - * UChar* and conversion to char* with the invariant converter - * fails. This error pertains only to current string, so iteration - * might be able to continue successfully. - * @return a pointer to the string. The string will be - * zero-terminated. The return pointer is owned by this iterator - * and must not be deleted by the caller. The pointer is valid - * until the next call to any uenum_... method, including - * uenum_next() or uenum_unext(). When all strings have been - * traversed, returns NULL. 
- * @stable ICU 2.2 - */ -U_STABLE const char* U_EXPORT2 -uenum_next(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status); - -/** - * Resets the iterator to the current list of service IDs. This - * re-establishes sync with the service and rewinds the iterator - * to start at the first element. - * @param en the iterator object - * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if - * the iterator is out of sync with its service. - * @stable ICU 2.2 - */ -U_STABLE void U_EXPORT2 -uenum_reset(UEnumeration* en, UErrorCode* status); - -#if U_SHOW_CPLUSPLUS_API - -/** - * Given a StringEnumeration, wrap it in a UEnumeration. The - * StringEnumeration is adopted; after this call, the caller must not - * delete it (regardless of error status). - * @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration. - * @param ec the error code. - * @return a UEnumeration wrapping the adopted StringEnumeration. - * @stable ICU 4.2 - */ -U_STABLE UEnumeration* U_EXPORT2 -uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec); - -#endif - -/** - * Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null. - * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close. - * \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration - * @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller. - * @param count length of the array - * @param ec error code - * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory. - * @see uenum_close - * @stable ICU 50 - */ -U_STABLE UEnumeration* U_EXPORT2 -uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count, - UErrorCode* ec); - -/** - * Given an array of const char* strings (invariant chars only), return a UEnumeration. 
String pointers from 0..count-1 must not be null. - * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close. - * \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration - * @param strings array of char* strings (each null terminated). All storage is owned by the caller. - * @param count length of the array - * @param ec error code - * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory - * @see uenum_close - * @stable ICU 50 - */ -U_STABLE UEnumeration* U_EXPORT2 -uenum_openCharStringsEnumeration(const char* const strings[], int32_t count, - UErrorCode* ec); - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/uidna.h b/deps/node/deps/icu-small/source/common/unicode/uidna.h deleted file mode 100644 index cb79ba85..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uidna.h +++ /dev/null @@ -1,772 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ******************************************************************************* - * - * Copyright (C) 2003-2014, International Business Machines - * Corporation and others. All Rights Reserved. - * - ******************************************************************************* - * file name: uidna.h - * encoding: UTF-8 - * tab size: 8 (not used) - * indentation:4 - * - * created on: 2003feb1 - * created by: Ram Viswanadha - */ - -#ifndef __UIDNA_H__ -#define __UIDNA_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_IDNA - -#include "unicode/localpointer.h" -#include "unicode/parseerr.h" - -/** - * \file - * \brief C API: Internationalizing Domain Names in Applications (IDNA) - * - * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h. - * - * The C API functions which do take a UIDNA * service object pointer - * implement UTS #46 and IDNA2008. 
- * - * IDNA2003 is obsolete. - * The C API functions which do not take a service object pointer - * implement IDNA2003. They are all deprecated. - */ - -/* - * IDNA option bit set values. - */ -enum { - /** - * Default options value: None of the other options are set. - * For use in static worker and factory methods. - * @stable ICU 2.6 - */ - UIDNA_DEFAULT=0, -#ifndef U_HIDE_DEPRECATED_API - /** - * Option to allow unassigned code points in domain names and labels. - * For use in static worker and factory methods. - *

This option is ignored by the UTS46 implementation. - * (UTS #46 disallows unassigned code points.) - * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA. - */ - UIDNA_ALLOW_UNASSIGNED=1, -#endif /* U_HIDE_DEPRECATED_API */ - /** - * Option to check whether the input conforms to the STD3 ASCII rules, - * for example the restriction of labels to LDH characters - * (ASCII Letters, Digits and Hyphen-Minus). - * For use in static worker and factory methods. - * @stable ICU 2.6 - */ - UIDNA_USE_STD3_RULES=2, - /** - * IDNA option to check for whether the input conforms to the BiDi rules. - * For use in static worker and factory methods. - *

This option is ignored by the IDNA2003 implementation. - * (IDNA2003 always performs a BiDi check.) - * @stable ICU 4.6 - */ - UIDNA_CHECK_BIDI=4, - /** - * IDNA option to check for whether the input conforms to the CONTEXTJ rules. - * For use in static worker and factory methods. - *

This option is ignored by the IDNA2003 implementation. - * (The CONTEXTJ check is new in IDNA2008.) - * @stable ICU 4.6 - */ - UIDNA_CHECK_CONTEXTJ=8, - /** - * IDNA option for nontransitional processing in ToASCII(). - * For use in static worker and factory methods. - *

By default, ToASCII() uses transitional processing. - *

This option is ignored by the IDNA2003 implementation. - * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) - * @stable ICU 4.6 - */ - UIDNA_NONTRANSITIONAL_TO_ASCII=0x10, - /** - * IDNA option for nontransitional processing in ToUnicode(). - * For use in static worker and factory methods. - *

By default, ToUnicode() uses transitional processing. - *

This option is ignored by the IDNA2003 implementation. - * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) - * @stable ICU 4.6 - */ - UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20, - /** - * IDNA option to check for whether the input conforms to the CONTEXTO rules. - * For use in static worker and factory methods. - *

This option is ignored by the IDNA2003 implementation. - * (The CONTEXTO check is new in IDNA2008.) - *

This is for use by registries for IDNA2008 conformance. - * UTS #46 does not require the CONTEXTO check. - * @stable ICU 49 - */ - UIDNA_CHECK_CONTEXTO=0x40 -}; - -/** - * Opaque C service object type for the new IDNA API. - * @stable ICU 4.6 - */ -struct UIDNA; -typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */ - -/** - * Returns a UIDNA instance which implements UTS #46. - * Returns an unmodifiable instance, owned by the caller. - * Cache it for multiple operations, and uidna_close() it when done. - * The instance is thread-safe, that is, it can be used concurrently. - * - * For details about the UTS #46 implementation see the IDNA C++ class in idna.h. - * - * @param options Bit set to modify the processing and error checking. - * See option bit set values in uidna.h. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the UTS #46 UIDNA instance, if successful - * @stable ICU 4.6 - */ -U_STABLE UIDNA * U_EXPORT2 -uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode); - -/** - * Closes a UIDNA instance. - * @param idna UIDNA instance to be closed - * @stable ICU 4.6 - */ -U_STABLE void U_EXPORT2 -uidna_close(UIDNA *idna); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUIDNAPointer - * "Smart pointer" class, closes a UIDNA via uidna_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.6 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close); - -U_NAMESPACE_END - -#endif - -/** - * Output container for IDNA processing errors. 
- * Initialize with UIDNA_INFO_INITIALIZER: - * \code - * UIDNAInfo info = UIDNA_INFO_INITIALIZER; - * int32_t length = uidna_nameToASCII(..., &info, &errorCode); - * if(U_SUCCESS(errorCode) && info.errors!=0) { ... } - * \endcode - * @stable ICU 4.6 - */ -typedef struct UIDNAInfo { - /** sizeof(UIDNAInfo) @stable ICU 4.6 */ - int16_t size; - /** - * Set to TRUE if transitional and nontransitional processing produce different results. - * For details see C++ IDNAInfo::isTransitionalDifferent(). - * @stable ICU 4.6 - */ - UBool isTransitionalDifferent; - UBool reservedB3; /**< Reserved field, do not use. @internal */ - /** - * Bit set indicating IDNA processing errors. 0 if no errors. - * See UIDNA_ERROR_... constants. - * @stable ICU 4.6 - */ - uint32_t errors; - int32_t reservedI2; /**< Reserved field, do not use. @internal */ - int32_t reservedI3; /**< Reserved field, do not use. @internal */ -} UIDNAInfo; - -/** - * Static initializer for a UIDNAInfo struct. - * @stable ICU 4.6 - */ -#define UIDNA_INFO_INITIALIZER { \ - (int16_t)sizeof(UIDNAInfo), \ - FALSE, FALSE, \ - 0, 0, 0 } - -/** - * Converts a single domain name label into its ASCII form for DNS lookup. - * If any processing step fails, then pInfo->errors will be non-zero and - * the result might not be an ASCII string. - * The label might be modified according to the types of errors. - * Labels with severe errors will be left in (or turned into) their Unicode form. - * - * The UErrorCode indicates an error only in exceptional cases, - * such as a U_MEMORY_ALLOCATION_ERROR. - * - * @param idna UIDNA instance - * @param label Input domain name label - * @param length Label length, or -1 if NUL-terminated - * @param dest Destination string buffer - * @param capacity Destination buffer capacity - * @param pInfo Output container of IDNA processing details. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. 
Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return destination string length - * @stable ICU 4.6 - */ -U_STABLE int32_t U_EXPORT2 -uidna_labelToASCII(const UIDNA *idna, - const UChar *label, int32_t length, - UChar *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode); - -/** - * Converts a single domain name label into its Unicode form for human-readable display. - * If any processing step fails, then pInfo->errors will be non-zero. - * The label might be modified according to the types of errors. - * - * The UErrorCode indicates an error only in exceptional cases, - * such as a U_MEMORY_ALLOCATION_ERROR. - * - * @param idna UIDNA instance - * @param label Input domain name label - * @param length Label length, or -1 if NUL-terminated - * @param dest Destination string buffer - * @param capacity Destination buffer capacity - * @param pInfo Output container of IDNA processing details. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return destination string length - * @stable ICU 4.6 - */ -U_STABLE int32_t U_EXPORT2 -uidna_labelToUnicode(const UIDNA *idna, - const UChar *label, int32_t length, - UChar *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode); - -/** - * Converts a whole domain name into its ASCII form for DNS lookup. - * If any processing step fails, then pInfo->errors will be non-zero and - * the result might not be an ASCII string. - * The domain name might be modified according to the types of errors. - * Labels with severe errors will be left in (or turned into) their Unicode form. - * - * The UErrorCode indicates an error only in exceptional cases, - * such as a U_MEMORY_ALLOCATION_ERROR. 
- * - * @param idna UIDNA instance - * @param name Input domain name - * @param length Domain name length, or -1 if NUL-terminated - * @param dest Destination string buffer - * @param capacity Destination buffer capacity - * @param pInfo Output container of IDNA processing details. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return destination string length - * @stable ICU 4.6 - */ -U_STABLE int32_t U_EXPORT2 -uidna_nameToASCII(const UIDNA *idna, - const UChar *name, int32_t length, - UChar *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode); - -/** - * Converts a whole domain name into its Unicode form for human-readable display. - * If any processing step fails, then pInfo->errors will be non-zero. - * The domain name might be modified according to the types of errors. - * - * The UErrorCode indicates an error only in exceptional cases, - * such as a U_MEMORY_ALLOCATION_ERROR. - * - * @param idna UIDNA instance - * @param name Input domain name - * @param length Domain name length, or -1 if NUL-terminated - * @param dest Destination string buffer - * @param capacity Destination buffer capacity - * @param pInfo Output container of IDNA processing details. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return destination string length - * @stable ICU 4.6 - */ -U_STABLE int32_t U_EXPORT2 -uidna_nameToUnicode(const UIDNA *idna, - const UChar *name, int32_t length, - UChar *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode); - -/* UTF-8 versions of the processing methods --------------------------------- */ - -/** - * Converts a single domain name label into its ASCII form for DNS lookup. - * UTF-8 version of uidna_labelToASCII(), same behavior. - * - * @param idna UIDNA instance - * @param label Input domain name label - * @param length Label length, or -1 if NUL-terminated - * @param dest Destination string buffer - * @param capacity Destination buffer capacity - * @param pInfo Output container of IDNA processing details. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return destination string length - * @stable ICU 4.6 - */ -U_STABLE int32_t U_EXPORT2 -uidna_labelToASCII_UTF8(const UIDNA *idna, - const char *label, int32_t length, - char *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode); - -/** - * Converts a single domain name label into its Unicode form for human-readable display. - * UTF-8 version of uidna_labelToUnicode(), same behavior. - * - * @param idna UIDNA instance - * @param label Input domain name label - * @param length Label length, or -1 if NUL-terminated - * @param dest Destination string buffer - * @param capacity Destination buffer capacity - * @param pInfo Output container of IDNA processing details. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return destination string length - * @stable ICU 4.6 - */ -U_STABLE int32_t U_EXPORT2 -uidna_labelToUnicodeUTF8(const UIDNA *idna, - const char *label, int32_t length, - char *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode); - -/** - * Converts a whole domain name into its ASCII form for DNS lookup. - * UTF-8 version of uidna_nameToASCII(), same behavior. - * - * @param idna UIDNA instance - * @param name Input domain name - * @param length Domain name length, or -1 if NUL-terminated - * @param dest Destination string buffer - * @param capacity Destination buffer capacity - * @param pInfo Output container of IDNA processing details. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return destination string length - * @stable ICU 4.6 - */ -U_STABLE int32_t U_EXPORT2 -uidna_nameToASCII_UTF8(const UIDNA *idna, - const char *name, int32_t length, - char *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode); - -/** - * Converts a whole domain name into its Unicode form for human-readable display. - * UTF-8 version of uidna_nameToUnicode(), same behavior. - * - * @param idna UIDNA instance - * @param name Input domain name - * @param length Domain name length, or -1 if NUL-terminated - * @param dest Destination string buffer - * @param capacity Destination buffer capacity - * @param pInfo Output container of IDNA processing details. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return destination string length - * @stable ICU 4.6 - */ -U_STABLE int32_t U_EXPORT2 -uidna_nameToUnicodeUTF8(const UIDNA *idna, - const char *name, int32_t length, - char *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode); - -/* - * IDNA error bit set values. - * When a domain name or label fails a processing step or does not meet the - * validity criteria, then one or more of these error bits are set. - */ -enum { - /** - * A non-final domain name label (or the whole domain name) is empty. - * @stable ICU 4.6 - */ - UIDNA_ERROR_EMPTY_LABEL=1, - /** - * A domain name label is longer than 63 bytes. - * (See STD13/RFC1034 3.1. Name space specifications and terminology.) - * This is only checked in ToASCII operations, and only if the output label is all-ASCII. - * @stable ICU 4.6 - */ - UIDNA_ERROR_LABEL_TOO_LONG=2, - /** - * A domain name is longer than 255 bytes in its storage form. - * (See STD13/RFC1034 3.1. Name space specifications and terminology.) - * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII. - * @stable ICU 4.6 - */ - UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4, - /** - * A label starts with a hyphen-minus ('-'). - * @stable ICU 4.6 - */ - UIDNA_ERROR_LEADING_HYPHEN=8, - /** - * A label ends with a hyphen-minus ('-'). - * @stable ICU 4.6 - */ - UIDNA_ERROR_TRAILING_HYPHEN=0x10, - /** - * A label contains hyphen-minus ('-') in the third and fourth positions. - * @stable ICU 4.6 - */ - UIDNA_ERROR_HYPHEN_3_4=0x20, - /** - * A label starts with a combining mark. - * @stable ICU 4.6 - */ - UIDNA_ERROR_LEADING_COMBINING_MARK=0x40, - /** - * A label or domain name contains disallowed characters. - * @stable ICU 4.6 - */ - UIDNA_ERROR_DISALLOWED=0x80, - /** - * A label starts with "xn--" but does not contain valid Punycode. - * That is, an xn-- label failed Punycode decoding. - * @stable ICU 4.6 - */ - UIDNA_ERROR_PUNYCODE=0x100, - /** - * A label contains a dot=full stop. 
- * This can occur in an input string for a single-label function. - * @stable ICU 4.6 - */ - UIDNA_ERROR_LABEL_HAS_DOT=0x200, - /** - * An ACE label does not contain a valid label string. - * The label was successfully ACE (Punycode) decoded but the resulting - * string had severe validation errors. For example, - * it might contain characters that are not allowed in ACE labels, - * or it might not be normalized. - * @stable ICU 4.6 - */ - UIDNA_ERROR_INVALID_ACE_LABEL=0x400, - /** - * A label does not meet the IDNA BiDi requirements (for right-to-left characters). - * @stable ICU 4.6 - */ - UIDNA_ERROR_BIDI=0x800, - /** - * A label does not meet the IDNA CONTEXTJ requirements. - * @stable ICU 4.6 - */ - UIDNA_ERROR_CONTEXTJ=0x1000, - /** - * A label does not meet the IDNA CONTEXTO requirements for punctuation characters. - * Some punctuation characters "Would otherwise have been DISALLOWED" - * but are allowed in certain contexts. (RFC 5892) - * @stable ICU 49 - */ - UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000, - /** - * A label does not meet the IDNA CONTEXTO requirements for digits. - * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx). - * @stable ICU 49 - */ - UIDNA_ERROR_CONTEXTO_DIGITS=0x4000 -}; - -#ifndef U_HIDE_DEPRECATED_API - -/* IDNA2003 API ------------------------------------------------------------- */ - -/** - * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC. - * This operation is done on single labels before sending it to something that expects - * ASCII names. A label is an individual part of a domain name. Labels are usually - * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com". - * - * IDNA2003 API Overview: - * - * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC - * (http://www.ietf.org/rfc/rfc3490.txt). - * The RFC defines 2 operations: ToASCII and ToUnicode. 
Domain name labels - * containing non-ASCII code points are processed by the - * ToASCII operation before passing it to resolver libraries. Domain names - * that are obtained from resolver libraries are processed by the - * ToUnicode operation before displaying the domain name to the user. - * IDNA requires that implementations process input strings with Nameprep - * (http://www.ietf.org/rfc/rfc3491.txt), - * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt), - * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt). - * Implementations of IDNA MUST fully implement Nameprep and Punycode; - * neither Nameprep nor Punycode are optional. - * The input and output of ToASCII and ToUnicode operations are Unicode - * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations - * multiple times to an input string will yield the same result as applying the operation - * once. - * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) - * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string). - * - * @param src Input UChar array containing label in Unicode. - * @param srcLength Number of UChars in src, or -1 if NUL-terminated. - * @param dest Output UChar array with ASCII (ACE encoded) label. - * @param destCapacity Size of dest. - * @param options A bit set of options: - * - * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points - * and do not use STD3 ASCII rules - * If unassigned code points are found the operation fails with - * U_UNASSIGNED_ERROR error code. - * - * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations - * If this option is set, the unassigned code points are in the input - * are treated as normal Unicode code points. 
- * - * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions - * If this option is set and the input does not satisfy STD3 rules, - * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR - * - * @param parseError Pointer to UParseError struct to receive information on position - * of error if an error is encountered. Can be NULL. - * @param status ICU in/out error code parameter. - * U_INVALID_CHAR_FOUND if src contains - * unmatched single surrogates. - * U_INDEX_OUTOFBOUNDS_ERROR if src contains - * too many code points. - * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. - * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA. - */ -U_DEPRECATED int32_t U_EXPORT2 -uidna_toASCII(const UChar* src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UParseError* parseError, - UErrorCode* status); - - -/** - * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC. - * This operation is done on single labels before sending it to something that expects - * Unicode names. A label is an individual part of a domain name. Labels are usually - * separated by dots; for e.g. "www.example.com" is composed of 3 labels "www","example", and "com". - * - * @param src Input UChar array containing ASCII (ACE encoded) label. - * @param srcLength Number of UChars in src, or -1 if NUL-terminated. - * @param dest Output Converted UChar array containing Unicode equivalent of label. - * @param destCapacity Size of dest. - * @param options A bit set of options: - * - * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points - * and do not use STD3 ASCII rules - * If unassigned code points are found the operation fails with - * U_UNASSIGNED_ERROR error code. 
- * - * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations - * If this option is set, the unassigned code points are in the input - * are treated as normal Unicode code points. Note: This option is - * required on toUnicode operation because the RFC mandates - * verification of decoded ACE input by applying toASCII and comparing - * its output with source - * - * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions - * If this option is set and the input does not satisfy STD3 rules, - * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR - * - * @param parseError Pointer to UParseError struct to receive information on position - * of error if an error is encountered. Can be NULL. - * @param status ICU in/out error code parameter. - * U_INVALID_CHAR_FOUND if src contains - * unmatched single surrogates. - * U_INDEX_OUTOFBOUNDS_ERROR if src contains - * too many code points. - * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. - * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA. - */ -U_DEPRECATED int32_t U_EXPORT2 -uidna_toUnicode(const UChar* src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UParseError* parseError, - UErrorCode* status); - - -/** - * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. - * This operation is done on complete domain names, e.g: "www.example.com". - * It is important to note that this operation can fail. If it fails, then the input - * domain name cannot be used as an Internationalized Domain Name and the application - * should have methods defined to deal with the failure. 
- * - * Note: IDNA RFC specifies that a conformant application should divide a domain name - * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, - * and then convert. This function does not offer that level of granularity. The options once - * set will apply to all labels in the domain name - * - * @param src Input UChar array containing IDN in Unicode. - * @param srcLength Number of UChars in src, or -1 if NUL-terminated. - * @param dest Output UChar array with ASCII (ACE encoded) IDN. - * @param destCapacity Size of dest. - * @param options A bit set of options: - * - * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points - * and do not use STD3 ASCII rules - * If unassigned code points are found the operation fails with - * U_UNASSIGNED_CODE_POINT_FOUND error code. - * - * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations - * If this option is set, the unassigned code points are in the input - * are treated as normal Unicode code points. - * - * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions - * If this option is set and the input does not satisfy STD3 rules, - * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR - * - * @param parseError Pointer to UParseError struct to receive information on position - * of error if an error is encountered. Can be NULL. - * @param status ICU in/out error code parameter. - * U_INVALID_CHAR_FOUND if src contains - * unmatched single surrogates. - * U_INDEX_OUTOFBOUNDS_ERROR if src contains - * too many code points. - * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. - * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA. 
- */ -U_DEPRECATED int32_t U_EXPORT2 -uidna_IDNToASCII( const UChar* src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UParseError* parseError, - UErrorCode* status); - -/** - * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. - * This operation is done on complete domain names, e.g: "www.example.com". - * - * Note: IDNA RFC specifies that a conformant application should divide a domain name - * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, - * and then convert. This function does not offer that level of granularity. The options once - * set will apply to all labels in the domain name - * - * @param src Input UChar array containing IDN in ASCII (ACE encoded) form. - * @param srcLength Number of UChars in src, or -1 if NUL-terminated. - * @param dest Output UChar array containing Unicode equivalent of source IDN. - * @param destCapacity Size of dest. - * @param options A bit set of options: - * - * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points - * and do not use STD3 ASCII rules - * If unassigned code points are found the operation fails with - * U_UNASSIGNED_CODE_POINT_FOUND error code. - * - * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations - * If this option is set, the unassigned code points are in the input - * are treated as normal Unicode code points. - * - * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions - * If this option is set and the input does not satisfy STD3 rules, - * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR - * - * @param parseError Pointer to UParseError struct to receive information on position - * of error if an error is encountered. Can be NULL. - * @param status ICU in/out error code parameter. - * U_INVALID_CHAR_FOUND if src contains - * unmatched single surrogates. 
- * U_INDEX_OUTOFBOUNDS_ERROR if src contains - * too many code points. - * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. - * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA. - */ -U_DEPRECATED int32_t U_EXPORT2 -uidna_IDNToUnicode( const UChar* src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UParseError* parseError, - UErrorCode* status); - -/** - * IDNA2003: Compare two IDN strings for equivalence. - * This function splits the domain names into labels and compares them. - * According to IDN RFC, whenever two labels are compared, they are - * considered equal if and only if their ASCII forms (obtained by - * applying toASCII) match using an case-insensitive ASCII comparison. - * Two domain names are considered a match if and only if all labels - * match regardless of whether label separators match. - * - * @param s1 First source string. - * @param length1 Length of first source string, or -1 if NUL-terminated. - * - * @param s2 Second source string. - * @param length2 Length of second source string, or -1 if NUL-terminated. - * @param options A bit set of options: - * - * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points - * and do not use STD3 ASCII rules - * If unassigned code points are found the operation fails with - * U_UNASSIGNED_CODE_POINT_FOUND error code. - * - * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations - * If this option is set, the unassigned code points are in the input - * are treated as normal Unicode code points. 
- * - * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions - * If this option is set and the input does not satisfy STD3 rules, - * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR - * - * @param status ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return <0 or 0 or >0 as usual for string comparisons - * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA. - */ -U_DEPRECATED int32_t U_EXPORT2 -uidna_compare( const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - int32_t options, - UErrorCode* status); - -#endif /* U_HIDE_DEPRECATED_API */ - -#endif /* #if !UCONFIG_NO_IDNA */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/uiter.h b/deps/node/deps/icu-small/source/common/unicode/uiter.h deleted file mode 100644 index 3b853720..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uiter.h +++ /dev/null @@ -1,709 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2011 International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uiter.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jan18 -* created by: Markus W. Scherer -*/ - -#ifndef __UITER_H__ -#define __UITER_H__ - -/** - * \file - * \brief C API: Unicode Character Iteration - * - * @see UCharIterator - */ - -#include "unicode/utypes.h" - -#if U_SHOW_CPLUSPLUS_API - U_NAMESPACE_BEGIN - - class CharacterIterator; - class Replaceable; - - U_NAMESPACE_END -#endif - -U_CDECL_BEGIN - -struct UCharIterator; -typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. 
@stable ICU 2.1 */ - -/** - * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). - * @see UCharIteratorMove - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef enum UCharIteratorOrigin { - UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH -} UCharIteratorOrigin; - -/** Constants for UCharIterator. @stable ICU 2.6 */ -enum { - /** - * Constant value that may be returned by UCharIteratorMove - * indicating that the final UTF-16 index is not known, but that the move succeeded. - * This can occur when moving relative to limit or length, or - * when moving relative to the current index after a setState() - * when the current UTF-16 index is not known. - * - * It would be very inefficient to have to count from the beginning of the text - * just to get the current/limit/length index after moving relative to it. - * The actual index can be determined with getIndex(UITER_CURRENT) - * which will count the UChars if necessary. - * - * @stable ICU 2.6 - */ - UITER_UNKNOWN_INDEX=-2 -}; - - -/** - * Constant for UCharIterator getState() indicating an error or - * an unknown state. - * Returned by uiter_getState()/UCharIteratorGetState - * when an error occurs. - * Also, some UCharIterator implementations may not be able to return - * a valid state for each position. This will be clearly documented - * for each such iterator (none of the public ones here). - * - * @stable ICU 2.6 - */ -#define UITER_NO_STATE ((uint32_t)0xffffffff) - -/** - * Function type declaration for UCharIterator.getIndex(). - * - * Gets the current position, or the start or limit of the - * iteration range. - * - * This function may perform slowly for UITER_CURRENT after setState() was called, - * or for UITER_LENGTH, because an iterator implementation may have to count - * UChars if the underlying storage is not UTF-16. 
- * - * @param iter the UCharIterator structure ("this pointer") - * @param origin get the 0, start, limit, length, or current index - * @return the requested index, or U_SENTINEL in an error condition - * - * @see UCharIteratorOrigin - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef int32_t U_CALLCONV -UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); - -/** - * Function type declaration for UCharIterator.move(). - * - * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). - * - * Moves the current position relative to the start or limit of the - * iteration range, or relative to the current position itself. - * The movement is expressed in numbers of code units forward - * or backward by specifying a positive or negative delta. - * Out of bounds movement will be pinned to the start or limit. - * - * This function may perform slowly for moving relative to UITER_LENGTH - * because an iterator implementation may have to count the rest of the - * UChars if the native storage is not UTF-16. - * - * When moving relative to the limit or length, or - * relative to the current position after setState() was called, - * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient - * determination of the actual UTF-16 index. - * The actual index can be determined with getIndex(UITER_CURRENT) - * which will count the UChars if necessary. - * See UITER_UNKNOWN_INDEX for details. - * - * @param iter the UCharIterator structure ("this pointer") - * @param delta can be positive, zero, or negative - * @param origin move relative to the 0, start, limit, length, or current index - * @return the new index, or U_SENTINEL on an error condition, - * or UITER_UNKNOWN_INDEX when the index is not known. 
- * - * @see UCharIteratorOrigin - * @see UCharIterator - * @see UITER_UNKNOWN_INDEX - * @stable ICU 2.1 - */ -typedef int32_t U_CALLCONV -UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); - -/** - * Function type declaration for UCharIterator.hasNext(). - * - * Check if current() and next() can still - * return another code unit. - * - * @param iter the UCharIterator structure ("this pointer") - * @return boolean value for whether current() and next() can still return another code unit - * - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef UBool U_CALLCONV -UCharIteratorHasNext(UCharIterator *iter); - -/** - * Function type declaration for UCharIterator.hasPrevious(). - * - * Check if previous() can still return another code unit. - * - * @param iter the UCharIterator structure ("this pointer") - * @return boolean value for whether previous() can still return another code unit - * - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef UBool U_CALLCONV -UCharIteratorHasPrevious(UCharIterator *iter); - -/** - * Function type declaration for UCharIterator.current(). - * - * Return the code unit at the current position, - * or U_SENTINEL if there is none (index is at the limit). - * - * @param iter the UCharIterator structure ("this pointer") - * @return the current code unit - * - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef UChar32 U_CALLCONV -UCharIteratorCurrent(UCharIterator *iter); - -/** - * Function type declaration for UCharIterator.next(). - * - * Return the code unit at the current index and increment - * the index (post-increment, like s[i++]), - * or return U_SENTINEL if there is none (index is at the limit). 
- * - * @param iter the UCharIterator structure ("this pointer") - * @return the current code unit (and post-increment the current index) - * - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef UChar32 U_CALLCONV -UCharIteratorNext(UCharIterator *iter); - -/** - * Function type declaration for UCharIterator.previous(). - * - * Decrement the index and return the code unit from there - * (pre-decrement, like s[--i]), - * or return U_SENTINEL if there is none (index is at the start). - * - * @param iter the UCharIterator structure ("this pointer") - * @return the previous code unit (after pre-decrementing the current index) - * - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef UChar32 U_CALLCONV -UCharIteratorPrevious(UCharIterator *iter); - -/** - * Function type declaration for UCharIterator.reservedFn(). - * Reserved for future use. - * - * @param iter the UCharIterator structure ("this pointer") - * @param something some integer argument - * @return some integer - * - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef int32_t U_CALLCONV -UCharIteratorReserved(UCharIterator *iter, int32_t something); - -/** - * Function type declaration for UCharIterator.getState(). - * - * Get the "state" of the iterator in the form of a single 32-bit word. - * It is recommended that the state value be calculated to be as small as - * is feasible. For strings with limited lengths, fewer than 32 bits may - * be sufficient. - * - * This is used together with setState()/UCharIteratorSetState - * to save and restore the iterator position more efficiently than with - * getIndex()/move(). - * - * The iterator state is defined as a uint32_t value because it is designed - * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state - * of the character iterator. 
- * - * With some UCharIterator implementations (e.g., UTF-8), - * getting and setting the UTF-16 index with existing functions - * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but - * relatively slow because the iterator has to "walk" from a known index - * to the requested one. - * This takes more time the farther it needs to go. - * - * An opaque state value allows an iterator implementation to provide - * an internal index (UTF-8: the source byte array index) for - * fast, constant-time restoration. - * - * After calling setState(), a getIndex(UITER_CURRENT) may be slow because - * the UTF-16 index may not be restored as well, but the iterator can deliver - * the correct text contents and move relative to the current position - * without performance degradation. - * - * Some UCharIterator implementations may not be able to return - * a valid state for each position, in which case they return UITER_NO_STATE instead. - * This will be clearly documented for each such iterator (none of the public ones here). - * - * @param iter the UCharIterator structure ("this pointer") - * @return the state word - * - * @see UCharIterator - * @see UCharIteratorSetState - * @see UITER_NO_STATE - * @stable ICU 2.6 - */ -typedef uint32_t U_CALLCONV -UCharIteratorGetState(const UCharIterator *iter); - -/** - * Function type declaration for UCharIterator.setState(). - * - * Restore the "state" of the iterator using a state word from a getState() call. - * The iterator object need not be the same one as for which getState() was called, - * but it must be of the same type (set up using the same uiter_setXYZ function) - * and it must iterate over the same string - * (binary identical regardless of memory address). - * For more about the state word see UCharIteratorGetState. 
- * - * After calling setState(), a getIndex(UITER_CURRENT) may be slow because - * the UTF-16 index may not be restored as well, but the iterator can deliver - * the correct text contents and move relative to the current position - * without performance degradation. - * - * @param iter the UCharIterator structure ("this pointer") - * @param state the state word from a getState() call - * on a same-type, same-string iterator - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * - * @see UCharIterator - * @see UCharIteratorGetState - * @stable ICU 2.6 - */ -typedef void U_CALLCONV -UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); - - -/** - * C API for code unit iteration. - * This can be used as a C wrapper around - * CharacterIterator, Replaceable, or implemented using simple strings, etc. - * - * There are two roles for using UCharIterator: - * - * A "provider" sets the necessary function pointers and controls the "protected" - * fields of the UCharIterator structure. A "provider" passes a UCharIterator - * into C APIs that need a UCharIterator as an abstract, flexible string interface. - * - * Implementations of such C APIs are "callers" of UCharIterator functions; - * they only use the "public" function pointers and never access the "protected" - * fields directly. - * - * The current() and next() functions only check the current index against the - * limit, and previous() only checks the current index against the start, - * to see if the iterator already reached the end of the iteration range. - * - * The assumption - in all iterators - is that the index is moved via the API, - * which means it won't go out of bounds, or the index is modified by - * user code that knows enough about the iterator implementation to set valid - * index values. 
- * - * UCharIterator functions return code unit values 0..0xffff, - * or U_SENTINEL if the iteration bounds are reached. - * - * @stable ICU 2.1 - */ -struct UCharIterator { - /** - * (protected) Pointer to string or wrapped object or similar. - * Not used by caller. - * @stable ICU 2.1 - */ - const void *context; - - /** - * (protected) Length of string or similar. - * Not used by caller. - * @stable ICU 2.1 - */ - int32_t length; - - /** - * (protected) Start index or similar. - * Not used by caller. - * @stable ICU 2.1 - */ - int32_t start; - - /** - * (protected) Current index or similar. - * Not used by caller. - * @stable ICU 2.1 - */ - int32_t index; - - /** - * (protected) Limit index or similar. - * Not used by caller. - * @stable ICU 2.1 - */ - int32_t limit; - - /** - * (protected) Used by UTF-8 iterators and possibly others. - * @stable ICU 2.1 - */ - int32_t reservedField; - - /** - * (public) Returns the current position or the - * start or limit index of the iteration range. - * - * @see UCharIteratorGetIndex - * @stable ICU 2.1 - */ - UCharIteratorGetIndex *getIndex; - - /** - * (public) Moves the current position relative to the start or limit of the - * iteration range, or relative to the current position itself. - * The movement is expressed in numbers of code units forward - * or backward by specifying a positive or negative delta. - * - * @see UCharIteratorMove - * @stable ICU 2.1 - */ - UCharIteratorMove *move; - - /** - * (public) Check if current() and next() can still - * return another code unit. - * - * @see UCharIteratorHasNext - * @stable ICU 2.1 - */ - UCharIteratorHasNext *hasNext; - - /** - * (public) Check if previous() can still return another code unit. - * - * @see UCharIteratorHasPrevious - * @stable ICU 2.1 - */ - UCharIteratorHasPrevious *hasPrevious; - - /** - * (public) Return the code unit at the current position, - * or U_SENTINEL if there is none (index is at the limit). 
- * - * @see UCharIteratorCurrent - * @stable ICU 2.1 - */ - UCharIteratorCurrent *current; - - /** - * (public) Return the code unit at the current index and increment - * the index (post-increment, like s[i++]), - * or return U_SENTINEL if there is none (index is at the limit). - * - * @see UCharIteratorNext - * @stable ICU 2.1 - */ - UCharIteratorNext *next; - - /** - * (public) Decrement the index and return the code unit from there - * (pre-decrement, like s[--i]), - * or return U_SENTINEL if there is none (index is at the start). - * - * @see UCharIteratorPrevious - * @stable ICU 2.1 - */ - UCharIteratorPrevious *previous; - - /** - * (public) Reserved for future use. Currently NULL. - * - * @see UCharIteratorReserved - * @stable ICU 2.1 - */ - UCharIteratorReserved *reservedFn; - - /** - * (public) Return the state of the iterator, to be restored later with setState(). - * This function pointer is NULL if the iterator does not implement it. - * - * @see UCharIteratorGet - * @stable ICU 2.6 - */ - UCharIteratorGetState *getState; - - /** - * (public) Restore the iterator state from the state word from a call - * to getState(). - * This function pointer is NULL if the iterator does not implement it. - * - * @see UCharIteratorSet - * @stable ICU 2.6 - */ - UCharIteratorSetState *setState; -}; - -/** - * Helper function for UCharIterator to get the code point - * at the current index. - * - * Return the code point that includes the code unit at the current position, - * or U_SENTINEL if there is none (index is at the limit). - * If the current code unit is a lead or trail surrogate, - * then the following or preceding surrogate is used to form - * the code point value. 
- * - * @param iter the UCharIterator structure ("this pointer") - * @return the current code point - * - * @see UCharIterator - * @see U16_GET - * @see UnicodeString::char32At() - * @stable ICU 2.1 - */ -U_STABLE UChar32 U_EXPORT2 -uiter_current32(UCharIterator *iter); - -/** - * Helper function for UCharIterator to get the next code point. - * - * Return the code point at the current index and increment - * the index (post-increment, like s[i++]), - * or return U_SENTINEL if there is none (index is at the limit). - * - * @param iter the UCharIterator structure ("this pointer") - * @return the current code point (and post-increment the current index) - * - * @see UCharIterator - * @see U16_NEXT - * @stable ICU 2.1 - */ -U_STABLE UChar32 U_EXPORT2 -uiter_next32(UCharIterator *iter); - -/** - * Helper function for UCharIterator to get the previous code point. - * - * Decrement the index and return the code point from there - * (pre-decrement, like s[--i]), - * or return U_SENTINEL if there is none (index is at the start). - * - * @param iter the UCharIterator structure ("this pointer") - * @return the previous code point (after pre-decrementing the current index) - * - * @see UCharIterator - * @see U16_PREV - * @stable ICU 2.1 - */ -U_STABLE UChar32 U_EXPORT2 -uiter_previous32(UCharIterator *iter); - -/** - * Get the "state" of the iterator in the form of a single 32-bit word. - * This is a convenience function that calls iter->getState(iter) - * if iter->getState is not NULL; - * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. - * - * Some UCharIterator implementations may not be able to return - * a valid state for each position, in which case they return UITER_NO_STATE instead. - * This will be clearly documented for each such iterator (none of the public ones here). 
- * - * @param iter the UCharIterator structure ("this pointer") - * @return the state word - * - * @see UCharIterator - * @see UCharIteratorGetState - * @see UITER_NO_STATE - * @stable ICU 2.6 - */ -U_STABLE uint32_t U_EXPORT2 -uiter_getState(const UCharIterator *iter); - -/** - * Restore the "state" of the iterator using a state word from a getState() call. - * This is a convenience function that calls iter->setState(iter, state, pErrorCode) - * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. - * - * @param iter the UCharIterator structure ("this pointer") - * @param state the state word from a getState() call - * on a same-type, same-string iterator - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * - * @see UCharIterator - * @see UCharIteratorSetState - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); - -/** - * Set up a UCharIterator to iterate over a string. - * - * Sets the UCharIterator function pointers for iteration over the string s - * with iteration boundaries start=index=0 and length=limit=string length. - * The "provider" may set the start, index, and limit values at any time - * within the range 0..length. - * The length field will be ignored. - * - * The string pointer s is set into UCharIterator.context without copying - * or reallocating the string contents. - * - * getState() simply returns the current index. - * move() will always return the final index. 
- * - * @param iter UCharIterator structure to be set for iteration - * @param s String to iterate over - * @param length Length of s, or -1 if NUL-terminated - * - * @see UCharIterator - * @stable ICU 2.1 - */ -U_STABLE void U_EXPORT2 -uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); - -/** - * Set up a UCharIterator to iterate over a UTF-16BE string - * (byte vector with a big-endian pair of bytes per UChar). - * - * Everything works just like with a normal UChar iterator (uiter_setString), - * except that UChars are assembled from byte pairs, - * and that the length argument here indicates an even number of bytes. - * - * getState() simply returns the current index. - * move() will always return the final index. - * - * @param iter UCharIterator structure to be set for iteration - * @param s UTF-16BE string to iterate over - * @param length Length of s as an even number of bytes, or -1 if NUL-terminated - * (NUL means pair of 0 bytes at even index from s) - * - * @see UCharIterator - * @see uiter_setString - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); - -/** - * Set up a UCharIterator to iterate over a UTF-8 string. - * - * Sets the UCharIterator function pointers for iteration over the UTF-8 string s - * with UTF-8 iteration boundaries 0 and length. - * The implementation counts the UTF-16 index on the fly and - * lazily evaluates the UTF-16 length of the text. - * - * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. - * When the reservedField is not 0, then it contains a supplementary code point - * and the UTF-16 index is between the two corresponding surrogates. - * At that point, the UTF-8 index is behind that code point. - * - * The UTF-8 string pointer s is set into UCharIterator.context without copying - * or reallocating the string contents. 
- * - * getState() returns a state value consisting of - * - the current UTF-8 source byte index (bits 31..1) - * - a flag (bit 0) that indicates whether the UChar position is in the middle - * of a surrogate pair - * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) - * - * getState() cannot also encode the UTF-16 index in the state value. - * move(relative to limit or length), or - * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. - * - * @param iter UCharIterator structure to be set for iteration - * @param s UTF-8 string to iterate over - * @param length Length of s in bytes, or -1 if NUL-terminated - * - * @see UCharIterator - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); - -#if U_SHOW_CPLUSPLUS_API - -/** - * Set up a UCharIterator to wrap around a C++ CharacterIterator. - * - * Sets the UCharIterator function pointers for iteration using the - * CharacterIterator charIter. - * - * The CharacterIterator pointer charIter is set into UCharIterator.context - * without copying or cloning the CharacterIterator object. - * The other "protected" UCharIterator fields are set to 0 and will be ignored. - * The iteration index and boundaries are controlled by the CharacterIterator. - * - * getState() simply returns the current index. - * move() will always return the final index. - * - * @param iter UCharIterator structure to be set for iteration - * @param charIter CharacterIterator to wrap - * - * @see UCharIterator - * @stable ICU 2.1 - */ -U_STABLE void U_EXPORT2 -uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); - -/** - * Set up a UCharIterator to iterate over a C++ Replaceable. - * - * Sets the UCharIterator function pointers for iteration over the - * Replaceable rep with iteration boundaries start=index=0 and - * length=limit=rep->length(). 
- * The "provider" may set the start, index, and limit values at any time - * within the range 0..length=rep->length(). - * The length field will be ignored. - * - * The Replaceable pointer rep is set into UCharIterator.context without copying - * or cloning/reallocating the Replaceable object. - * - * getState() simply returns the current index. - * move() will always return the final index. - * - * @param iter UCharIterator structure to be set for iteration - * @param rep Replaceable to iterate over - * - * @see UCharIterator - * @stable ICU 2.1 - */ -U_STABLE void U_EXPORT2 -uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); - -#endif - -U_CDECL_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/uldnames.h b/deps/node/deps/icu-small/source/common/unicode/uldnames.h deleted file mode 100644 index 3a3c0a06..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uldnames.h +++ /dev/null @@ -1,304 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2016, International Business Machines Corporation and -* others. All Rights Reserved. -******************************************************************************* -*/ - -#ifndef __ULDNAMES_H__ -#define __ULDNAMES_H__ - -/** - * \file - * \brief C API: Provides display names of Locale ids and their components. - */ - -#include "unicode/utypes.h" -#include "unicode/localpointer.h" -#include "unicode/uscript.h" -#include "unicode/udisplaycontext.h" - -/** - * Enum used in LocaleDisplayNames::createInstance. - * @stable ICU 4.4 - */ -typedef enum { - /** - * Use standard names when generating a locale name, - * e.g. en_GB displays as 'English (United Kingdom)'. - * @stable ICU 4.4 - */ - ULDN_STANDARD_NAMES = 0, - /** - * Use dialect names, when generating a locale name, - * e.g. 
en_GB displays as 'British English'. - * @stable ICU 4.4 - */ - ULDN_DIALECT_NAMES -} UDialectHandling; - -/** - * Opaque C service object type for the locale display names API - * @stable ICU 4.4 - */ -struct ULocaleDisplayNames; - -/** - * C typedef for struct ULocaleDisplayNames. - * @stable ICU 4.4 - */ -typedef struct ULocaleDisplayNames ULocaleDisplayNames; - -#if !UCONFIG_NO_FORMATTING - -/** - * Returns an instance of LocaleDisplayNames that returns names - * formatted for the provided locale, using the provided - * dialectHandling. The usual value for dialectHandling is - * ULOC_STANDARD_NAMES. - * - * @param locale the display locale - * @param dialectHandling how to select names for locales - * @return a ULocaleDisplayNames instance - * @param pErrorCode the status code - * @stable ICU 4.4 - */ -U_STABLE ULocaleDisplayNames * U_EXPORT2 -uldn_open(const char * locale, - UDialectHandling dialectHandling, - UErrorCode *pErrorCode); - -/** - * Closes a ULocaleDisplayNames instance obtained from uldn_open(). - * @param ldn the ULocaleDisplayNames instance to be closed - * @stable ICU 4.4 - */ -U_STABLE void U_EXPORT2 -uldn_close(ULocaleDisplayNames *ldn); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalULocaleDisplayNamesPointer - * "Smart pointer" class, closes a ULocaleDisplayNames via uldn_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalULocaleDisplayNamesPointer, ULocaleDisplayNames, uldn_close); - -U_NAMESPACE_END - -#endif - -/* getters for state */ - -/** - * Returns the locale used to determine the display names. This is - * not necessarily the same locale passed to {@link #uldn_open}. 
- * @param ldn the LocaleDisplayNames instance - * @return the display locale - * @stable ICU 4.4 - */ -U_STABLE const char * U_EXPORT2 -uldn_getLocale(const ULocaleDisplayNames *ldn); - -/** - * Returns the dialect handling used in the display names. - * @param ldn the LocaleDisplayNames instance - * @return the dialect handling enum - * @stable ICU 4.4 - */ -U_STABLE UDialectHandling U_EXPORT2 -uldn_getDialectHandling(const ULocaleDisplayNames *ldn); - -/* names for entire locales */ - -/** - * Returns the display name of the provided locale. - * @param ldn the LocaleDisplayNames instance - * @param locale the locale whose display name to return - * @param result receives the display name - * @param maxResultSize the size of the result buffer - * @param pErrorCode the status code - * @return the actual buffer size needed for the display name. If it's - * greater than maxResultSize, the returned name will be truncated. - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -uldn_localeDisplayName(const ULocaleDisplayNames *ldn, - const char *locale, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode); - -/* names for components of a locale */ - -/** - * Returns the display name of the provided language code. - * @param ldn the LocaleDisplayNames instance - * @param lang the language code whose display name to return - * @param result receives the display name - * @param maxResultSize the size of the result buffer - * @param pErrorCode the status code - * @return the actual buffer size needed for the display name. If it's - * greater than maxResultSize, the returned name will be truncated. - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -uldn_languageDisplayName(const ULocaleDisplayNames *ldn, - const char *lang, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode); - -/** - * Returns the display name of the provided script. 
- * @param ldn the LocaleDisplayNames instance - * @param script the script whose display name to return - * @param result receives the display name - * @param maxResultSize the size of the result buffer - * @param pErrorCode the status code - * @return the actual buffer size needed for the display name. If it's - * greater than maxResultSize, the returned name will be truncated. - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -uldn_scriptDisplayName(const ULocaleDisplayNames *ldn, - const char *script, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode); - -/** - * Returns the display name of the provided script code. - * @param ldn the LocaleDisplayNames instance - * @param scriptCode the script code whose display name to return - * @param result receives the display name - * @param maxResultSize the size of the result buffer - * @param pErrorCode the status code - * @return the actual buffer size needed for the display name. If it's - * greater than maxResultSize, the returned name will be truncated. - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn, - UScriptCode scriptCode, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode); - -/** - * Returns the display name of the provided region code. - * @param ldn the LocaleDisplayNames instance - * @param region the region code whose display name to return - * @param result receives the display name - * @param maxResultSize the size of the result buffer - * @param pErrorCode the status code - * @return the actual buffer size needed for the display name. If it's - * greater than maxResultSize, the returned name will be truncated. 
- * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -uldn_regionDisplayName(const ULocaleDisplayNames *ldn, - const char *region, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode); - -/** - * Returns the display name of the provided variant - * @param ldn the LocaleDisplayNames instance - * @param variant the variant whose display name to return - * @param result receives the display name - * @param maxResultSize the size of the result buffer - * @param pErrorCode the status code - * @return the actual buffer size needed for the display name. If it's - * greater than maxResultSize, the returned name will be truncated. - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -uldn_variantDisplayName(const ULocaleDisplayNames *ldn, - const char *variant, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode); - -/** - * Returns the display name of the provided locale key - * @param ldn the LocaleDisplayNames instance - * @param key the locale key whose display name to return - * @param result receives the display name - * @param maxResultSize the size of the result buffer - * @param pErrorCode the status code - * @return the actual buffer size needed for the display name. If it's - * greater than maxResultSize, the returned name will be truncated. - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -uldn_keyDisplayName(const ULocaleDisplayNames *ldn, - const char *key, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode); - -/** - * Returns the display name of the provided value (used with the provided key). - * @param ldn the LocaleDisplayNames instance - * @param key the locale key - * @param value the locale key's value - * @param result receives the display name - * @param maxResultSize the size of the result buffer - * @param pErrorCode the status code - * @return the actual buffer size needed for the display name. If it's - * greater than maxResultSize, the returned name will be truncated. 
- * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn, - const char *key, - const char *value, - UChar *result, - int32_t maxResultSize, - UErrorCode *pErrorCode); - -/** -* Returns an instance of LocaleDisplayNames that returns names formatted -* for the provided locale, using the provided UDisplayContext settings. -* -* @param locale The display locale -* @param contexts List of one or more context settings (e.g. for dialect -* handling, capitalization, etc. -* @param length Number of items in the contexts list -* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates -* a failure status, the function will do nothing; otherwise this will be -* updated with any new status from the function. -* @return a ULocaleDisplayNames instance -* @stable ICU 51 -*/ -U_STABLE ULocaleDisplayNames * U_EXPORT2 -uldn_openForContext(const char * locale, UDisplayContext *contexts, - int32_t length, UErrorCode *pErrorCode); - -/** -* Returns the UDisplayContext value for the specified UDisplayContextType. -* @param ldn the ULocaleDisplayNames instance -* @param type the UDisplayContextType whose value to return -* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates -* a failure status, the function will do nothing; otherwise this will be -* updated with any new status from the function. -* @return the UDisplayContextValue for the specified type. 
-* @stable ICU 51 -*/ -U_STABLE UDisplayContext U_EXPORT2 -uldn_getContext(const ULocaleDisplayNames *ldn, UDisplayContextType type, - UErrorCode *pErrorCode); - -#endif /* !UCONFIG_NO_FORMATTING */ -#endif /* __ULDNAMES_H__ */ diff --git a/deps/node/deps/icu-small/source/common/unicode/uloc.h b/deps/node/deps/icu-small/source/common/unicode/uloc.h deleted file mode 100644 index 55310708..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uloc.h +++ /dev/null @@ -1,1272 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File ULOC.H -* -* Modification History: -* -* Date Name Description -* 04/01/97 aliu Creation. -* 08/22/98 stephen JDK 1.2 sync. -* 12/08/98 rtg New C API for Locale -* 03/30/99 damiba overhaul -* 03/31/99 helena Javadoc for uloc functions. -* 04/15/99 Madhu Updated Javadoc -******************************************************************************** -*/ - -#ifndef ULOC_H -#define ULOC_H - -#include "unicode/utypes.h" -#include "unicode/uenum.h" - -/** - * \file - * \brief C API: Locale - * - *

ULoc C API for Locale

- * A Locale represents a specific geographical, political, - * or cultural region. An operation that requires a Locale to perform - * its task is called locale-sensitive and uses the Locale - * to tailor information for the user. For example, displaying a number - * is a locale-sensitive operation--the number should be formatted - * according to the customs/conventions of the user's native country, - * region, or culture. In the C APIs, a locales is simply a const char string. - * - *

- * You create a Locale with one of the three options listed below. - * Each of the component is separated by '_' in the locale string. - * \htmlonly

\endhtmlonly - *
- * \code
- *       newLanguage
- *
- *       newLanguage + newCountry
- *
- *       newLanguage + newCountry + newVariant
- * \endcode
- * 
- * \htmlonly
\endhtmlonly - * The first option is a valid ISO - * Language Code. These codes are the lower-case two-letter - * codes as defined by ISO-639. - * You can find a full list of these codes at a number of sites, such as: - *
- * http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt - * - *

- * The second option includes an additional ISO Country - * Code. These codes are the upper-case two-letter codes - * as defined by ISO-3166. - * You can find a full list of these codes at a number of sites, such as: - *
- * http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html - * - *

- * The third option requires another additional information--the - * Variant. - * The Variant codes are vendor and browser-specific. - * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX. - * Where there are two variants, separate them with an underscore, and - * put the most important one first. For - * example, a Traditional Spanish collation might be referenced, with - * "ES", "ES", "Traditional_WIN". - * - *

- * Because a Locale is just an identifier for a region, - * no validity check is performed when you specify a Locale. - * If you want to see whether particular resources are available for the - * Locale you asked for, you must query those resources. For - * example, ask the UNumberFormat for the locales it supports - * using its getAvailable method. - *
Note: When you ask for a resource for a particular - * locale, you get back the best available match, not necessarily - * precisely what you asked for. For more information, look at - * UResourceBundle. - * - *

- * The Locale provides a number of convenient constants - * that you can use to specify the commonly used - * locales. For example, the following refers to a locale - * for the United States: - * \htmlonly

\endhtmlonly - *
- * \code
- *       ULOC_US
- * \endcode
- * 
- * \htmlonly
\endhtmlonly - * - *

- * Once you've specified a locale you can query it for information about - * itself. Use uloc_getCountry to get the ISO Country Code and - * uloc_getLanguage to get the ISO Language Code. You can - * use uloc_getDisplayCountry to get the - * name of the country suitable for displaying to the user. Similarly, - * you can use uloc_getDisplayLanguage to get the name of - * the language suitable for displaying to the user. Interestingly, - * the uloc_getDisplayXXX methods are themselves locale-sensitive - * and have two versions: one that uses the default locale and one - * that takes a locale as an argument and displays the name or country in - * a language appropriate to that locale. - * - *

- * The ICU provides a number of services that perform locale-sensitive - * operations. For example, the unum_xxx functions format - * numbers, currency, or percentages in a locale-sensitive manner. - *

- * \htmlonly
\endhtmlonly - *
- * \code
- *     UErrorCode success = U_ZERO_ERROR;
- *     UNumberFormat *nf;
- *     const char* myLocale = "fr_FR";
- *
- *     nf = unum_open( UNUM_DEFAULT, NULL, success );
- *     unum_close(nf);
- *     nf = unum_open( UNUM_CURRENCY, NULL, success );
- *     unum_close(nf);
- *     nf = unum_open( UNUM_PERCENT, NULL, success );
- *     unum_close(nf);
- * \endcode
- * 
- * \htmlonly
\endhtmlonly - * Each of these methods has two variants; one with an explicit locale - * and one without; the latter using the default locale. - * \htmlonly
\endhtmlonly - *
- * \code
- *
- *     nf = unum_open( UNUM_DEFAULT, myLocale, success );
- *     unum_close(nf);
- *     nf = unum_open( UNUM_CURRENCY, myLocale, success );
- *     unum_close(nf);
- *     nf = unum_open( UNUM_PERCENT, myLocale, success );
- *     unum_close(nf);
- * \endcode
- * 
- * \htmlonly
\endhtmlonly - * A Locale is the mechanism for identifying the kind of services - * (UNumberFormat) that you would like to get. The locale is - * just a mechanism for identifying these services. - * - *

- * Each international service that performs locale-sensitive operations - * allows you - * to get all the available objects of that type. You can sift - * through these objects by language, country, or variant, - * and use the display names to present a menu to the user. - * For example, you can create a menu of all the collation objects - * suitable for a given language. Such classes implement these - * three class methods: - * \htmlonly

\endhtmlonly - *
- * \code
- *       const char* uloc_getAvailable(int32_t index);
- *       int32_t uloc_countAvailable();
- *       int32_t
- *       uloc_getDisplayName(const char* localeID,
- *                 const char* inLocaleID,
- *                 UChar* result,
- *                 int32_t maxResultSize,
- *                  UErrorCode* err);
- *
- * \endcode
- * 
- * \htmlonly
\endhtmlonly - *

- * Concerning POSIX/RFC1766 Locale IDs, - * the getLanguage/getCountry/getVariant/getName functions do understand - * the POSIX type form of language_COUNTRY.ENCODING\@VARIANT - * and if there is not an ICU-stype variant, uloc_getVariant() for example - * will return the one listed after the \@at sign. As well, the hyphen - * "-" is recognized as a country/variant separator similarly to RFC1766. - * So for example, "en-us" will be interpreted as en_US. - * As a result, uloc_getName() is far from a no-op, and will have the - * effect of converting POSIX/RFC1766 IDs into ICU form, although it does - * NOT map any of the actual codes (i.e. russian->ru) in any way. - * Applications should call uloc_getName() at the point where a locale ID - * is coming from an external source (user entry, OS, web browser) - * and pass the resulting string to other ICU functions. For example, - * don't use de-de\@EURO as an argument to resourcebundle. - * - * @see UResourceBundle - */ - -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_CHINESE "zh" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_ENGLISH "en" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_FRENCH "fr" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_GERMAN "de" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_ITALIAN "it" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_JAPANESE "ja" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_KOREAN "ko" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_SIMPLIFIED_CHINESE "zh_CN" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_TRADITIONAL_CHINESE "zh_TW" - -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_CANADA "en_CA" -/** Useful constant for this country/region. 
@stable ICU 2.0 */ -#define ULOC_CANADA_FRENCH "fr_CA" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_CHINA "zh_CN" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_PRC "zh_CN" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_FRANCE "fr_FR" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_GERMANY "de_DE" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_ITALY "it_IT" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_JAPAN "ja_JP" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_KOREA "ko_KR" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_TAIWAN "zh_TW" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_UK "en_GB" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_US "en_US" - -/** - * Useful constant for the maximum size of the language part of a locale ID. - * (including the terminating NULL). - * @stable ICU 2.0 - */ -#define ULOC_LANG_CAPACITY 12 - -/** - * Useful constant for the maximum size of the country part of a locale ID - * (including the terminating NULL). - * @stable ICU 2.0 - */ -#define ULOC_COUNTRY_CAPACITY 4 -/** - * Useful constant for the maximum size of the whole locale ID - * (including the terminating NULL and all keywords). - * @stable ICU 2.0 - */ -#define ULOC_FULLNAME_CAPACITY 157 - -/** - * Useful constant for the maximum size of the script part of a locale ID - * (including the terminating NULL). 
- * @stable ICU 2.8 - */ -#define ULOC_SCRIPT_CAPACITY 6 - -/** - * Useful constant for the maximum size of keywords in a locale - * @stable ICU 2.8 - */ -#define ULOC_KEYWORDS_CAPACITY 96 - -/** - * Useful constant for the maximum total size of keywords and their values in a locale - * @stable ICU 2.8 - */ -#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100 - -/** - * Invariant character separating keywords from the locale string - * @stable ICU 2.8 - */ -#define ULOC_KEYWORD_SEPARATOR '@' - -/** - * Unicode code point for '@' separating keywords from the locale string. - * @see ULOC_KEYWORD_SEPARATOR - * @stable ICU 4.6 - */ -#define ULOC_KEYWORD_SEPARATOR_UNICODE 0x40 - -/** - * Invariant character for assigning value to a keyword - * @stable ICU 2.8 - */ -#define ULOC_KEYWORD_ASSIGN '=' - -/** - * Unicode code point for '=' for assigning value to a keyword. - * @see ULOC_KEYWORD_ASSIGN - * @stable ICU 4.6 - */ -#define ULOC_KEYWORD_ASSIGN_UNICODE 0x3D - -/** - * Invariant character separating keywords - * @stable ICU 2.8 - */ -#define ULOC_KEYWORD_ITEM_SEPARATOR ';' - -/** - * Unicode code point for ';' separating keywords - * @see ULOC_KEYWORD_ITEM_SEPARATOR - * @stable ICU 4.6 - */ -#define ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE 0x3B - -/** - * Constants for *_getLocale() - * Allow user to select whether she wants information on - * requested, valid or actual locale. - * For example, a collator for "en_US_CALIFORNIA" was - * requested. In the current state of ICU (2.0), - * the requested locale is "en_US_CALIFORNIA", - * the valid locale is "en_US" (most specific locale supported by ICU) - * and the actual locale is "root" (the collation data comes unmodified - * from the UCA) - * The locale is considered supported by ICU if there is a core ICU bundle - * for that locale (although it may be empty). 
- * @stable ICU 2.1 - */ -typedef enum { - /** This is locale the data actually comes from - * @stable ICU 2.1 - */ - ULOC_ACTUAL_LOCALE = 0, - /** This is the most specific locale supported by ICU - * @stable ICU 2.1 - */ - ULOC_VALID_LOCALE = 1, - -#ifndef U_HIDE_DEPRECATED_API - /** This is the requested locale - * @deprecated ICU 2.8 - */ - ULOC_REQUESTED_LOCALE = 2, - - /** - * One more than the highest normal ULocDataLocaleType value. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - ULOC_DATA_LOCALE_TYPE_LIMIT = 3 -#endif // U_HIDE_DEPRECATED_API -} ULocDataLocaleType; - -#ifndef U_HIDE_SYSTEM_API -/** - * Gets ICU's default locale. - * The returned string is a snapshot in time, and will remain valid - * and unchanged even when uloc_setDefault() is called. - * The returned storage is owned by ICU, and must not be altered or deleted - * by the caller. - * - * @return the ICU default locale - * @system - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 -uloc_getDefault(void); - -/** - * Sets ICU's default locale. - * By default (without calling this function), ICU's default locale will be based - * on information obtained from the underlying system environment. - *

- * Changes to ICU's default locale do not propagate back to the - * system environment. - *

- * Changes to ICU's default locale to not affect any ICU services that - * may already be open based on the previous default locale value. - * - * @param localeID the new ICU default locale. A value of NULL will try to get - * the system's default locale. - * @param status the error information if the setting of default locale fails - * @system - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -uloc_setDefault(const char* localeID, - UErrorCode* status); -#endif /* U_HIDE_SYSTEM_API */ - -/** - * Gets the language code for the specified locale. - * - * @param localeID the locale to get the ISO language code with - * @param language the language code for localeID - * @param languageCapacity the size of the language buffer to store the - * language code with - * @param err error information if retrieving the language code failed - * @return the actual buffer size needed for the language code. If it's greater - * than languageCapacity, the returned language code will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getLanguage(const char* localeID, - char* language, - int32_t languageCapacity, - UErrorCode* err); - -/** - * Gets the script code for the specified locale. - * - * @param localeID the locale to get the ISO language code with - * @param script the language code for localeID - * @param scriptCapacity the size of the language buffer to store the - * language code with - * @param err error information if retrieving the language code failed - * @return the actual buffer size needed for the language code. If it's greater - * than scriptCapacity, the returned language code will be truncated. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getScript(const char* localeID, - char* script, - int32_t scriptCapacity, - UErrorCode* err); - -/** - * Gets the country code for the specified locale. 
- * - * @param localeID the locale to get the country code with - * @param country the country code for localeID - * @param countryCapacity the size of the country buffer to store the - * country code with - * @param err error information if retrieving the country code failed - * @return the actual buffer size needed for the country code. If it's greater - * than countryCapacity, the returned country code will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getCountry(const char* localeID, - char* country, - int32_t countryCapacity, - UErrorCode* err); - -/** - * Gets the variant code for the specified locale. - * - * @param localeID the locale to get the variant code with - * @param variant the variant code for localeID - * @param variantCapacity the size of the variant buffer to store the - * variant code with - * @param err error information if retrieving the variant code failed - * @return the actual buffer size needed for the variant code. If it's greater - * than variantCapacity, the returned variant code will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getVariant(const char* localeID, - char* variant, - int32_t variantCapacity, - UErrorCode* err); - - -/** - * Gets the full name for the specified locale. - * Note: This has the effect of 'canonicalizing' the ICU locale ID to - * a certain extent. Upper and lower case are set as needed. - * It does NOT map aliased names in any way. - * See the top of this header file. - * This API supports preflighting. - * - * @param localeID the locale to get the full name with - * @param name fill in buffer for the name without keywords. - * @param nameCapacity capacity of the fill in buffer. - * @param err error information if retrieving the full name failed - * @return the actual buffer size needed for the full name. If it's greater - * than nameCapacity, the returned full name will be truncated. 
- * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getName(const char* localeID, - char* name, - int32_t nameCapacity, - UErrorCode* err); - -/** - * Gets the full name for the specified locale. - * Note: This has the effect of 'canonicalizing' the string to - * a certain extent. Upper and lower case are set as needed, - * and if the components were in 'POSIX' format they are changed to - * ICU format. It does NOT map aliased names in any way. - * See the top of this header file. - * - * @param localeID the locale to get the full name with - * @param name the full name for localeID - * @param nameCapacity the size of the name buffer to store the - * full name with - * @param err error information if retrieving the full name failed - * @return the actual buffer size needed for the full name. If it's greater - * than nameCapacity, the returned full name will be truncated. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_canonicalize(const char* localeID, - char* name, - int32_t nameCapacity, - UErrorCode* err); - -/** - * Gets the ISO language code for the specified locale. - * - * @param localeID the locale to get the ISO language code with - * @return language the ISO language code for localeID - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 -uloc_getISO3Language(const char* localeID); - - -/** - * Gets the ISO country code for the specified locale. - * - * @param localeID the locale to get the ISO country code with - * @return country the ISO country code for localeID - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 -uloc_getISO3Country(const char* localeID); - -/** - * Gets the Win32 LCID value for the specified locale. - * If the ICU locale is not recognized by Windows, 0 will be returned. - * - * LCIDs were deprecated with Windows Vista and Microsoft recommends - * that developers use BCP47 style tags instead (uloc_toLanguageTag). 
- * - * @param localeID the locale to get the Win32 LCID value with - * @return country the Win32 LCID for localeID - * @stable ICU 2.0 - */ -U_STABLE uint32_t U_EXPORT2 -uloc_getLCID(const char* localeID); - -/** - * Gets the language name suitable for display for the specified locale. - * - * @param locale the locale to get the ISO language code with - * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "Anglais", while passing Locale::getGerman() - * for inLocale would result in "Englisch". - * @param language the displayable language code for localeID - * @param languageCapacity the size of the language buffer to store the - * displayable language code with - * @param status error information if retrieving the displayable language code failed - * @return the actual buffer size needed for the displayable language code. If it's greater - * than languageCapacity, the returned language code will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayLanguage(const char* locale, - const char* displayLocale, - UChar* language, - int32_t languageCapacity, - UErrorCode* status); - -/** - * Gets the script name suitable for display for the specified locale. - * - * @param locale the locale to get the displayable script code with. NULL may be used to specify the default. - * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "", while passing Locale::getGerman() - * for inLocale would result in "". NULL may be used to specify the default. 
- * @param script the displayable script for the localeID - * @param scriptCapacity the size of the script buffer to store the - * displayable script code with - * @param status error information if retrieving the displayable script code failed - * @return the actual buffer size needed for the displayable script code. If it's greater - * than scriptCapacity, the returned displayable script code will be truncated. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayScript(const char* locale, - const char* displayLocale, - UChar* script, - int32_t scriptCapacity, - UErrorCode* status); - -/** - * Gets the country name suitable for display for the specified locale. - * Warning: this is for the region part of a valid locale ID; it cannot just be the region code (like "FR"). - * To get the display name for a region alone, or for other options, use ULocaleDisplayNames instead. - * - * @param locale the locale to get the displayable country code with. NULL may be used to specify the default. - * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "Anglais", while passing Locale::getGerman() - * for inLocale would result in "Englisch". NULL may be used to specify the default. - * @param country the displayable country code for localeID - * @param countryCapacity the size of the country buffer to store the - * displayable country code with - * @param status error information if retrieving the displayable country code failed - * @return the actual buffer size needed for the displayable country code. If it's greater - * than countryCapacity, the returned displayable country code will be truncated. 
- * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayCountry(const char* locale, - const char* displayLocale, - UChar* country, - int32_t countryCapacity, - UErrorCode* status); - - -/** - * Gets the variant name suitable for display for the specified locale. - * - * @param locale the locale to get the displayable variant code with. NULL may be used to specify the default. - * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "Anglais", while passing Locale::getGerman() - * for inLocale would result in "Englisch". NULL may be used to specify the default. - * @param variant the displayable variant code for localeID - * @param variantCapacity the size of the variant buffer to store the - * displayable variant code with - * @param status error information if retrieving the displayable variant code failed - * @return the actual buffer size needed for the displayable variant code. If it's greater - * than variantCapacity, the returned displayable variant code will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayVariant(const char* locale, - const char* displayLocale, - UChar* variant, - int32_t variantCapacity, - UErrorCode* status); - -/** - * Gets the keyword name suitable for display for the specified locale. - * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display - * string for the keyword collation. 
- * Usage: - * - * UErrorCode status = U_ZERO_ERROR; - * const char* keyword =NULL; - * int32_t keywordLen = 0; - * int32_t keywordCount = 0; - * UChar displayKeyword[256]; - * int32_t displayKeywordLen = 0; - * UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status); - * for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){ - * if(U_FAILURE(status)){ - * ...something went wrong so handle the error... - * break; - * } - * // the uenum_next returns NUL terminated string - * keyword = uenum_next(keywordEnum, &keywordLen, &status); - * displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256); - * ... do something interesting ..... - * } - * uenum_close(keywordEnum); - * - * @param keyword The keyword whose display string needs to be returned. - * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "Anglais", while passing Locale::getGerman() - * for inLocale would result in "Englisch". NULL may be used to specify the default. - * @param dest the buffer to which the displayable keyword should be written. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param status error information if retrieving the displayable string failed. - * Should not be NULL and should not indicate failure on entry. - * @return the actual buffer size needed for the displayable variant code. 
- * @see #uloc_openKeywords - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayKeyword(const char* keyword, - const char* displayLocale, - UChar* dest, - int32_t destCapacity, - UErrorCode* status); -/** - * Gets the value of the keyword suitable for display for the specified locale. - * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display - * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword. - * - * @param locale The locale to get the displayable variant code with. NULL may be used to specify the default. - * @param keyword The keyword for whose value should be used. - * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "Anglais", while passing Locale::getGerman() - * for inLocale would result in "Englisch". NULL may be used to specify the default. - * @param dest the buffer to which the displayable keyword should be written. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param status error information if retrieving the displayable string failed. - * Should not be NULL and must not indicate failure on entry. - * @return the actual buffer size needed for the displayable variant code. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayKeywordValue( const char* locale, - const char* keyword, - const char* displayLocale, - UChar* dest, - int32_t destCapacity, - UErrorCode* status); -/** - * Gets the full name suitable for display for the specified locale. - * - * @param localeID the locale to get the displayable name with. NULL may be used to specify the default. - * @param inLocaleID Specifies the locale to be used to display the name. 
In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "Anglais", while passing Locale::getGerman() - * for inLocale would result in "Englisch". NULL may be used to specify the default. - * @param result the displayable name for localeID - * @param maxResultSize the size of the name buffer to store the - * displayable full name with - * @param err error information if retrieving the displayable name failed - * @return the actual buffer size needed for the displayable name. If it's greater - * than maxResultSize, the returned displayable name will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayName(const char* localeID, - const char* inLocaleID, - UChar* result, - int32_t maxResultSize, - UErrorCode* err); - - -/** - * Gets the specified locale from a list of all available locales. - * The return value is a pointer to an item of - * a locale name array. Both this array and the pointers - * it contains are owned by ICU and should not be deleted or written through - * by the caller. The locale name is terminated by a null pointer. - * @param n the specific locale name index of the available locale list - * @return a specified locale name of all available locales - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 -uloc_getAvailable(int32_t n); - -/** - * Gets the size of the all available locale list. - * - * @return the size of the locale list - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 uloc_countAvailable(void); - -/** - * - * Gets a list of all available 2-letter language codes defined in ISO 639, - * plus additional 3-letter codes determined to be useful for locale generation as - * defined by Unicode CLDR. This is a pointer - * to an array of pointers to arrays of char. All of these pointers are owned - * by ICU-- do not delete them, and do not write through them. The array is - * terminated with a null pointer. 
- * @return a list of all available language codes - * @stable ICU 2.0 - */ -U_STABLE const char* const* U_EXPORT2 -uloc_getISOLanguages(void); - -/** - * - * Gets a list of all available 2-letter country codes defined in ISO 639. This is a - * pointer to an array of pointers to arrays of char. All of these pointers are - * owned by ICU-- do not delete them, and do not write through them. The array is - * terminated with a null pointer. - * @return a list of all available country codes - * @stable ICU 2.0 - */ -U_STABLE const char* const* U_EXPORT2 -uloc_getISOCountries(void); - -/** - * Truncate the locale ID string to get the parent locale ID. - * Copies the part of the string before the last underscore. - * The parent locale ID will be an empty string if there is no - * underscore, or if there is only one underscore at localeID[0]. - * - * @param localeID Input locale ID string. - * @param parent Output string buffer for the parent locale ID. - * @param parentCapacity Size of the output buffer. - * @param err A UErrorCode value. - * @return The length of the parent locale ID. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getParent(const char* localeID, - char* parent, - int32_t parentCapacity, - UErrorCode* err); - - - - -/** - * Gets the full name for the specified locale, like uloc_getName(), - * but without keywords. - * - * Note: This has the effect of 'canonicalizing' the string to - * a certain extent. Upper and lower case are set as needed, - * and if the components were in 'POSIX' format they are changed to - * ICU format. It does NOT map aliased names in any way. - * See the top of this header file. - * - * This API strips off the keyword part, so "de_DE\@collation=phonebook" - * will become "de_DE". - * This API supports preflighting. - * - * @param localeID the locale to get the full name with - * @param name fill in buffer for the name without keywords. - * @param nameCapacity capacity of the fill in buffer. 
- * @param err error information if retrieving the full name failed - * @return the actual buffer size needed for the full name. If it's greater - * than nameCapacity, the returned full name will be truncated. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getBaseName(const char* localeID, - char* name, - int32_t nameCapacity, - UErrorCode* err); - -/** - * Gets an enumeration of keywords for the specified locale. Enumeration - * must get disposed of by the client using uenum_close function. - * - * @param localeID the locale to get the variant code with - * @param status error information if retrieving the keywords failed - * @return enumeration of keywords or NULL if there are no keywords. - * @stable ICU 2.8 - */ -U_STABLE UEnumeration* U_EXPORT2 -uloc_openKeywords(const char* localeID, - UErrorCode* status); - -/** - * Get the value for a keyword. Locale name does not need to be normalized. - * - * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK") - * @param keywordName name of the keyword for which we want the value; must not be - * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive. - * @param buffer receiving buffer - * @param bufferCapacity capacity of receiving buffer - * @param status containing error code: e.g. buffer not big enough or ill-formed localeID - * or keywordName parameters. - * @return the length of keyword value - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getKeywordValue(const char* localeID, - const char* keywordName, - char* buffer, int32_t bufferCapacity, - UErrorCode* status); - - -/** - * Sets or removes the value of the specified keyword. - * - * For removing all keywords, use uloc_getBaseName(). - * - * NOTE: Unlike almost every other ICU function which takes a - * buffer, this function will NOT truncate the output text, and will - * not update the buffer with unterminated text setting a status of - * U_STRING_NOT_TERMINATED_WARNING. 
If a BUFFER_OVERFLOW_ERROR is received, - * it means a terminated version of the updated locale ID would not fit - * in the buffer, and the original buffer is untouched. This is done to - * prevent incorrect or possibly even malformed locales from being generated - * and used. - * - * @param keywordName name of the keyword to be set; must not be - * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive. - * @param keywordValue value of the keyword to be set. If 0-length or - * NULL, will result in the keyword being removed; no error is given if - * that keyword does not exist. Otherwise, must consist only of - * [A-Za-z0-9] and [/_+-]. - * @param buffer input buffer containing well-formed locale ID to be - * modified. - * @param bufferCapacity capacity of receiving buffer - * @param status containing error code: e.g. buffer not big enough - * or ill-formed keywordName or keywordValue parameters, or ill-formed - * locale ID in buffer on input. - * @return the length needed for the buffer - * @see uloc_getKeywordValue - * @stable ICU 3.2 - */ -U_STABLE int32_t U_EXPORT2 -uloc_setKeywordValue(const char* keywordName, - const char* keywordValue, - char* buffer, int32_t bufferCapacity, - UErrorCode* status); - -/** - * Returns whether the locale's script is written right-to-left. - * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags(). - * If no likely script is known, then FALSE is returned. - * - * A script is right-to-left according to the CLDR script metadata - * which corresponds to whether the script's letters have Bidi_Class=R or AL. - * - * Returns TRUE for "ar" and "en-Hebr", FALSE for "zh" and "fa-Cyrl". - * - * @param locale input locale ID - * @return TRUE if the locale's script is written right-to-left - * @stable ICU 54 - */ -U_STABLE UBool U_EXPORT2 -uloc_isRightToLeft(const char *locale); - -/** - * enums for the return value for the character and line orientation - * functions. 
- * @stable ICU 4.0 - */ -typedef enum { - ULOC_LAYOUT_LTR = 0, /* left-to-right. */ - ULOC_LAYOUT_RTL = 1, /* right-to-left. */ - ULOC_LAYOUT_TTB = 2, /* top-to-bottom. */ - ULOC_LAYOUT_BTT = 3, /* bottom-to-top. */ - ULOC_LAYOUT_UNKNOWN -} ULayoutType; - -/** - * Get the layout character orientation for the specified locale. - * - * @param localeId locale name - * @param status Error status - * @return an enum indicating the layout orientation for characters. - * @stable ICU 4.0 - */ -U_STABLE ULayoutType U_EXPORT2 -uloc_getCharacterOrientation(const char* localeId, - UErrorCode *status); - -/** - * Get the layout line orientation for the specified locale. - * - * @param localeId locale name - * @param status Error status - * @return an enum indicating the layout orientation for lines. - * @stable ICU 4.0 - */ -U_STABLE ULayoutType U_EXPORT2 -uloc_getLineOrientation(const char* localeId, - UErrorCode *status); - -/** - * enums for the 'outResult' parameter return value - * @see uloc_acceptLanguageFromHTTP - * @see uloc_acceptLanguage - * @stable ICU 3.2 - */ -typedef enum { - ULOC_ACCEPT_FAILED = 0, /* No exact match was found. */ - ULOC_ACCEPT_VALID = 1, /* An exact match was found. */ - ULOC_ACCEPT_FALLBACK = 2 /* A fallback was found, for example, - Accept list contained 'ja_JP' - which matched available locale 'ja'. */ -} UAcceptResult; - - -/** - * Based on a HTTP header from a web browser and a list of available locales, - * determine an acceptable locale for the user. - * @param result - buffer to accept the result locale - * @param resultAvailable the size of the result buffer. - * @param outResult - An out parameter that contains the fallback status - * @param httpAcceptLanguage - "Accept-Language:" header as per HTTP. - * @param availableLocales - list of available locales to match - * @param status Error status, may be BUFFER_OVERFLOW_ERROR - * @return length needed for the locale. 
- * @stable ICU 3.2 - */ -U_STABLE int32_t U_EXPORT2 -uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, - UAcceptResult *outResult, - const char *httpAcceptLanguage, - UEnumeration* availableLocales, - UErrorCode *status); - -/** - * Based on a list of available locales, - * determine an acceptable locale for the user. - * @param result - buffer to accept the result locale - * @param resultAvailable the size of the result buffer. - * @param outResult - An out parameter that contains the fallback status - * @param acceptList - list of acceptable languages - * @param acceptListCount - count of acceptList items - * @param availableLocales - list of available locales to match - * @param status Error status, may be BUFFER_OVERFLOW_ERROR - * @return length needed for the locale. - * @stable ICU 3.2 - */ -U_STABLE int32_t U_EXPORT2 -uloc_acceptLanguage(char *result, int32_t resultAvailable, - UAcceptResult *outResult, const char **acceptList, - int32_t acceptListCount, - UEnumeration* availableLocales, - UErrorCode *status); - - -/** - * Gets the ICU locale ID for the specified Win32 LCID value. - * - * @param hostID the Win32 LCID to translate - * @param locale the output buffer for the ICU locale ID, which will be NUL-terminated - * if there is room. 
- * @param localeCapacity the size of the output buffer - * @param status an error is returned if the LCID is unrecognized or the output buffer - * is too small - * @return actual the actual size of the locale ID, not including NUL-termination - * @stable ICU 3.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity, - UErrorCode *status); - - -/** - * Add the likely subtags for a provided locale ID, per the algorithm described - * in the following CLDR technical report: - * - * http://www.unicode.org/reports/tr35/#Likely_Subtags - * - * If localeID is already in the maximal form, or there is no data available - * for maximization, it will be copied to the output buffer. For example, - * "und-Zzzz" cannot be maximized, since there is no reasonable maximization. - * - * Examples: - * - * "en" maximizes to "en_Latn_US" - * - * "de" maximizes to "de_Latn_US" - * - * "sr" maximizes to "sr_Cyrl_RS" - * - * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) - * - * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) - * - * @param localeID The locale to maximize - * @param maximizedLocaleID The maximized locale - * @param maximizedLocaleIDCapacity The capacity of the maximizedLocaleID buffer - * @param err Error information if maximizing the locale failed. If the length - * of the localeID and the null-terminator is greater than the maximum allowed size, - * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. - * @return The actual buffer size needed for the maximized locale. If it's - * greater than maximizedLocaleIDCapacity, the returned ID will be truncated. - * On error, the return value is -1. 
- * @stable ICU 4.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_addLikelySubtags(const char* localeID, - char* maximizedLocaleID, - int32_t maximizedLocaleIDCapacity, - UErrorCode* err); - - -/** - * Minimize the subtags for a provided locale ID, per the algorithm described - * in the following CLDR technical report: - * - * http://www.unicode.org/reports/tr35/#Likely_Subtags - * - * If localeID is already in the minimal form, or there is no data available - * for minimization, it will be copied to the output buffer. Since the - * minimization algorithm relies on proper maximization, see the comments - * for uloc_addLikelySubtags for reasons why there might not be any data. - * - * Examples: - * - * "en_Latn_US" minimizes to "en" - * - * "de_Latn_US" minimizes to "de" - * - * "sr_Cyrl_RS" minimizes to "sr" - * - * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the - * script, and minimizing to "zh" would imply "zh_Hans_CN".) - * - * @param localeID The locale to minimize - * @param minimizedLocaleID The minimized locale - * @param minimizedLocaleIDCapacity The capacity of the minimizedLocaleID buffer - * @param err Error information if minimizing the locale failed. If the length - * of the localeID and the null-terminator is greater than the maximum allowed size, - * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. - * @return The actual buffer size needed for the minimized locale. If it's - * greater than minimizedLocaleIDCapacity, the returned ID will be truncated. - * On error, the return value is -1. - * @stable ICU 4.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_minimizeSubtags(const char* localeID, - char* minimizedLocaleID, - int32_t minimizedLocaleIDCapacity, - UErrorCode* err); - -/** - * Returns a locale ID for the specified BCP47 language tag string. - * If the specified language tag contains any ill-formed subtags, - * the first such subtag and all following subtags are ignored. - *

- * This implements the 'Language-Tag' production of BCP47, and so - * supports grandfathered (regular and irregular) as well as private - * use language tags. Private use tags are represented as 'x-whatever', - * and grandfathered tags are converted to their canonical replacements - * where they exist. Note that a few grandfathered tags have no modern - * replacement, these will be converted using the fallback described in - * the first paragraph, so some information might be lost. - * @param langtag the input BCP47 language tag. - * @param localeID the output buffer receiving a locale ID for the - * specified BCP47 language tag. - * @param localeIDCapacity the size of the locale ID output buffer. - * @param parsedLength if not NULL, successfully parsed length - * for the input language tag is set. - * @param err error information if receiving the locald ID - * failed. - * @return the length of the locale ID. - * @stable ICU 4.2 - */ -U_STABLE int32_t U_EXPORT2 -uloc_forLanguageTag(const char* langtag, - char* localeID, - int32_t localeIDCapacity, - int32_t* parsedLength, - UErrorCode* err); - -/** - * Returns a well-formed language tag for this locale ID. - *

- * Note: When strict is FALSE, any locale - * fields which do not satisfy the BCP47 syntax requirement will - * be omitted from the result. When strict is - * TRUE, this function sets U_ILLEGAL_ARGUMENT_ERROR to the - * err if any locale fields do not satisfy the - * BCP47 syntax requirement. - * @param localeID the input locale ID - * @param langtag the output buffer receiving BCP47 language - * tag for the locale ID. - * @param langtagCapacity the size of the BCP47 language tag - * output buffer. - * @param strict boolean value indicating if the function returns - * an error for an ill-formed input locale ID. - * @param err error information if receiving the language - * tag failed. - * @return The length of the BCP47 language tag. - * @stable ICU 4.2 - */ -U_STABLE int32_t U_EXPORT2 -uloc_toLanguageTag(const char* localeID, - char* langtag, - int32_t langtagCapacity, - UBool strict, - UErrorCode* err); - -/** - * Converts the specified keyword (legacy key, or BCP 47 Unicode locale - * extension key) to the equivalent BCP 47 Unicode locale extension key. - * For example, BCP 47 Unicode locale extension key "co" is returned for - * the input keyword "collation". - *

- * When the specified keyword is unknown, but satisfies the BCP syntax, - * then the pointer to the input keyword itself will be returned. - * For example, - * uloc_toUnicodeLocaleKey("ZZ") returns "ZZ". - * - * @param keyword the input locale keyword (either legacy key - * such as "collation" or BCP 47 Unicode locale extension - * key such as "co"). - * @return the well-formed BCP 47 Unicode locale extension key, - * or NULL if the specified locale keyword cannot be - * mapped to a well-formed BCP 47 Unicode locale extension - * key. - * @see uloc_toLegacyKey - * @stable ICU 54 - */ -U_STABLE const char* U_EXPORT2 -uloc_toUnicodeLocaleKey(const char* keyword); - -/** - * Converts the specified keyword value (legacy type, or BCP 47 - * Unicode locale extension type) to the well-formed BCP 47 Unicode locale - * extension type for the specified keyword (category). For example, BCP 47 - * Unicode locale extension type "phonebk" is returned for the input - * keyword value "phonebook", with the keyword "collation" (or "co"). - *

- * When the specified keyword is not recognized, but the specified value - * satisfies the syntax of the BCP 47 Unicode locale extension type, - * or when the specified keyword allows 'variable' type and the specified - * value satisfies the syntax, then the pointer to the input type value itself - * will be returned. - * For example, - * uloc_toUnicodeLocaleType("Foo", "Bar") returns "Bar", - * uloc_toUnicodeLocaleType("variableTop", "00A4") returns "00A4". - * - * @param keyword the locale keyword (either legacy key such as - * "collation" or BCP 47 Unicode locale extension - * key such as "co"). - * @param value the locale keyword value (either legacy type - * such as "phonebook" or BCP 47 Unicode locale extension - * type such as "phonebk"). - * @return the well-formed BCP47 Unicode locale extension type, - * or NULL if the locale keyword value cannot be mapped to - * a well-formed BCP 47 Unicode locale extension type. - * @see uloc_toLegacyType - * @stable ICU 54 - */ -U_STABLE const char* U_EXPORT2 -uloc_toUnicodeLocaleType(const char* keyword, const char* value); - -/** - * Converts the specified keyword (BCP 47 Unicode locale extension key, or - * legacy key) to the legacy key. For example, legacy key "collation" is - * returned for the input BCP 47 Unicode locale extension key "co". - * - * @param keyword the input locale keyword (either BCP 47 Unicode locale - * extension key or legacy key). - * @return the well-formed legacy key, or NULL if the specified - * keyword cannot be mapped to a well-formed legacy key. - * @see toUnicodeLocaleKey - * @stable ICU 54 - */ -U_STABLE const char* U_EXPORT2 -uloc_toLegacyKey(const char* keyword); - -/** - * Converts the specified keyword value (BCP 47 Unicode locale extension type, - * or legacy type or type alias) to the canonical legacy type. For example, - * the legacy type "phonebook" is returned for the input BCP 47 Unicode - * locale extension type "phonebk" with the keyword "collation" (or "co"). - *

- * When the specified keyword is not recognized, but the specified value - * satisfies the syntax of legacy key, or when the specified keyword - * allows 'variable' type and the specified value satisfies the syntax, - * then the pointer to the input type value itself will be returned. - * For example, - * uloc_toLegacyType("Foo", "Bar") returns "Bar", - * uloc_toLegacyType("vt", "00A4") returns "00A4". - * - * @param keyword the locale keyword (either legacy keyword such as - * "collation" or BCP 47 Unicode locale extension - * key such as "co"). - * @param value the locale keyword value (either BCP 47 Unicode locale - * extension type such as "phonebk" or legacy keyword value - * such as "phonebook"). - * @return the well-formed legacy type, or NULL if the specified - * keyword value cannot be mapped to a well-formed legacy - * type. - * @see toUnicodeLocaleType - * @stable ICU 54 - */ -U_STABLE const char* U_EXPORT2 -uloc_toLegacyType(const char* keyword, const char* value); - -#endif /*_ULOC*/ diff --git a/deps/node/deps/icu-small/source/common/unicode/umachine.h b/deps/node/deps/icu-small/source/common/unicode/umachine.h deleted file mode 100644 index 6d932cfc..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/umachine.h +++ /dev/null @@ -1,413 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: umachine.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999sep13 -* created by: Markus W. Scherer -* -* This file defines basic types and constants for ICU to be -* platform-independent. 
umachine.h and utf.h are included into -* utypes.h to provide all the general definitions for ICU. -* All of these definitions used to be in utypes.h before -* the UTF-handling macros made this unmaintainable. -*/ - -#ifndef __UMACHINE_H__ -#define __UMACHINE_H__ - - -/** - * \file - * \brief Basic types and constants for UTF - * - *

Basic types and constants for UTF

- * This file defines basic types and constants for utf.h to be - * platform-independent. umachine.h and utf.h are included into - * utypes.h to provide all the general definitions for ICU. - * All of these definitions used to be in utypes.h before - * the UTF-handling macros made this unmaintainable. - * - */ -/*==========================================================================*/ -/* Include platform-dependent definitions */ -/* which are contained in the platform-specific file platform.h */ -/*==========================================================================*/ - -#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */ - -/* - * ANSI C headers: - * stddef.h defines wchar_t - */ -#include - -/*==========================================================================*/ -/* For C wrappers, we use the symbol U_STABLE. */ -/* This works properly if the includer is C or C++. */ -/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */ -/*==========================================================================*/ - -/** - * \def U_CFUNC - * This is used in a declaration of a library private ICU C function. - * @stable ICU 2.4 - */ - -/** - * \def U_CDECL_BEGIN - * This is used to begin a declaration of a library private ICU C API. 
- * @stable ICU 2.4 - */ - -/** - * \def U_CDECL_END - * This is used to end a declaration of a library private ICU C API - * @stable ICU 2.4 - */ - -#ifdef __cplusplus -# define U_CFUNC extern "C" -# define U_CDECL_BEGIN extern "C" { -# define U_CDECL_END } -#else -# define U_CFUNC extern -# define U_CDECL_BEGIN -# define U_CDECL_END -#endif - -#ifndef U_ATTRIBUTE_DEPRECATED -/** - * \def U_ATTRIBUTE_DEPRECATED - * This is used for GCC specific attributes - * @internal - */ -#if U_GCC_MAJOR_MINOR >= 302 -# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated)) -/** - * \def U_ATTRIBUTE_DEPRECATED - * This is used for Visual C++ specific attributes - * @internal - */ -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated) -#else -# define U_ATTRIBUTE_DEPRECATED -#endif -#endif - -/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/ -#define U_CAPI U_CFUNC U_EXPORT -/** This is used to declare a function as a stable public ICU C API*/ -#define U_STABLE U_CAPI -/** This is used to declare a function as a draft public ICU C API */ -#define U_DRAFT U_CAPI -/** This is used to declare a function as a deprecated public ICU C API */ -#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED -/** This is used to declare a function as an obsolete public ICU C API */ -#define U_OBSOLETE U_CAPI -/** This is used to declare a function as an internal ICU C API */ -#define U_INTERNAL U_CAPI - -/** - * \def U_OVERRIDE - * Defined to the C++11 "override" keyword if available. - * Denotes a class or member which is an override of the base class. - * May result in an error if it applied to something not an override. - * @internal - */ -#ifndef U_OVERRIDE -#define U_OVERRIDE override -#endif - -/** - * \def U_FINAL - * Defined to the C++11 "final" keyword if available. - * Denotes a class or member which may not be overridden in subclasses. - * May result in an error if subclasses attempt to override. 
- * @internal - */ -#if !defined(U_FINAL) || defined(U_IN_DOXYGEN) -#define U_FINAL final -#endif - - -/*==========================================================================*/ -/* limits for int32_t etc., like in POSIX inttypes.h */ -/*==========================================================================*/ - -#ifndef INT8_MIN -/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */ -# define INT8_MIN ((int8_t)(-128)) -#endif -#ifndef INT16_MIN -/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */ -# define INT16_MIN ((int16_t)(-32767-1)) -#endif -#ifndef INT32_MIN -/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */ -# define INT32_MIN ((int32_t)(-2147483647-1)) -#endif - -#ifndef INT8_MAX -/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */ -# define INT8_MAX ((int8_t)(127)) -#endif -#ifndef INT16_MAX -/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */ -# define INT16_MAX ((int16_t)(32767)) -#endif -#ifndef INT32_MAX -/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */ -# define INT32_MAX ((int32_t)(2147483647)) -#endif - -#ifndef UINT8_MAX -/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */ -# define UINT8_MAX ((uint8_t)(255U)) -#endif -#ifndef UINT16_MAX -/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */ -# define UINT16_MAX ((uint16_t)(65535U)) -#endif -#ifndef UINT32_MAX -/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */ -# define UINT32_MAX ((uint32_t)(4294967295U)) -#endif - -#if defined(U_INT64_T_UNAVAILABLE) -# error int64_t is required for decimal format and rule-based number format. -#else -# ifndef INT64_C -/** - * Provides a platform independent way to specify a signed 64-bit integer constant. 
- * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C - * @stable ICU 2.8 - */ -# define INT64_C(c) c ## LL -# endif -# ifndef UINT64_C -/** - * Provides a platform independent way to specify an unsigned 64-bit integer constant. - * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C - * @stable ICU 2.8 - */ -# define UINT64_C(c) c ## ULL -# endif -# ifndef U_INT64_MIN -/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */ -# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1)) -# endif -# ifndef U_INT64_MAX -/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */ -# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807))) -# endif -# ifndef U_UINT64_MAX -/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */ -# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615))) -# endif -#endif - -/*==========================================================================*/ -/* Boolean data type */ -/*==========================================================================*/ - -/** The ICU boolean type @stable ICU 2.0 */ -typedef int8_t UBool; - -#ifndef TRUE -/** The TRUE value of a UBool @stable ICU 2.0 */ -# define TRUE 1 -#endif -#ifndef FALSE -/** The FALSE value of a UBool @stable ICU 2.0 */ -# define FALSE 0 -#endif - - -/*==========================================================================*/ -/* Unicode data types */ -/*==========================================================================*/ - -/* wchar_t-related definitions -------------------------------------------- */ - -/* - * \def U_WCHAR_IS_UTF16 - * Defined if wchar_t uses UTF-16. - * - * @stable ICU 2.0 - */ -/* - * \def U_WCHAR_IS_UTF32 - * Defined if wchar_t uses UTF-32. 
- * - * @stable ICU 2.0 - */ -#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) -# ifdef __STDC_ISO_10646__ -# if (U_SIZEOF_WCHAR_T==2) -# define U_WCHAR_IS_UTF16 -# elif (U_SIZEOF_WCHAR_T==4) -# define U_WCHAR_IS_UTF32 -# endif -# elif defined __UCS2__ -# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2) -# define U_WCHAR_IS_UTF16 -# endif -# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__)) -# if (U_SIZEOF_WCHAR_T==4) -# define U_WCHAR_IS_UTF32 -# endif -# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED) -# define U_WCHAR_IS_UTF32 -# elif U_PLATFORM_HAS_WIN32_API -# define U_WCHAR_IS_UTF16 -# endif -#endif - -/* UChar and UChar32 definitions -------------------------------------------- */ - -/** Number of bytes in a UChar. @stable ICU 2.0 */ -#define U_SIZEOF_UCHAR 2 - -/** - * \def U_CHAR16_IS_TYPEDEF - * If 1, then char16_t is a typedef and not a real type (yet) - * @internal - */ -#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) -// for AIX, uchar.h needs to be included -# include -# define U_CHAR16_IS_TYPEDEF 1 -#elif defined(_MSC_VER) && (_MSC_VER < 1900) -// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type, -// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx -# define U_CHAR16_IS_TYPEDEF 1 -#else -# define U_CHAR16_IS_TYPEDEF 0 -#endif - - -/** - * \var UChar - * - * The base type for UTF-16 code units and pointers. - * Unsigned 16-bit integer. - * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar. - * - * UChar is configurable by defining the macro UCHAR_TYPE - * on the preprocessor or compiler command line: - * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. - * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.) 
- * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16. - * - * The default is UChar=char16_t. - * - * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type. - * - * In C, char16_t is a simple typedef of uint_least16_t. - * ICU requires uint_least16_t=uint16_t for data memory mapping. - * On macOS, char16_t is not available because the uchar.h standard header is missing. - * - * @stable ICU 4.4 - */ - -#if 1 - // #if 1 is normal. UChar defaults to char16_t in C++. - // For configuration testing of UChar=uint16_t temporarily change this to #if 0. - // The intltest Makefile #defines UCHAR_TYPE=char16_t, - // so we only #define it to uint16_t if it is undefined so far. -#elif !defined(UCHAR_TYPE) -# define UCHAR_TYPE uint16_t -#endif - -#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ - defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) - // Inside the ICU library code, never configurable. - typedef char16_t UChar; -#elif defined(UCHAR_TYPE) - typedef UCHAR_TYPE UChar; -#elif defined(__cplusplus) - typedef char16_t UChar; -#else - typedef uint16_t UChar; -#endif - -/** - * \var OldUChar - * Default ICU 58 definition of UChar. - * A base type for UTF-16 code units and pointers. - * Unsigned 16-bit integer. - * - * Define OldUChar to be wchar_t if that is 16 bits wide. - * If wchar_t is not 16 bits wide, then define UChar to be uint16_t. - * - * This makes the definition of OldUChar platform-dependent - * but allows direct string type compatibility with platforms with - * 16-bit wchar_t types. - * - * This is how UChar was defined in ICU 58, for transition convenience. - * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. - * The current UChar responds to UCHAR_TYPE but OldUChar does not. 
- * - * @stable ICU 59 - */ -#if U_SIZEOF_WCHAR_T==2 - typedef wchar_t OldUChar; -#elif defined(__CHAR16_TYPE__) - typedef __CHAR16_TYPE__ OldUChar; -#else - typedef uint16_t OldUChar; -#endif - -/** - * Define UChar32 as a type for single Unicode code points. - * UChar32 is a signed 32-bit integer (same as int32_t). - * - * The Unicode code point range is 0..0x10ffff. - * All other values (negative or >=0x110000) are illegal as Unicode code points. - * They may be used as sentinel values to indicate "done", "error" - * or similar non-code point conditions. - * - * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined - * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) - * or else to be uint32_t. - * That is, the definition of UChar32 was platform-dependent. - * - * @see U_SENTINEL - * @stable ICU 2.4 - */ -typedef int32_t UChar32; - -/** - * This value is intended for sentinel values for APIs that - * (take or) return single code points (UChar32). - * It is outside of the Unicode code point range 0..0x10ffff. - * - * For example, a "done" or "error" value in a new API - * could be indicated with U_SENTINEL. - * - * ICU APIs designed before ICU 2.4 usually define service-specific "done" - * values, mostly 0xffff. - * Those may need to be distinguished from - * actual U+ffff text contents by calling functions like - * CharacterIterator::hasNext() or UnicodeString::length(). - * - * @return -1 - * @see UChar32 - * @stable ICU 2.4 - */ -#define U_SENTINEL (-1) - -#include "unicode/urename.h" - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/umisc.h b/deps/node/deps/icu-small/source/common/unicode/umisc.h deleted file mode 100644 index a46fa323..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/umisc.h +++ /dev/null @@ -1,62 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2006, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: umisc.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999oct15 -* created by: Markus W. Scherer -*/ - -#ifndef UMISC_H -#define UMISC_H - -#include "unicode/utypes.h" - -/** - * \file - * \brief C API:misc definitions - * - * This file contains miscellaneous definitions for the C APIs. - */ - -U_CDECL_BEGIN - -/** A struct representing a range of text containing a specific field - * @stable ICU 2.0 - */ -typedef struct UFieldPosition { - /** - * The field - * @stable ICU 2.0 - */ - int32_t field; - /** - * The start of the text range containing field - * @stable ICU 2.0 - */ - int32_t beginIndex; - /** - * The limit of the text range containing field - * @stable ICU 2.0 - */ - int32_t endIndex; -} UFieldPosition; - -#if !UCONFIG_NO_SERVICE -/** - * Opaque type returned by registerInstance, registerFactory and unregister for service registration. - * @stable ICU 2.6 - */ -typedef const void* URegistryKey; -#endif - -U_CDECL_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/umutablecptrie.h b/deps/node/deps/icu-small/source/common/unicode/umutablecptrie.h deleted file mode 100644 index e75191a4..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/umutablecptrie.h +++ /dev/null @@ -1,241 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// umutablecptrie.h (split out of ucptrie.h) -// created: 2018jan24 Markus W. 
Scherer - -#ifndef __UMUTABLECPTRIE_H__ -#define __UMUTABLECPTRIE_H__ - -#include "unicode/utypes.h" - -#ifndef U_HIDE_DRAFT_API - -#include "unicode/localpointer.h" -#include "unicode/ucpmap.h" -#include "unicode/ucptrie.h" -#include "unicode/utf8.h" - -U_CDECL_BEGIN - -/** - * \file - * - * This file defines a mutable Unicode code point trie. - * - * @see UCPTrie - * @see UMutableCPTrie - */ - -/** - * Mutable Unicode code point trie. - * Fast map from Unicode code points (U+0000..U+10FFFF) to 32-bit integer values. - * For details see http://site.icu-project.org/design/struct/utrie - * - * Setting values (especially ranges) and lookup is fast. - * The mutable trie is only somewhat space-efficient. - * It builds a compacted, immutable UCPTrie. - * - * This trie can be modified while iterating over its contents. - * For example, it is possible to merge its values with those from another - * set of ranges (e.g., another mutable or immutable trie): - * Iterate over those source ranges; for each of them iterate over this trie; - * add the source value into the value of each trie range. - * - * @see UCPTrie - * @see umutablecptrie_buildImmutable - * @draft ICU 63 - */ -typedef struct UMutableCPTrie UMutableCPTrie; - -/** - * Creates a mutable trie that initially maps each Unicode code point to the same value. - * It uses 32-bit data values until umutablecptrie_buildImmutable() is called. - * umutablecptrie_buildImmutable() takes a valueWidth parameter which - * determines the number of bits in the data value in the resulting UCPTrie. - * You must umutablecptrie_close() the trie once you are done using it. 
- * - * @param initialValue the initial value that is set for all code points - * @param errorValue the value for out-of-range code points and ill-formed UTF-8/16 - * @param pErrorCode an in/out ICU UErrorCode - * @return the trie - * @draft ICU 63 - */ -U_CAPI UMutableCPTrie * U_EXPORT2 -umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode); - -/** - * Clones a mutable trie. - * You must umutablecptrie_close() the clone once you are done using it. - * - * @param other the trie to clone - * @param pErrorCode an in/out ICU UErrorCode - * @return the trie clone - * @draft ICU 63 - */ -U_CAPI UMutableCPTrie * U_EXPORT2 -umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode); - -/** - * Closes a mutable trie and releases associated memory. - * - * @param trie the trie - * @draft ICU 63 - */ -U_CAPI void U_EXPORT2 -umutablecptrie_close(UMutableCPTrie *trie); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUMutableCPTriePointer - * "Smart pointer" class, closes a UMutableCPTrie via umutablecptrie_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @draft ICU 63 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUMutableCPTriePointer, UMutableCPTrie, umutablecptrie_close); - -U_NAMESPACE_END - -#endif - -/** - * Creates a mutable trie with the same contents as the UCPMap. - * You must umutablecptrie_close() the mutable trie once you are done using it. - * - * @param map the source map - * @param pErrorCode an in/out ICU UErrorCode - * @return the mutable trie - * @draft ICU 63 - */ -U_CAPI UMutableCPTrie * U_EXPORT2 -umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode); - -/** - * Creates a mutable trie with the same contents as the immutable one. - * You must umutablecptrie_close() the mutable trie once you are done using it. 
- * - * @param trie the immutable trie - * @param pErrorCode an in/out ICU UErrorCode - * @return the mutable trie - * @draft ICU 63 - */ -U_CAPI UMutableCPTrie * U_EXPORT2 -umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode); - -/** - * Returns the value for a code point as stored in the trie. - * - * @param trie the trie - * @param c the code point - * @return the value - * @draft ICU 63 - */ -U_CAPI uint32_t U_EXPORT2 -umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c); - -/** - * Returns the last code point such that all those from start to there have the same value. - * Can be used to efficiently iterate over all same-value ranges in a trie. - * (This is normally faster than iterating over code points and get()ting each value, - * but much slower than a data structure that stores ranges directly.) - * - * The trie can be modified between calls to this function. - * - * If the UCPMapValueFilter function pointer is not NULL, then - * the value to be delivered is passed through that function, and the return value is the end - * of the range where all values are modified to the same actual value. - * The value is unchanged if that function pointer is NULL. - * - * See the same-signature ucptrie_getRange() for a code sample. 
- * - * @param trie the trie - * @param start range start - * @param option defines whether surrogates are treated normally, - * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL - * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL - * @param filter a pointer to a function that may modify the trie data value, - * or NULL if the values from the trie are to be used unmodified - * @param context an opaque pointer that is passed on to the filter function - * @param pValue if not NULL, receives the value that every code point start..end has; - * may have been modified by filter(context, trie value) - * if that function pointer is not NULL - * @return the range end code point, or -1 if start is not a valid code point - * @draft ICU 63 - */ -U_CAPI UChar32 U_EXPORT2 -umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start, - UCPMapRangeOption option, uint32_t surrogateValue, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue); - -/** - * Sets a value for a code point. - * - * @param trie the trie - * @param c the code point - * @param value the value - * @param pErrorCode an in/out ICU UErrorCode - * @draft ICU 63 - */ -U_CAPI void U_EXPORT2 -umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode); - -/** - * Sets a value for each code point [start..end]. - * Faster and more space-efficient than setting the value for each code point separately. - * - * @param trie the trie - * @param start the first code point to get the value - * @param end the last code point to get the value (inclusive) - * @param value the value - * @param pErrorCode an in/out ICU UErrorCode - * @draft ICU 63 - */ -U_CAPI void U_EXPORT2 -umutablecptrie_setRange(UMutableCPTrie *trie, - UChar32 start, UChar32 end, - uint32_t value, UErrorCode *pErrorCode); - -/** - * Compacts the data and builds an immutable UCPTrie according to the parameters. - * After this, the mutable trie will be empty. 
- * - * The mutable trie stores 32-bit values until buildImmutable() is called. - * If values shorter than 32 bits are to be stored in the immutable trie, - * then the upper bits are discarded. - * For example, when the mutable trie contains values 0x81, -0x7f, and 0xa581, - * and the value width is 8 bits, then each of these is stored as 0x81 - * and the immutable trie will return that as an unsigned value. - * (Some implementations may want to make productive temporary use of the upper bits - * until buildImmutable() discards them.) - * - * Not every possible set of mappings can be built into a UCPTrie, - * because of limitations resulting from speed and space optimizations. - * Every Unicode assigned character can be mapped to a unique value. - * Typical data yields data structures far smaller than the limitations. - * - * It is possible to construct extremely unusual mappings that exceed the data structure limits. - * In such a case this function will fail with a U_INDEX_OUTOFBOUNDS_ERROR. - * - * @param trie the trie trie - * @param type selects the trie type - * @param valueWidth selects the number of bits in a trie data value; if smaller than 32 bits, - * then the values stored in the trie will be truncated first - * @param pErrorCode an in/out ICU UErrorCode - * - * @see umutablecptrie_fromUCPTrie - * @draft ICU 63 - */ -U_CAPI UCPTrie * U_EXPORT2 -umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth, - UErrorCode *pErrorCode); - -U_CDECL_END - -#endif // U_HIDE_DRAFT_API -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/unifilt.h b/deps/node/deps/icu-small/source/common/unicode/unifilt.h deleted file mode 100644 index 99cce785..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/unifilt.h +++ /dev/null @@ -1,122 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2010, International Business Machines Corporation and others. -* All Rights Reserved. -********************************************************************** -* Date Name Description -* 11/17/99 aliu Creation. -********************************************************************** -*/ -#ifndef UNIFILT_H -#define UNIFILT_H - -#include "unicode/unifunct.h" -#include "unicode/unimatch.h" - -/** - * \file - * \brief C++ API: Unicode Filter - */ - -U_NAMESPACE_BEGIN - -/** - * U_ETHER is used to represent character values for positions outside - * a range. For example, transliterator uses this to represent - * characters outside the range contextStart..contextLimit-1. This - * allows explicit matching by rules and UnicodeSets of text outside a - * defined range. - * @stable ICU 3.0 - */ -#define U_ETHER ((char16_t)0xFFFF) - -/** - * - * UnicodeFilter defines a protocol for selecting a - * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. - * Currently, filters are used in conjunction with classes like {@link - * Transliterator} to only process selected characters through a - * transformation. - * - *

Note: UnicodeFilter currently stubs out two pure virtual methods - * of its base class, UnicodeMatcher. These methods are toPattern() - * and matchesIndexValue(). This is done so that filter classes that - * are not actually used as matchers -- specifically, those in the - * UnicodeFilterLogic component, and those in tests -- can continue to - * work without defining these methods. As long as a filter is not - * used in an RBT during real transliteration, these methods will not - * be called. However, this breaks the UnicodeMatcher base class - * protocol, and it is not a correct solution. - * - *

In the future we may revisit the UnicodeMatcher / UnicodeFilter - * hierarchy and either redesign it, or simply remove the stubs in - * UnicodeFilter and force subclasses to implement the full - * UnicodeMatcher protocol. - * - * @see UnicodeFilterLogic - * @stable ICU 2.0 - */ -class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { - -public: - /** - * Destructor - * @stable ICU 2.0 - */ - virtual ~UnicodeFilter(); - - /** - * Returns true for characters that are in the selected - * subset. In other words, if a character is to be - * filtered, then contains() returns - * false. - * @stable ICU 2.0 - */ - virtual UBool contains(UChar32 c) const = 0; - - /** - * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer - * and return the pointer. - * @stable ICU 2.4 - */ - virtual UnicodeMatcher* toMatcher() const; - - /** - * Implement UnicodeMatcher API. - * @stable ICU 2.4 - */ - virtual UMatchDegree matches(const Replaceable& text, - int32_t& offset, - int32_t limit, - UBool incremental); - - /** - * UnicodeFunctor API. Nothing to do. - * @stable ICU 2.4 - */ - virtual void setData(const TransliterationRuleData*); - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - * - * @stable ICU 2.2 - */ - static UClassID U_EXPORT2 getStaticClassID(); - -protected: - - /* - * Since this class has pure virtual functions, - * a constructor can't be used. - * @stable ICU 2.0 - */ -/* UnicodeFilter();*/ -}; - -/*inline UnicodeFilter::UnicodeFilter() {}*/ - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/unifunct.h b/deps/node/deps/icu-small/source/common/unicode/unifunct.h deleted file mode 100644 index 66a02ce7..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/unifunct.h +++ /dev/null @@ -1,127 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2002-2005, International Business Machines Corporation -* and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 01/14/2002 aliu Creation. -********************************************************************** -*/ -#ifndef UNIFUNCT_H -#define UNIFUNCT_H - -#include "unicode/utypes.h" -#include "unicode/uobject.h" - -/** - * \file - * \brief C++ API: Unicode Functor - */ - -U_NAMESPACE_BEGIN - -class UnicodeMatcher; -class UnicodeReplacer; -class TransliterationRuleData; - -/** - * UnicodeFunctor is an abstract base class for objects - * that perform match and/or replace operations on Unicode strings. - * @author Alan Liu - * @stable ICU 2.4 - */ -class U_COMMON_API UnicodeFunctor : public UObject { - -public: - - /** - * Destructor - * @stable ICU 2.4 - */ - virtual ~UnicodeFunctor(); - - /** - * Return a copy of this object. All UnicodeFunctor objects - * have to support cloning in order to allow classes using - * UnicodeFunctor to implement cloning. - * @stable ICU 2.4 - */ - virtual UnicodeFunctor* clone() const = 0; - - /** - * Cast 'this' to a UnicodeMatcher* pointer and return the - * pointer, or null if this is not a UnicodeMatcher*. Subclasses - * that mix in UnicodeMatcher as a base class must override this. - * This protocol is required because a pointer to a UnicodeFunctor - * cannot be cast to a pointer to a UnicodeMatcher, since - * UnicodeMatcher is a mixin that does not derive from - * UnicodeFunctor. - * @stable ICU 2.4 - */ - virtual UnicodeMatcher* toMatcher() const; - - /** - * Cast 'this' to a UnicodeReplacer* pointer and return the - * pointer, or null if this is not a UnicodeReplacer*. Subclasses - * that mix in UnicodeReplacer as a base class must override this. 
- * This protocol is required because a pointer to a UnicodeFunctor - * cannot be cast to a pointer to a UnicodeReplacer, since - * UnicodeReplacer is a mixin that does not derive from - * UnicodeFunctor. - * @stable ICU 2.4 - */ - virtual UnicodeReplacer* toReplacer() const; - - /** - * Return the class ID for this class. This is useful only for - * comparing to a return value from getDynamicClassID(). - * @return The class ID for all objects of this class. - * @stable ICU 2.0 - */ - static UClassID U_EXPORT2 getStaticClassID(void); - - /** - * Returns a unique class ID polymorphically. This method - * is to implement a simple version of RTTI, since not all C++ - * compilers support genuine RTTI. Polymorphic operator==() and - * clone() methods call this method. - * - *

Concrete subclasses of UnicodeFunctor should use the macro - * UOBJECT_DEFINE_RTTI_IMPLEMENTATION from uobject.h to - * provide definitios getStaticClassID and getDynamicClassID. - * - * @return The class ID for this object. All objects of a given - * class have the same class ID. Objects of other classes have - * different class IDs. - * @stable ICU 2.4 - */ - virtual UClassID getDynamicClassID(void) const = 0; - - /** - * Set the data object associated with this functor. The data - * object provides context for functor-to-standin mapping. This - * method is required when assigning a functor to a different data - * object. This function MAY GO AWAY later if the architecture is - * changed to pass data object pointers through the API. - * @internal ICU 2.1 - */ - virtual void setData(const TransliterationRuleData*) = 0; - -protected: - - /** - * Since this class has pure virtual functions, - * a constructor can't be used. - * @stable ICU 2.0 - */ - /*UnicodeFunctor();*/ - -}; - -/*inline UnicodeFunctor::UnicodeFunctor() {}*/ - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/unimatch.h b/deps/node/deps/icu-small/source/common/unicode/unimatch.h deleted file mode 100644 index 8bf39950..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/unimatch.h +++ /dev/null @@ -1,165 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -* Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 07/18/01 aliu Creation. 
-********************************************************************** -*/ -#ifndef UNIMATCH_H -#define UNIMATCH_H - -#include "unicode/utypes.h" - -/** - * \file - * \brief C++ API: Unicode Matcher - */ - - -U_NAMESPACE_BEGIN - -class Replaceable; -class UnicodeString; -class UnicodeSet; - -/** - * Constants returned by UnicodeMatcher::matches() - * indicating the degree of match. - * @stable ICU 2.4 - */ -enum UMatchDegree { - /** - * Constant returned by matches() indicating a - * mismatch between the text and this matcher. The text contains - * a character which does not match, or the text does not contain - * all desired characters for a non-incremental match. - * @stable ICU 2.4 - */ - U_MISMATCH, - - /** - * Constant returned by matches() indicating a - * partial match between the text and this matcher. This value is - * only returned for incremental match operations. All characters - * of the text match, but more characters are required for a - * complete match. Alternatively, for variable-length matchers, - * all characters of the text match, and if more characters were - * supplied at limit, they might also match. - * @stable ICU 2.4 - */ - U_PARTIAL_MATCH, - - /** - * Constant returned by matches() indicating a - * complete match between the text and this matcher. For an - * incremental variable-length match, this value is returned if - * the given text matches, and it is known that additional - * characters would not alter the extent of the match. - * @stable ICU 2.4 - */ - U_MATCH -}; - -/** - * UnicodeMatcher defines a protocol for objects that can - * match a range of characters in a Replaceable string. - * @stable ICU 2.4 - */ -class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ { - -public: - /** - * Destructor. - * @stable ICU 2.4 - */ - virtual ~UnicodeMatcher(); - - /** - * Return a UMatchDegree value indicating the degree of match for - * the given text at the given offset. 
Zero, one, or more - * characters may be matched. - * - * Matching in the forward direction is indicated by limit > - * offset. Characters from offset forwards to limit-1 will be - * considered for matching. - * - * Matching in the reverse direction is indicated by limit < - * offset. Characters from offset backwards to limit+1 will be - * considered for matching. - * - * If limit == offset then the only match possible is a zero - * character match (which subclasses may implement if desired). - * - * As a side effect, advance the offset parameter to the limit of - * the matched substring. In the forward direction, this will be - * the index of the last matched character plus one. In the - * reverse direction, this will be the index of the last matched - * character minus one. - * - *

Note: This method is not const because some classes may - * modify their state as the result of a match. - * - * @param text the text to be matched - * @param offset on input, the index into text at which to begin - * matching. On output, the limit of the matched text. The - * number of matched characters is the output value of offset - * minus the input value. Offset should always point to the - * HIGH SURROGATE (leading code unit) of a pair of surrogates, - * both on entry and upon return. - * @param limit the limit index of text to be matched. Greater - * than offset for a forward direction match, less than offset for - * a backward direction match. The last character to be - * considered for matching will be text.charAt(limit-1) in the - * forward direction or text.charAt(limit+1) in the backward - * direction. - * @param incremental if TRUE, then assume further characters may - * be inserted at limit and check for partial matching. Otherwise - * assume the text as given is complete. - * @return a match degree value indicating a full match, a partial - * match, or a mismatch. If incremental is FALSE then - * U_PARTIAL_MATCH should never be returned. - * @stable ICU 2.4 - */ - virtual UMatchDegree matches(const Replaceable& text, - int32_t& offset, - int32_t limit, - UBool incremental) = 0; - - /** - * Returns a string representation of this matcher. If the result of - * calling this function is passed to the appropriate parser, it - * will produce another matcher that is equal to this one. - * @param result the string to receive the pattern. Previous - * contents will be deleted. - * @param escapeUnprintable if TRUE then convert unprintable - * character to their hex escape representations, \\uxxxx or - * \\Uxxxxxxxx. Unprintable characters are those other than - * U+000A, U+0020..U+007E. 
- * @stable ICU 2.4 - */ - virtual UnicodeString& toPattern(UnicodeString& result, - UBool escapeUnprintable = FALSE) const = 0; - - /** - * Returns TRUE if this matcher will match a character c, where c - * & 0xFF == v, at offset, in the forward direction (with limit > - * offset). This is used by RuleBasedTransliterator for - * indexing. - * @stable ICU 2.4 - */ - virtual UBool matchesIndexValue(uint8_t v) const = 0; - - /** - * Union the set of all characters that may be matched by this object - * into the given set. - * @param toUnionTo the set into which to union the source characters - * @stable ICU 2.4 - */ - virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; -}; - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/uniset.h b/deps/node/deps/icu-small/source/common/unicode/uniset.h deleted file mode 100644 index 2ab2695a..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uniset.h +++ /dev/null @@ -1,1722 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -*************************************************************************** -* Copyright (C) 1999-2016, International Business Machines Corporation -* and others. All Rights Reserved. -*************************************************************************** -* Date Name Description -* 10/20/99 alan Creation. -*************************************************************************** -*/ - -#ifndef UNICODESET_H -#define UNICODESET_H - -#include "unicode/ucpmap.h" -#include "unicode/unifilt.h" -#include "unicode/unistr.h" -#include "unicode/uset.h" - -/** - * \file - * \brief C++ API: Unicode Set - */ - -U_NAMESPACE_BEGIN - -// Forward Declarations. 
-class BMPSet; -class CharacterProperties; -class ParsePosition; -class RBBIRuleScanner; -class SymbolTable; -class UnicodeSetStringSpan; -class UVector; -class RuleCharacterIterator; - -/** - * A mutable set of Unicode characters and multicharacter strings. Objects of this class - * represent character classes used in regular expressions. - * A character specifies a subset of Unicode code points. Legal - * code points are U+0000 to U+10FFFF, inclusive. - * - *

The UnicodeSet class is not designed to be subclassed. - * - *

UnicodeSet supports two APIs. The first is the - * operand API that allows the caller to modify the value of - * a UnicodeSet object. It conforms to Java 2's - * java.util.Set interface, although - * UnicodeSet does not actually implement that - * interface. All methods of Set are supported, with the - * modification that they take a character range or single character - * instead of an Object, and they take a - * UnicodeSet instead of a Collection. The - * operand API may be thought of in terms of boolean logic: a boolean - * OR is implemented by add, a boolean AND is implemented - * by retain, a boolean XOR is implemented by - * complement taking an argument, and a boolean NOT is - * implemented by complement with no argument. In terms - * of traditional set theory function names, add is a - * union, retain is an intersection, remove - * is an asymmetric difference, and complement with no - * argument is a set complement with respect to the superset range - * MIN_VALUE-MAX_VALUE - * - *

The second API is the - * applyPattern()/toPattern() API from the - * java.text.Format-derived classes. Unlike the - * methods that add characters, add categories, and control the logic - * of the set, the method applyPattern() sets all - * attributes of a UnicodeSet at once, based on a - * string pattern. - * - *

Pattern syntax

- * - * Patterns are accepted by the constructors and the - * applyPattern() methods and returned by the - * toPattern() method. These patterns follow a syntax - * similar to that employed by version 8 regular expression character - * classes. Here are some simple examples: - * - * \htmlonly
\endhtmlonly - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
[]No characters
[a]The character 'a'
[ae]The characters 'a' and 'e'
[a-e]The characters 'a' through 'e' inclusive, in Unicode code - * point order
[\\u4E01]The character U+4E01
[a{ab}{ac}]The character 'a' and the multicharacter strings "ab" and - * "ac"
[\\p{Lu}]All characters in the general category Uppercase Letter
- * \htmlonly
\endhtmlonly - * - * Any character may be preceded by a backslash in order to remove any special - * meaning. White space characters, as defined by UCharacter.isWhitespace(), are - * ignored, unless they are escaped. - * - *

Property patterns specify a set of characters having a certain - * property as defined by the Unicode standard. Both the POSIX-like - * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized. For a - * complete list of supported property patterns, see the User's Guide - * for UnicodeSet at - * - * http://icu-project.org/userguide/unicodeSet.html. - * Actual determination of property data is defined by the underlying - * Unicode database as implemented by UCharacter. - * - *

Patterns specify individual characters, ranges of characters, and - * Unicode property sets. When elements are concatenated, they - * specify their union. To complement a set, place a '^' immediately - * after the opening '['. Property patterns are inverted by modifying - * their delimiters; "[:^foo]" and "\\P{foo}". In any other location, - * '^' has no special meaning. - * - *

Ranges are indicated by placing two a '-' between two - * characters, as in "a-z". This specifies the range of all - * characters from the left to the right, in Unicode order. If the - * left character is greater than or equal to the - * right character it is a syntax error. If a '-' occurs as the first - * character after the opening '[' or '[^', or if it occurs as the - * last character before the closing ']', then it is taken as a - * literal. Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same - * set of three characters, 'a', 'b', and '-'. - * - *

Sets may be intersected using the '&' operator or the asymmetric - * set difference may be taken using the '-' operator, for example, - * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters - * with values less than 4096. Operators ('&' and '|') have equal - * precedence and bind left-to-right. Thus - * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to - * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]". This only really matters for - * difference; intersection is commutative. - * - * - *
[a]The set containing 'a' - *
[a-z]The set containing 'a' - * through 'z' and all letters in between, in Unicode order - *
[^a-z]The set containing - * all characters but 'a' through 'z', - * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF - *
[[pat1][pat2]] - * The union of sets specified by pat1 and pat2 - *
[[pat1]&[pat2]] - * The intersection of sets specified by pat1 and pat2 - *
[[pat1]-[pat2]] - * The asymmetric difference of sets specified by pat1 and - * pat2 - *
[:Lu:] or \\p{Lu} - * The set of characters having the specified - * Unicode property; in - * this case, Unicode uppercase letters - *
[:^Lu:] or \\P{Lu} - * The set of characters not having the given - * Unicode property - *
- * - *

Warning: you cannot add an empty string ("") to a UnicodeSet.

- * - *

Formal syntax

- * - * \htmlonly
\endhtmlonly - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
pattern :=  ('[' '^'? item* ']') | - * property
item :=  char | (char '-' char) | pattern-expr
- *
pattern-expr :=  pattern | pattern-expr pattern | - * pattern-expr op pattern
- *
op :=  '&' | '-'
- *
special :=  '[' | ']' | '-'
- *
char :=  any character that is not special
- * | ('\'
any character)
- * | ('\\u' hex hex hex hex)
- *
hex :=  any character for which - * Character.digit(c, 16) - * returns a non-negative result
property :=  a Unicode property set pattern
- *
- * - * - * - * - *
Legend: - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
a := b  a may be replaced by b
a?zero or one instance of a
- *
a*one or more instances of a
- *
a | beither a or b
- *
'a'the literal string between the quotes
- *
- * \htmlonly
\endhtmlonly - * - *

Note: - * - Most UnicodeSet methods do not take a UErrorCode parameter because - * there are usually very few opportunities for failure other than a shortage - * of memory, error codes in low-level C++ string methods would be inconvenient, - * and the error code as the last parameter (ICU convention) would prevent - * the use of default parameter values. - * Instead, such methods set the UnicodeSet into a "bogus" state - * (see isBogus()) if an error occurs. - * - * @author Alan Liu - * @stable ICU 2.0 - */ -class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { - - int32_t len; // length of list used; 0 <= len <= capacity - int32_t capacity; // capacity of list - UChar32* list; // MUST be terminated with HIGH - BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL. - UChar32* buffer; // internal buffer, may be NULL - int32_t bufferCapacity; // capacity of buffer - int32_t patLen; - - /** - * The pattern representation of this set. This may not be the - * most economical pattern. It is the pattern supplied to - * applyPattern(), with variables substituted and whitespace - * removed. For sets constructed without applyPattern(), or - * modified using the non-pattern API, this string will be empty, - * indicating that toPattern() must generate a pattern - * representation from the inversion list. - */ - char16_t *pat; - UVector* strings; // maintained in sorted order - UnicodeSetStringSpan *stringSpan; - -private: - enum { // constants - kIsBogus = 1 // This set is bogus (i.e. not valid) - }; - uint8_t fFlags; // Bit flag (see constants above) -public: - /** - * Determine if this object contains a valid set. - * A bogus set has no value. It is different from an empty set. - * It can be used to indicate that no set value is available. - * - * @return TRUE if the set is bogus/invalid, FALSE otherwise - * @see setToBogus() - * @stable ICU 4.0 - */ - inline UBool isBogus(void) const; - - /** - * Make this UnicodeSet object invalid. 
- * The string will test TRUE with isBogus(). - * - * A bogus set has no value. It is different from an empty set. - * It can be used to indicate that no set value is available. - * - * This utility function is used throughout the UnicodeSet - * implementation to indicate that a UnicodeSet operation failed, - * and may be used in other functions, - * especially but not exclusively when such functions do not - * take a UErrorCode for simplicity. - * - * @see isBogus() - * @stable ICU 4.0 - */ - void setToBogus(); - -public: - - enum { - /** - * Minimum value that can be stored in a UnicodeSet. - * @stable ICU 2.4 - */ - MIN_VALUE = 0, - - /** - * Maximum value that can be stored in a UnicodeSet. - * @stable ICU 2.4 - */ - MAX_VALUE = 0x10ffff - }; - - //---------------------------------------------------------------- - // Constructors &c - //---------------------------------------------------------------- - -public: - - /** - * Constructs an empty set. - * @stable ICU 2.0 - */ - UnicodeSet(); - - /** - * Constructs a set containing the given range. If end < - * start then an empty set is created. - * - * @param start first character, inclusive, of range - * @param end last character, inclusive, of range - * @stable ICU 2.4 - */ - UnicodeSet(UChar32 start, UChar32 end); - -#ifndef U_HIDE_INTERNAL_API - /** - * @internal - */ - enum ESerialization { - kSerialized /* result of serialize() */ - }; - - /** - * Constructs a set from the output of serialize(). - * - * @param buffer the 16 bit array - * @param bufferLen the original length returned from serialize() - * @param serialization the value 'kSerialized' - * @param status error code - * - * @internal - */ - UnicodeSet(const uint16_t buffer[], int32_t bufferLen, - ESerialization serialization, UErrorCode &status); -#endif /* U_HIDE_INTERNAL_API */ - - /** - * Constructs a set from the given pattern. See the class - * description for the syntax of the pattern language. 
- * @param pattern a string specifying what characters are in the set - * @param status returns U_ILLEGAL_ARGUMENT_ERROR if the pattern - * contains a syntax error. - * @stable ICU 2.0 - */ - UnicodeSet(const UnicodeString& pattern, - UErrorCode& status); - -#ifndef U_HIDE_INTERNAL_API - /** - * Constructs a set from the given pattern. See the class - * description for the syntax of the pattern language. - * @param pattern a string specifying what characters are in the set - * @param options bitmask for options to apply to the pattern. - * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. - * @param symbols a symbol table mapping variable names to values - * and stand-in characters to UnicodeSets; may be NULL - * @param status returns U_ILLEGAL_ARGUMENT_ERROR if the pattern - * contains a syntax error. - * @internal - */ - UnicodeSet(const UnicodeString& pattern, - uint32_t options, - const SymbolTable* symbols, - UErrorCode& status); -#endif /* U_HIDE_INTERNAL_API */ - - /** - * Constructs a set from the given pattern. See the class description - * for the syntax of the pattern language. - * @param pattern a string specifying what characters are in the set - * @param pos on input, the position in pattern at which to start parsing. - * On output, the position after the last character parsed. - * @param options bitmask for options to apply to the pattern. - * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. - * @param symbols a symbol table mapping variable names to values - * and stand-in characters to UnicodeSets; may be NULL - * @param status input-output error code - * @stable ICU 2.8 - */ - UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, - uint32_t options, - const SymbolTable* symbols, - UErrorCode& status); - - /** - * Constructs a set that is identical to the given UnicodeSet. - * @stable ICU 2.0 - */ - UnicodeSet(const UnicodeSet& o); - - /** - * Destructs the set. 
- * @stable ICU 2.0 - */ - virtual ~UnicodeSet(); - - /** - * Assigns this object to be a copy of another. - * A frozen set will not be modified. - * @stable ICU 2.0 - */ - UnicodeSet& operator=(const UnicodeSet& o); - - /** - * Compares the specified object with this set for equality. Returns - * true if the two sets - * have the same size, and every member of the specified set is - * contained in this set (or equivalently, every member of this set is - * contained in the specified set). - * - * @param o set to be compared for equality with this set. - * @return true if the specified set is equal to this set. - * @stable ICU 2.0 - */ - virtual UBool operator==(const UnicodeSet& o) const; - - /** - * Compares the specified object with this set for equality. Returns - * true if the specified set is not equal to this set. - * @stable ICU 2.0 - */ - UBool operator!=(const UnicodeSet& o) const; - - /** - * Returns a copy of this object. All UnicodeFunctor objects have - * to support cloning in order to allow classes using - * UnicodeFunctors, such as Transliterator, to implement cloning. - * If this set is frozen, then the clone will be frozen as well. - * Use cloneAsThawed() for a mutable clone of a frozen set. - * @see cloneAsThawed - * @stable ICU 2.0 - */ - virtual UnicodeFunctor* clone() const; - - /** - * Returns the hash code value for this set. - * - * @return the hash code value for this set. - * @see Object#hashCode() - * @stable ICU 2.0 - */ - virtual int32_t hashCode(void) const; - - /** - * Get a UnicodeSet pointer from a USet - * - * @param uset a USet (the ICU plain C type for UnicodeSet) - * @return the corresponding UnicodeSet pointer. - * - * @stable ICU 4.2 - */ - inline static UnicodeSet *fromUSet(USet *uset); - - /** - * Get a UnicodeSet pointer from a const USet - * - * @param uset a const USet (the ICU plain C type for UnicodeSet) - * @return the corresponding UnicodeSet pointer. 
- * - * @stable ICU 4.2 - */ - inline static const UnicodeSet *fromUSet(const USet *uset); - - /** - * Produce a USet * pointer for this UnicodeSet. - * USet is the plain C type for UnicodeSet - * - * @return a USet pointer for this UnicodeSet - * @stable ICU 4.2 - */ - inline USet *toUSet(); - - - /** - * Produce a const USet * pointer for this UnicodeSet. - * USet is the plain C type for UnicodeSet - * - * @return a const USet pointer for this UnicodeSet - * @stable ICU 4.2 - */ - inline const USet * toUSet() const; - - - //---------------------------------------------------------------- - // Freezable API - //---------------------------------------------------------------- - - /** - * Determines whether the set has been frozen (made immutable) or not. - * See the ICU4J Freezable interface for details. - * @return TRUE/FALSE for whether the set has been frozen - * @see freeze - * @see cloneAsThawed - * @stable ICU 3.8 - */ - inline UBool isFrozen() const; - - /** - * Freeze the set (make it immutable). - * Once frozen, it cannot be unfrozen and is therefore thread-safe - * until it is deleted. - * See the ICU4J Freezable interface for details. - * Freezing the set may also make some operations faster, for example - * contains() and span(). - * A frozen set will not be modified. (It remains frozen.) - * @return this set. - * @see isFrozen - * @see cloneAsThawed - * @stable ICU 3.8 - */ - UnicodeFunctor *freeze(); - - /** - * Clone the set and make the clone mutable. - * See the ICU4J Freezable interface for details. - * @return the mutable clone - * @see freeze - * @see isFrozen - * @stable ICU 3.8 - */ - UnicodeFunctor *cloneAsThawed() const; - - //---------------------------------------------------------------- - // Public API - //---------------------------------------------------------------- - - /** - * Make this object represent the range `start - end`. - * If `end > start` then this object is set to an empty range. - * A frozen set will not be modified. 
- * - * @param start first character in the set, inclusive - * @param end last character in the set, inclusive - * @stable ICU 2.4 - */ - UnicodeSet& set(UChar32 start, UChar32 end); - - /** - * Return true if the given position, in the given pattern, appears - * to be the start of a UnicodeSet pattern. - * @stable ICU 2.4 - */ - static UBool resemblesPattern(const UnicodeString& pattern, - int32_t pos); - - /** - * Modifies this set to represent the set specified by the given - * pattern, ignoring Unicode Pattern_White_Space characters. - * See the class description for the syntax of the pattern language. - * A frozen set will not be modified. - * @param pattern a string specifying what characters are in the set - * @param status returns U_ILLEGAL_ARGUMENT_ERROR if the pattern - * contains a syntax error. - * Empties the set passed before applying the pattern. - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeSet& applyPattern(const UnicodeString& pattern, - UErrorCode& status); - -#ifndef U_HIDE_INTERNAL_API - /** - * Modifies this set to represent the set specified by the given - * pattern, optionally ignoring Unicode Pattern_White_Space characters. - * See the class description for the syntax of the pattern language. - * A frozen set will not be modified. - * @param pattern a string specifying what characters are in the set - * @param options bitmask for options to apply to the pattern. - * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. - * @param symbols a symbol table mapping variable names to - * values and stand-ins to UnicodeSets; may be NULL - * @param status returns U_ILLEGAL_ARGUMENT_ERROR if the pattern - * contains a syntax error. - * Empties the set passed before applying the pattern. 
- * @return a reference to this - * @internal - */ - UnicodeSet& applyPattern(const UnicodeString& pattern, - uint32_t options, - const SymbolTable* symbols, - UErrorCode& status); -#endif /* U_HIDE_INTERNAL_API */ - - /** - * Parses the given pattern, starting at the given position. The - * character at pattern.charAt(pos.getIndex()) must be '[', or the - * parse fails. Parsing continues until the corresponding closing - * ']'. If a syntax error is encountered between the opening and - * closing brace, the parse fails. Upon return from a successful - * parse, the ParsePosition is updated to point to the character - * following the closing ']', and a StringBuffer containing a - * pairs list for the parsed pattern is returned. This method calls - * itself recursively to parse embedded subpatterns. - * Empties the set passed before applying the pattern. - * A frozen set will not be modified. - * - * @param pattern the string containing the pattern to be parsed. - * The portion of the string from pos.getIndex(), which must be a - * '[', to the corresponding closing ']', is parsed. - * @param pos upon entry, the position at which to being parsing. - * The character at pattern.charAt(pos.getIndex()) must be a '['. - * Upon return from a successful parse, pos.getIndex() is either - * the character after the closing ']' of the parsed pattern, or - * pattern.length() if the closing ']' is the last character of - * the pattern string. - * @param options bitmask for options to apply to the pattern. - * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. - * @param symbols a symbol table mapping variable names to - * values and stand-ins to UnicodeSets; may be NULL - * @param status returns U_ILLEGAL_ARGUMENT_ERROR if the pattern - * contains a syntax error. 
- * @return a reference to this - * @stable ICU 2.8 - */ - UnicodeSet& applyPattern(const UnicodeString& pattern, - ParsePosition& pos, - uint32_t options, - const SymbolTable* symbols, - UErrorCode& status); - - /** - * Returns a string representation of this set. If the result of - * calling this function is passed to a UnicodeSet constructor, it - * will produce another set that is equal to this one. - * A frozen set will not be modified. - * @param result the string to receive the rules. Previous - * contents will be deleted. - * @param escapeUnprintable if TRUE then convert unprintable - * character to their hex escape representations, \\uxxxx or - * \\Uxxxxxxxx. Unprintable characters are those other than - * U+000A, U+0020..U+007E. - * @stable ICU 2.0 - */ - virtual UnicodeString& toPattern(UnicodeString& result, - UBool escapeUnprintable = FALSE) const; - - /** - * Modifies this set to contain those code points which have the given value - * for the given binary or enumerated property, as returned by - * u_getIntPropertyValue. Prior contents of this set are lost. - * A frozen set will not be modified. - * - * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 - * or UCHAR_INT_START..UCHAR_INT_LIMIT-1 - * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1. - * - * @param value a value in the range u_getIntPropertyMinValue(prop).. - * u_getIntPropertyMaxValue(prop), with one exception. If prop is - * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but - * rather a mask value produced by U_GET_GC_MASK(). This allows grouped - * categories such as [:L:] to be represented. - * - * @param ec error code input/output parameter - * - * @return a reference to this set - * - * @stable ICU 2.4 - */ - UnicodeSet& applyIntPropertyValue(UProperty prop, - int32_t value, - UErrorCode& ec); - - /** - * Modifies this set to contain those code points which have the - * given value for the given property. Prior contents of this - * set are lost. 
- * A frozen set will not be modified. - * - * @param prop a property alias, either short or long. The name is matched - * loosely. See PropertyAliases.txt for names and a description of loose - * matching. If the value string is empty, then this string is interpreted - * as either a General_Category value alias, a Script value alias, a binary - * property alias, or a special ID. Special IDs are matched loosely and - * correspond to the following sets: - * - * "ANY" = [\\u0000-\\U0010FFFF], - * "ASCII" = [\\u0000-\\u007F], - * "Assigned" = [:^Cn:]. - * - * @param value a value alias, either short or long. The name is matched - * loosely. See PropertyValueAliases.txt for names and a description of - * loose matching. In addition to aliases listed, numeric values and - * canonical combining classes may be expressed numerically, e.g., ("nv", - * "0.5") or ("ccc", "220"). The value string may also be empty. - * - * @param ec error code input/output parameter - * - * @return a reference to this set - * - * @stable ICU 2.4 - */ - UnicodeSet& applyPropertyAlias(const UnicodeString& prop, - const UnicodeString& value, - UErrorCode& ec); - - /** - * Returns the number of elements in this set (its cardinality). - * Note than the elements of a set may include both individual - * codepoints and strings. - * - * @return the number of elements in this set (its cardinality). - * @stable ICU 2.0 - */ - virtual int32_t size(void) const; - - /** - * Returns true if this set contains no elements. - * - * @return true if this set contains no elements. - * @stable ICU 2.0 - */ - virtual UBool isEmpty(void) const; - - /** - * Returns true if this set contains the given character. - * This function works faster with a frozen set. - * @param c character to be checked for containment - * @return true if the test condition is met - * @stable ICU 2.0 - */ - virtual UBool contains(UChar32 c) const; - - /** - * Returns true if this set contains every character - * of the given range. 
- * @param start first character, inclusive, of the range - * @param end last character, inclusive, of the range - * @return true if the test condition is met - * @stable ICU 2.0 - */ - virtual UBool contains(UChar32 start, UChar32 end) const; - - /** - * Returns true if this set contains the given - * multicharacter string. - * @param s string to be checked for containment - * @return true if this set contains the specified string - * @stable ICU 2.4 - */ - UBool contains(const UnicodeString& s) const; - - /** - * Returns true if this set contains all the characters and strings - * of the given set. - * @param c set to be checked for containment - * @return true if the test condition is met - * @stable ICU 2.4 - */ - virtual UBool containsAll(const UnicodeSet& c) const; - - /** - * Returns true if this set contains all the characters - * of the given string. - * @param s string containing characters to be checked for containment - * @return true if the test condition is met - * @stable ICU 2.4 - */ - UBool containsAll(const UnicodeString& s) const; - - /** - * Returns true if this set contains none of the characters - * of the given range. - * @param start first character, inclusive, of the range - * @param end last character, inclusive, of the range - * @return true if the test condition is met - * @stable ICU 2.4 - */ - UBool containsNone(UChar32 start, UChar32 end) const; - - /** - * Returns true if this set contains none of the characters and strings - * of the given set. - * @param c set to be checked for containment - * @return true if the test condition is met - * @stable ICU 2.4 - */ - UBool containsNone(const UnicodeSet& c) const; - - /** - * Returns true if this set contains none of the characters - * of the given string. 
- * @param s string containing characters to be checked for containment - * @return true if the test condition is met - * @stable ICU 2.4 - */ - UBool containsNone(const UnicodeString& s) const; - - /** - * Returns true if this set contains one or more of the characters - * in the given range. - * @param start first character, inclusive, of the range - * @param end last character, inclusive, of the range - * @return true if the condition is met - * @stable ICU 2.4 - */ - inline UBool containsSome(UChar32 start, UChar32 end) const; - - /** - * Returns true if this set contains one or more of the characters - * and strings of the given set. - * @param s The set to be checked for containment - * @return true if the condition is met - * @stable ICU 2.4 - */ - inline UBool containsSome(const UnicodeSet& s) const; - - /** - * Returns true if this set contains one or more of the characters - * of the given string. - * @param s string containing characters to be checked for containment - * @return true if the condition is met - * @stable ICU 2.4 - */ - inline UBool containsSome(const UnicodeString& s) const; - - /** - * Returns the length of the initial substring of the input string which - * consists only of characters and strings that are contained in this set - * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), - * or only of characters and strings that are not contained - * in this set (USET_SPAN_NOT_CONTAINED). - * See USetSpanCondition for details. - * Similar to the strspn() C library function. - * Unpaired surrogates are treated according to contains() of their surrogate code points. - * This function works faster with a frozen set and with a non-negative string length argument. 
- * @param s start of the string - * @param length of the string; can be -1 for NUL-terminated - * @param spanCondition specifies the containment condition - * @return the length of the initial substring according to the spanCondition; - * 0 if the start of the string does not fit the spanCondition - * @stable ICU 3.8 - * @see USetSpanCondition - */ - int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const; - - /** - * Returns the end of the substring of the input string according to the USetSpanCondition. - * Same as start+span(s.getBuffer()+start, s.length()-start, spanCondition) - * after pinning start to 0<=start<=s.length(). - * @param s the string - * @param start the start index in the string for the span operation - * @param spanCondition specifies the containment condition - * @return the exclusive end of the substring according to the spanCondition; - * the substring s.tempSubStringBetween(start, end) fulfills the spanCondition - * @stable ICU 4.4 - * @see USetSpanCondition - */ - inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const; - - /** - * Returns the start of the trailing substring of the input string which - * consists only of characters and strings that are contained in this set - * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), - * or only of characters and strings that are not contained - * in this set (USET_SPAN_NOT_CONTAINED). - * See USetSpanCondition for details. - * Unpaired surrogates are treated according to contains() of their surrogate code points. - * This function works faster with a frozen set and with a non-negative string length argument. 
- * @param s start of the string - * @param length of the string; can be -1 for NUL-terminated - * @param spanCondition specifies the containment condition - * @return the start of the trailing substring according to the spanCondition; - * the string length if the end of the string does not fit the spanCondition - * @stable ICU 3.8 - * @see USetSpanCondition - */ - int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const; - - /** - * Returns the start of the substring of the input string according to the USetSpanCondition. - * Same as spanBack(s.getBuffer(), limit, spanCondition) - * after pinning limit to 0<=end<=s.length(). - * @param s the string - * @param limit the exclusive-end index in the string for the span operation - * (use s.length() or INT32_MAX for spanning back from the end of the string) - * @param spanCondition specifies the containment condition - * @return the start of the substring according to the spanCondition; - * the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition - * @stable ICU 4.4 - * @see USetSpanCondition - */ - inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const; - - /** - * Returns the length of the initial substring of the input string which - * consists only of characters and strings that are contained in this set - * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), - * or only of characters and strings that are not contained - * in this set (USET_SPAN_NOT_CONTAINED). - * See USetSpanCondition for details. - * Similar to the strspn() C library function. - * Malformed byte sequences are treated according to contains(0xfffd). - * This function works faster with a frozen set and with a non-negative string length argument. 
- * @param s start of the string (UTF-8) - * @param length of the string; can be -1 for NUL-terminated - * @param spanCondition specifies the containment condition - * @return the length of the initial substring according to the spanCondition; - * 0 if the start of the string does not fit the spanCondition - * @stable ICU 3.8 - * @see USetSpanCondition - */ - int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const; - - /** - * Returns the start of the trailing substring of the input string which - * consists only of characters and strings that are contained in this set - * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), - * or only of characters and strings that are not contained - * in this set (USET_SPAN_NOT_CONTAINED). - * See USetSpanCondition for details. - * Malformed byte sequences are treated according to contains(0xfffd). - * This function works faster with a frozen set and with a non-negative string length argument. - * @param s start of the string (UTF-8) - * @param length of the string; can be -1 for NUL-terminated - * @param spanCondition specifies the containment condition - * @return the start of the trailing substring according to the spanCondition; - * the string length if the end of the string does not fit the spanCondition - * @stable ICU 3.8 - * @see USetSpanCondition - */ - int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const; - - /** - * Implement UnicodeMatcher::matches() - * @stable ICU 2.4 - */ - virtual UMatchDegree matches(const Replaceable& text, - int32_t& offset, - int32_t limit, - UBool incremental); - -private: - /** - * Returns the longest match for s in text at the given position. - * If limit > start then match forward from start+1 to limit - * matching all characters except s.charAt(0). If limit < start, - * go backward starting from start-1 matching all characters - * except s.charAt(s.length()-1). 
This method assumes that the - * first character, text.charAt(start), matches s, so it does not - * check it. - * @param text the text to match - * @param start the first character to match. In the forward - * direction, text.charAt(start) is matched against s.charAt(0). - * In the reverse direction, it is matched against - * s.charAt(s.length()-1). - * @param limit the limit offset for matching, either last+1 in - * the forward direction, or last-1 in the reverse direction, - * where last is the index of the last character to match. - * @param s - * @return If part of s matches up to the limit, return |limit - - * start|. If all of s matches before reaching the limit, return - * s.length(). If there is a mismatch between s and text, return - * 0 - */ - static int32_t matchRest(const Replaceable& text, - int32_t start, int32_t limit, - const UnicodeString& s); - - /** - * Returns the smallest value i such that c < list[i]. Caller - * must ensure that c is a legal value or this method will enter - * an infinite loop. This method performs a binary search. - * @param c a character in the range MIN_VALUE..MAX_VALUE - * inclusive - * @return the smallest integer i in the range 0..len-1, - * inclusive, such that c < list[i] - */ - int32_t findCodePoint(UChar32 c) const; - -public: - - /** - * Implementation of UnicodeMatcher API. Union the set of all - * characters that may be matched by this object into the given - * set. - * @param toUnionTo the set into which to union the source characters - * @stable ICU 2.4 - */ - virtual void addMatchSetTo(UnicodeSet& toUnionTo) const; - - /** - * Returns the index of the given character within this set, where - * the set is ordered by ascending code point. If the character - * is not in this set, return -1. The inverse of this method is - * charAt(). 
- * @return an index from 0..size()-1, or -1 - * @stable ICU 2.4 - */ - int32_t indexOf(UChar32 c) const; - - /** - * Returns the character at the given index within this set, where - * the set is ordered by ascending code point. If the index is - * out of range, return (UChar32)-1. The inverse of this method is - * indexOf(). - * @param index an index from 0..size()-1 - * @return the character at the given index, or (UChar32)-1. - * @stable ICU 2.4 - */ - UChar32 charAt(int32_t index) const; - - /** - * Adds the specified range to this set if it is not already - * present. If this set already contains the specified range, - * the call leaves this set unchanged. If end > start - * then an empty range is added, leaving the set unchanged. - * This is equivalent to a boolean logic OR, or a set UNION. - * A frozen set will not be modified. - * - * @param start first character, inclusive, of range to be added - * to this set. - * @param end last character, inclusive, of range to be added - * to this set. - * @stable ICU 2.0 - */ - virtual UnicodeSet& add(UChar32 start, UChar32 end); - - /** - * Adds the specified character to this set if it is not already - * present. If this set already contains the specified character, - * the call leaves this set unchanged. - * A frozen set will not be modified. - * @stable ICU 2.0 - */ - UnicodeSet& add(UChar32 c); - - /** - * Adds the specified multicharacter to this set if it is not already - * present. If this set already contains the multicharacter, - * the call leaves this set unchanged. - * Thus "ch" => {"ch"} - *
Warning: you cannot add an empty string ("") to a UnicodeSet. - * A frozen set will not be modified. - * @param s the source string - * @return this object, for chaining - * @stable ICU 2.4 - */ - UnicodeSet& add(const UnicodeString& s); - - private: - /** - * @return a code point IF the string consists of a single one. - * otherwise returns -1. - * @param s string to test - */ - static int32_t getSingleCP(const UnicodeString& s); - - void _add(const UnicodeString& s); - - public: - /** - * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} - * If this set already any particular character, it has no effect on that character. - * A frozen set will not be modified. - * @param s the source string - * @return this object, for chaining - * @stable ICU 2.4 - */ - UnicodeSet& addAll(const UnicodeString& s); - - /** - * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} - * If this set already any particular character, it has no effect on that character. - * A frozen set will not be modified. - * @param s the source string - * @return this object, for chaining - * @stable ICU 2.4 - */ - UnicodeSet& retainAll(const UnicodeString& s); - - /** - * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"} - * If this set already any particular character, it has no effect on that character. - * A frozen set will not be modified. - * @param s the source string - * @return this object, for chaining - * @stable ICU 2.4 - */ - UnicodeSet& complementAll(const UnicodeString& s); - - /** - * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"} - * If this set already any particular character, it has no effect on that character. - * A frozen set will not be modified. - * @param s the source string - * @return this object, for chaining - * @stable ICU 2.4 - */ - UnicodeSet& removeAll(const UnicodeString& s); - - /** - * Makes a set from a multicharacter string. Thus "ch" => {"ch"} - *
Warning: you cannot add an empty string ("") to a UnicodeSet. - * @param s the source string - * @return a newly created set containing the given string. - * The caller owns the return object and is responsible for deleting it. - * @stable ICU 2.4 - */ - static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s); - - - /** - * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"} - * @param s the source string - * @return a newly created set containing the given characters - * The caller owns the return object and is responsible for deleting it. - * @stable ICU 2.4 - */ - static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s); - - /** - * Retain only the elements in this set that are contained in the - * specified range. If end > start then an empty range is - * retained, leaving the set empty. This is equivalent to - * a boolean logic AND, or a set INTERSECTION. - * A frozen set will not be modified. - * - * @param start first character, inclusive, of range to be retained - * to this set. - * @param end last character, inclusive, of range to be retained - * to this set. - * @stable ICU 2.0 - */ - virtual UnicodeSet& retain(UChar32 start, UChar32 end); - - - /** - * Retain the specified character from this set if it is present. - * A frozen set will not be modified. - * @stable ICU 2.0 - */ - UnicodeSet& retain(UChar32 c); - - /** - * Removes the specified range from this set if it is present. - * The set will not contain the specified range once the call - * returns. If end > start then an empty range is - * removed, leaving the set unchanged. - * A frozen set will not be modified. - * - * @param start first character, inclusive, of range to be removed - * from this set. - * @param end last character, inclusive, of range to be removed - * from this set. - * @stable ICU 2.0 - */ - virtual UnicodeSet& remove(UChar32 start, UChar32 end); - - /** - * Removes the specified character from this set if it is present. 
- * The set will not contain the specified range once the call - * returns. - * A frozen set will not be modified. - * @stable ICU 2.0 - */ - UnicodeSet& remove(UChar32 c); - - /** - * Removes the specified string from this set if it is present. - * The set will not contain the specified character once the call - * returns. - * A frozen set will not be modified. - * @param s the source string - * @return this object, for chaining - * @stable ICU 2.4 - */ - UnicodeSet& remove(const UnicodeString& s); - - /** - * Inverts this set. This operation modifies this set so that - * its value is its complement. This is equivalent to - * complement(MIN_VALUE, MAX_VALUE). - * A frozen set will not be modified. - * @stable ICU 2.0 - */ - virtual UnicodeSet& complement(void); - - /** - * Complements the specified range in this set. Any character in - * the range will be removed if it is in this set, or will be - * added if it is not in this set. If end > start - * then an empty range is complemented, leaving the set unchanged. - * This is equivalent to a boolean logic XOR. - * A frozen set will not be modified. - * - * @param start first character, inclusive, of range to be removed - * from this set. - * @param end last character, inclusive, of range to be removed - * from this set. - * @stable ICU 2.0 - */ - virtual UnicodeSet& complement(UChar32 start, UChar32 end); - - /** - * Complements the specified character in this set. The character - * will be removed if it is in this set, or will be added if it is - * not in this set. - * A frozen set will not be modified. - * @stable ICU 2.0 - */ - UnicodeSet& complement(UChar32 c); - - /** - * Complement the specified string in this set. - * The set will not contain the specified string once the call - * returns. - *
Warning: you cannot add an empty string ("") to a UnicodeSet. - * A frozen set will not be modified. - * @param s the string to complement - * @return this object, for chaining - * @stable ICU 2.4 - */ - UnicodeSet& complement(const UnicodeString& s); - - /** - * Adds all of the elements in the specified set to this set if - * they're not already present. This operation effectively - * modifies this set so that its value is the union of the two - * sets. The behavior of this operation is unspecified if the specified - * collection is modified while the operation is in progress. - * A frozen set will not be modified. - * - * @param c set whose elements are to be added to this set. - * @see #add(UChar32, UChar32) - * @stable ICU 2.0 - */ - virtual UnicodeSet& addAll(const UnicodeSet& c); - - /** - * Retains only the elements in this set that are contained in the - * specified set. In other words, removes from this set all of - * its elements that are not contained in the specified set. This - * operation effectively modifies this set so that its value is - * the intersection of the two sets. - * A frozen set will not be modified. - * - * @param c set that defines which elements this set will retain. - * @stable ICU 2.0 - */ - virtual UnicodeSet& retainAll(const UnicodeSet& c); - - /** - * Removes from this set all of its elements that are contained in the - * specified set. This operation effectively modifies this - * set so that its value is the asymmetric set difference of - * the two sets. - * A frozen set will not be modified. - * - * @param c set that defines which elements will be removed from - * this set. - * @stable ICU 2.0 - */ - virtual UnicodeSet& removeAll(const UnicodeSet& c); - - /** - * Complements in this set all elements contained in the specified - * set. Any character in the other set will be removed if it is - * in this set, or will be added if it is not in this set. - * A frozen set will not be modified. 
- * - * @param c set that defines which elements will be xor'ed from - * this set. - * @stable ICU 2.4 - */ - virtual UnicodeSet& complementAll(const UnicodeSet& c); - - /** - * Removes all of the elements from this set. This set will be - * empty after this call returns. - * A frozen set will not be modified. - * @stable ICU 2.0 - */ - virtual UnicodeSet& clear(void); - - /** - * Close this set over the given attribute. For the attribute - * USET_CASE, the result is to modify this set so that: - * - * 1. For each character or string 'a' in this set, all strings or - * characters 'b' such that foldCase(a) == foldCase(b) are added - * to this set. - * - * 2. For each string 'e' in the resulting set, if e != - * foldCase(e), 'e' will be removed. - * - * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}] - * - * (Here foldCase(x) refers to the operation u_strFoldCase, and a - * == b denotes that the contents are the same, not pointer - * comparison.) - * - * A frozen set will not be modified. - * - * @param attribute bitmask for attributes to close over. - * Currently only the USET_CASE bit is supported. Any undefined bits - * are ignored. - * @return a reference to this set. - * @stable ICU 4.2 - */ - UnicodeSet& closeOver(int32_t attribute); - - /** - * Remove all strings from this set. - * - * @return a reference to this set. - * @stable ICU 4.2 - */ - virtual UnicodeSet &removeAllStrings(); - - /** - * Iteration method that returns the number of ranges contained in - * this set. - * @see #getRangeStart - * @see #getRangeEnd - * @stable ICU 2.4 - */ - virtual int32_t getRangeCount(void) const; - - /** - * Iteration method that returns the first character in the - * specified range of this set. - * @see #getRangeCount - * @see #getRangeEnd - * @stable ICU 2.4 - */ - virtual UChar32 getRangeStart(int32_t index) const; - - /** - * Iteration method that returns the last character in the - * specified range of this set. 
- * @see #getRangeStart - * @see #getRangeEnd - * @stable ICU 2.4 - */ - virtual UChar32 getRangeEnd(int32_t index) const; - - /** - * Serializes this set into an array of 16-bit integers. Serialization - * (currently) only records the characters in the set; multicharacter - * strings are ignored. - * - * The array has following format (each line is one 16-bit - * integer): - * - * length = (n+2*m) | (m!=0?0x8000:0) - * bmpLength = n; present if m!=0 - * bmp[0] - * bmp[1] - * ... - * bmp[n-1] - * supp-high[0] - * supp-low[0] - * supp-high[1] - * supp-low[1] - * ... - * supp-high[m-1] - * supp-low[m-1] - * - * The array starts with a header. After the header are n bmp - * code points, then m supplementary code points. Either n or m - * or both may be zero. n+2*m is always <= 0x7FFF. - * - * If there are no supplementary characters (if m==0) then the - * header is one 16-bit integer, 'length', with value n. - * - * If there are supplementary characters (if m!=0) then the header - * is two 16-bit integers. The first, 'length', has value - * (n+2*m)|0x8000. The second, 'bmpLength', has value n. - * - * After the header the code points are stored in ascending order. - * Supplementary code points are stored as most significant 16 - * bits followed by least significant 16 bits. - * - * @param dest pointer to buffer of destCapacity 16-bit integers. - * May be NULL only if destCapacity is zero. - * @param destCapacity size of dest, or zero. Must not be negative. - * @param ec error code. Will be set to U_INDEX_OUTOFBOUNDS_ERROR - * if n+2*m > 0x7FFF. Will be set to U_BUFFER_OVERFLOW_ERROR if - * n+2*m+(m!=0?2:1) > destCapacity. - * @return the total length of the serialized format, including - * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other - * than U_BUFFER_OVERFLOW_ERROR. 
- * @stable ICU 2.4 - */ - int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const; - - /** - * Reallocate this objects internal structures to take up the least - * possible space, without changing this object's value. - * A frozen set will not be modified. - * @stable ICU 2.4 - */ - virtual UnicodeSet& compact(); - - /** - * Return the class ID for this class. This is useful only for - * comparing to a return value from getDynamicClassID(). For example: - *

-     * .      Base* polymorphic_pointer = createPolymorphicObject();
-     * .      if (polymorphic_pointer->getDynamicClassID() ==
-     * .          Derived::getStaticClassID()) ...
-     * 
- * @return The class ID for all objects of this class. - * @stable ICU 2.0 - */ - static UClassID U_EXPORT2 getStaticClassID(void); - - /** - * Implement UnicodeFunctor API. - * - * @return The class ID for this object. All objects of a given - * class have the same class ID. Objects of other classes have - * different class IDs. - * @stable ICU 2.4 - */ - virtual UClassID getDynamicClassID(void) const; - -private: - - // Private API for the USet API - - friend class USetAccess; - - int32_t getStringCount() const; - - const UnicodeString* getString(int32_t index) const; - - //---------------------------------------------------------------- - // RuleBasedTransliterator support - //---------------------------------------------------------------- - -private: - - /** - * Returns true if this set contains any character whose low byte - * is the given value. This is used by RuleBasedTransliterator for - * indexing. - */ - virtual UBool matchesIndexValue(uint8_t v) const; - -private: - friend class RBBIRuleScanner; - - //---------------------------------------------------------------- - // Implementation: Clone as thawed (see ICU4J Freezable) - //---------------------------------------------------------------- - - UnicodeSet(const UnicodeSet& o, UBool /* asThawed */); - UnicodeSet& copyFrom(const UnicodeSet& o, UBool asThawed); - - //---------------------------------------------------------------- - // Implementation: Pattern parsing - //---------------------------------------------------------------- - - void applyPatternIgnoreSpace(const UnicodeString& pattern, - ParsePosition& pos, - const SymbolTable* symbols, - UErrorCode& status); - - void applyPattern(RuleCharacterIterator& chars, - const SymbolTable* symbols, - UnicodeString& rebuiltPat, - uint32_t options, - UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), - int32_t depth, - UErrorCode& ec); - - //---------------------------------------------------------------- - // Implementation: Utility methods - 
//---------------------------------------------------------------- - - void ensureCapacity(int32_t newLen, UErrorCode& ec); - - void ensureBufferCapacity(int32_t newLen, UErrorCode& ec); - - void swapBuffers(void); - - UBool allocateStrings(UErrorCode &status); - - UnicodeString& _toPattern(UnicodeString& result, - UBool escapeUnprintable) const; - - UnicodeString& _generatePattern(UnicodeString& result, - UBool escapeUnprintable) const; - - static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable); - - static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable); - - //---------------------------------------------------------------- - // Implementation: Fundamental operators - //---------------------------------------------------------------- - - void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity); - - void add(const UChar32* other, int32_t otherLen, int8_t polarity); - - void retain(const UChar32* other, int32_t otherLen, int8_t polarity); - - /** - * Return true if the given position, in the given pattern, appears - * to be the start of a property set pattern [:foo:], \\p{foo}, or - * \\P{foo}, or \\N{name}. - */ - static UBool resemblesPropertyPattern(const UnicodeString& pattern, - int32_t pos); - - static UBool resemblesPropertyPattern(RuleCharacterIterator& chars, - int32_t iterOpts); - - /** - * Parse the given property pattern at the given parse position - * and set this UnicodeSet to the result. - * - * The original design document is out of date, but still useful. 
- * Ignore the property and value names: - * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/unicodeset_properties.html - * - * Recognized syntax: - * - * [:foo:] [:^foo:] - white space not allowed within "[:" or ":]" - * \\p{foo} \\P{foo} - white space not allowed within "\\p" or "\\P" - * \\N{name} - white space not allowed within "\\N" - * - * Other than the above restrictions, Unicode Pattern_White_Space characters are ignored. - * Case is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading - * and trailing space is deleted, and internal runs of whitespace - * are collapsed to a single space. - * - * We support binary properties, enumerated properties, and the - * following non-enumerated properties: - * - * Numeric_Value - * Name - * Unicode_1_Name - * - * @param pattern the pattern string - * @param ppos on entry, the position at which to begin parsing. - * This should be one of the locations marked '^': - * - * [:blah:] \\p{blah} \\P{blah} \\N{name} - * ^ % ^ % ^ % ^ % - * - * On return, the position after the last character parsed, that is, - * the locations marked '%'. If the parse fails, ppos is returned - * unchanged. - * @param ec status - * @return a reference to this. - */ - UnicodeSet& applyPropertyPattern(const UnicodeString& pattern, - ParsePosition& ppos, - UErrorCode &ec); - - void applyPropertyPattern(RuleCharacterIterator& chars, - UnicodeString& rebuiltPat, - UErrorCode& ec); - - friend class CharacterProperties; - static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status); - - /** - * A filter that returns TRUE if the given code point should be - * included in the UnicodeSet being constructed. - */ - typedef UBool (*Filter)(UChar32 codePoint, void* context); - - /** - * Given a filter, set this UnicodeSet to the code points - * contained by that filter. The filter MUST be - * property-conformant. 
That is, if it returns value v for one - * code point, then it must return v for all affiliated code - * points, as defined by the inclusions list. See - * getInclusions(). - * src is a UPropertySource value. - */ - void applyFilter(Filter filter, - void* context, - const UnicodeSet* inclusions, - UErrorCode &status); - -#ifndef U_HIDE_DRAFT_API // Skipped: ucpmap.h is draft only. - void applyIntPropertyValue(const UCPMap *map, - UCPMapValueFilter *filter, const void *context, - UErrorCode &errorCode); -#endif /* U_HIDE_DRAFT_API */ - - /** - * Set the new pattern to cache. - */ - void setPattern(const UnicodeString& newPat); - /** - * Release existing cached pattern. - */ - void releasePattern(); - - friend class UnicodeSetIterator; -}; - - - -inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const { - return !operator==(o); -} - -inline UBool UnicodeSet::isFrozen() const { - return (UBool)(bmpSet!=NULL || stringSpan!=NULL); -} - -inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const { - return !containsNone(start, end); -} - -inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const { - return !containsNone(s); -} - -inline UBool UnicodeSet::containsSome(const UnicodeString& s) const { - return !containsNone(s); -} - -inline UBool UnicodeSet::isBogus() const { - return (UBool)(fFlags & kIsBogus); -} - -inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) { - return reinterpret_cast(uset); -} - -inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) { - return reinterpret_cast(uset); -} - -inline USet *UnicodeSet::toUSet() { - return reinterpret_cast(this); -} - -inline const USet *UnicodeSet::toUSet() const { - return reinterpret_cast(this); -} - -inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const { - int32_t sLength=s.length(); - if(start<0) { - start=0; - } else if(start>sLength) { - start=sLength; - } - return start+span(s.getBuffer()+start, sLength-start, 
spanCondition); -} - -inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const { - int32_t sLength=s.length(); - if(limit<0) { - limit=0; - } else if(limit>sLength) { - limit=sLength; - } - return spanBack(s.getBuffer(), limit, spanCondition); -} - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/unistr.h b/deps/node/deps/icu-small/source/common/unicode/unistr.h deleted file mode 100644 index bf954b5f..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/unistr.h +++ /dev/null @@ -1,4774 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1998-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File unistr.h -* -* Modification History: -* -* Date Name Description -* 09/25/98 stephen Creation. -* 11/11/98 stephen Changed per 11/9 code review. -* 04/20/99 stephen Overhauled per 4/16 code review. -* 11/18/99 aliu Made to inherit from Replaceable. Added method -* handleReplaceBetween(); other methods unchanged. -* 06/25/01 grhoten Remove dependency on iostream. 
-****************************************************************************** -*/ - -#ifndef UNISTR_H -#define UNISTR_H - -/** - * \file - * \brief C++ API: Unicode String - */ - -#include -#include "unicode/utypes.h" -#include "unicode/char16ptr.h" -#include "unicode/rep.h" -#include "unicode/std_string.h" -#include "unicode/stringpiece.h" -#include "unicode/bytestream.h" - -struct UConverter; // unicode/ucnv.h - -#ifndef USTRING_H -/** - * \ingroup ustring_ustrlen - */ -U_STABLE int32_t U_EXPORT2 -u_strlen(const UChar *s); -#endif - -U_NAMESPACE_BEGIN - -#if !UCONFIG_NO_BREAK_ITERATION -class BreakIterator; // unicode/brkiter.h -#endif -class Edits; - -U_NAMESPACE_END - -// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper. -/** - * Internal string case mapping function type. - * All error checking must be done. - * src and dest must not overlap. - * @internal - */ -typedef int32_t U_CALLCONV -UStringCaseMapper(int32_t caseLocale, uint32_t options, -#if !UCONFIG_NO_BREAK_ITERATION - icu::BreakIterator *iter, -#endif - char16_t *dest, int32_t destCapacity, - const char16_t *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode); - -U_NAMESPACE_BEGIN - -class Locale; // unicode/locid.h -class StringCharacterIterator; -class UnicodeStringAppendable; // unicode/appendable.h - -/* The include has been moved to unicode/ustream.h */ - -/** - * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor - * which constructs a Unicode string from an invariant-character char * string. - * About invariant characters see utypes.h. - * This constructor has no runtime dependency on conversion code and is - * therefore recommended over ones taking a charset name string - * (where the empty string "" indicates invariant-character conversion). - * - * @stable ICU 3.2 - */ -#define US_INV icu::UnicodeString::kInvariant - -/** - * Unicode String literals in C++. 
- * - * Note: these macros are not recommended for new code. - * Prior to the availability of C++11 and u"unicode string literals", - * these macros were provided for portability and efficiency when - * initializing UnicodeStrings from literals. - * - * They work only for strings that contain "invariant characters", i.e., - * only latin letters, digits, and some punctuation. - * See utypes.h for details. - * - * The string parameter must be a C string literal. - * The length of the string, not including the terminating - * NUL, must be specified as a constant. - * @stable ICU 2.0 - */ -#if !U_CHAR16_IS_TYPEDEF -# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length) -#else -# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length) -#endif - -/** - * Unicode String literals in C++. - * Dependent on the platform properties, different UnicodeString - * constructors should be used to create a UnicodeString object from - * a string literal. - * The macros are defined for improved performance. - * They work only for strings that contain "invariant characters", i.e., - * only latin letters, digits, and some punctuation. - * See utypes.h for details. - * - * The string parameter must be a C string literal. - * @stable ICU 2.0 - */ -#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) - -/** - * \def UNISTR_FROM_CHAR_EXPLICIT - * This can be defined to be empty or "explicit". - * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32) - * constructors are marked as explicit, preventing their inadvertent use. - * @stable ICU 49 - */ -#ifndef UNISTR_FROM_CHAR_EXPLICIT -# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) - // Auto-"explicit" in ICU library code. -# define UNISTR_FROM_CHAR_EXPLICIT explicit -# else - // Empty by default for source code compatibility. 
-# define UNISTR_FROM_CHAR_EXPLICIT -# endif -#endif - -/** - * \def UNISTR_FROM_STRING_EXPLICIT - * This can be defined to be empty or "explicit". - * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *) - * constructors are marked as explicit, preventing their inadvertent use. - * - * In particular, this helps prevent accidentally depending on ICU conversion code - * by passing a string literal into an API with a const UnicodeString & parameter. - * @stable ICU 49 - */ -#ifndef UNISTR_FROM_STRING_EXPLICIT -# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) - // Auto-"explicit" in ICU library code. -# define UNISTR_FROM_STRING_EXPLICIT explicit -# else - // Empty by default for source code compatibility. -# define UNISTR_FROM_STRING_EXPLICIT -# endif -#endif - -/** - * \def UNISTR_OBJECT_SIZE - * Desired sizeof(UnicodeString) in bytes. - * It should be a multiple of sizeof(pointer) to avoid unusable space for padding. - * The object size may want to be a multiple of 16 bytes, - * which is a common granularity for heap allocation. - * - * Any space inside the object beyond sizeof(vtable pointer) + 2 - * is available for storing short strings inside the object. - * The bigger the object, the longer a string that can be stored inside the object, - * without additional heap allocation. - * - * Depending on a platform's pointer size, pointer alignment requirements, - * and struct padding, the compiler will usually round up sizeof(UnicodeString) - * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models), - * to hold the fields for heap-allocated strings. - * Such a minimum size also ensures that the object is easily large enough - * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH). - * - * sizeof(UnicodeString) >= 48 should work for all known platforms. 
- * - * For example, on a 64-bit machine where sizeof(vtable pointer) is 8, - * sizeof(UnicodeString) = 64 would leave space for - * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27 - * char16_ts stored inside the object. - * - * The minimum object size on a 64-bit machine would be - * 4 * sizeof(pointer) = 4 * 8 = 32 bytes, - * and the internal buffer would hold up to 11 char16_ts in that case. - * - * @see U16_MAX_LENGTH - * @stable ICU 56 - */ -#ifndef UNISTR_OBJECT_SIZE -# define UNISTR_OBJECT_SIZE 64 -#endif - -/** - * UnicodeString is a string class that stores Unicode characters directly and provides - * similar functionality as the Java String and StringBuffer/StringBuilder classes. - * It is a concrete implementation of the abstract class Replaceable (for transliteration). - * - * A UnicodeString may also "alias" an external array of characters - * (that is, point to it, rather than own the array) - * whose lifetime must then at least match the lifetime of the aliasing object. - * This aliasing may be preserved when returning a UnicodeString by value, - * depending on the compiler and the function implementation, - * via Return Value Optimization (RVO) or the move assignment operator. - * (However, the copy assignment operator does not preserve aliasing.) - * For details see the description of storage models at the end of the class API docs - * and in the User Guide chapter linked from there. - * - * The UnicodeString class is not suitable for subclassing. - * - *

For an overview of Unicode strings in C and C++ see the - * User Guide Strings chapter.

- * - *

In ICU, a Unicode string consists of 16-bit Unicode code units. - * A Unicode character may be stored with either one code unit - * (the most common case) or with a matched pair of special code units - * ("surrogates"). The data type for code units is char16_t. - * For single-character handling, a Unicode character code point is a value - * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.

- * - *

Indexes and offsets into and lengths of strings always count code units, not code points. - * This is the same as with multi-byte char* strings in traditional string handling. - * Operations on partial strings typically do not test for code point boundaries. - * If necessary, the user needs to take care of such boundaries by testing for the code unit - * values or by using functions like - * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() - * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).

- * - * UnicodeString methods are more lenient with regard to input parameter values - * than other ICU APIs. In particular: - * - If indexes are out of bounds for a UnicodeString object - * (<0 or >length()) then they are "pinned" to the nearest boundary. - * - If the buffer passed to an insert/append/replace operation is owned by the - * target object, e.g., calling str.append(str), an extra copy may take place - * to ensure safety. - * - If primitive string pointer values (e.g., const char16_t * or char *) - * for input strings are NULL, then those input string parameters are treated - * as if they pointed to an empty string. - * However, this is not the case for char * parameters for charset names - * or other IDs. - * - Most UnicodeString methods do not take a UErrorCode parameter because - * there are usually very few opportunities for failure other than a shortage - * of memory, error codes in low-level C++ string methods would be inconvenient, - * and the error code as the last parameter (ICU convention) would prevent - * the use of default parameter values. - * Instead, such methods set the UnicodeString into a "bogus" state - * (see isBogus()) if an error occurs. - * - * In string comparisons, two UnicodeString objects that are both "bogus" - * compare equal (to be transitive and prevent endless loops in sorting), - * and a "bogus" string compares less than any non-"bogus" one. - * - * Const UnicodeString methods are thread-safe. Multiple threads can use - * const methods on the same UnicodeString object simultaneously, - * but non-const methods must not be called concurrently (in multiple threads) - * with any other (const or non-const) methods. - * - * Similarly, const UnicodeString & parameters are thread-safe. - * One object may be passed in as such a parameter concurrently in multiple threads. - * This includes the const UnicodeString & parameters for - * copy construction, assignment, and cloning. - * - *

UnicodeString uses several storage methods. - * String contents can be stored inside the UnicodeString object itself, - * in an allocated and shared buffer, or in an outside buffer that is "aliased". - * Most of this is done transparently, but careful aliasing in particular provides - * significant performance improvements. - * Also, the internal buffer is accessible via special functions. - * For details see the - * User Guide Strings chapter.

- * - * @see utf.h - * @see CharacterIterator - * @stable ICU 2.0 - */ -class U_COMMON_API UnicodeString : public Replaceable -{ -public: - - /** - * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor - * which constructs a Unicode string from an invariant-character char * string. - * Use the macro US_INV instead of the full qualification for this value. - * - * @see US_INV - * @stable ICU 3.2 - */ - enum EInvariant { - /** - * @see EInvariant - * @stable ICU 3.2 - */ - kInvariant - }; - - //======================================== - // Read-only operations - //======================================== - - /* Comparison - bitwise only - for international comparison use collation */ - - /** - * Equality operator. Performs only bitwise comparison. - * @param text The UnicodeString to compare to this one. - * @return TRUE if text contains the same characters as this one, - * FALSE otherwise. - * @stable ICU 2.0 - */ - inline UBool operator== (const UnicodeString& text) const; - - /** - * Inequality operator. Performs only bitwise comparison. - * @param text The UnicodeString to compare to this one. - * @return FALSE if text contains the same characters as this one, - * TRUE otherwise. - * @stable ICU 2.0 - */ - inline UBool operator!= (const UnicodeString& text) const; - - /** - * Greater than operator. Performs only bitwise comparison. - * @param text The UnicodeString to compare to this one. - * @return TRUE if the characters in this are bitwise - * greater than the characters in text, FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool operator> (const UnicodeString& text) const; - - /** - * Less than operator. Performs only bitwise comparison. - * @param text The UnicodeString to compare to this one. 
- * @return TRUE if the characters in this are bitwise - * less than the characters in text, FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool operator< (const UnicodeString& text) const; - - /** - * Greater than or equal operator. Performs only bitwise comparison. - * @param text The UnicodeString to compare to this one. - * @return TRUE if the characters in this are bitwise - * greater than or equal to the characters in text, FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool operator>= (const UnicodeString& text) const; - - /** - * Less than or equal operator. Performs only bitwise comparison. - * @param text The UnicodeString to compare to this one. - * @return TRUE if the characters in this are bitwise - * less than or equal to the characters in text, FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool operator<= (const UnicodeString& text) const; - - /** - * Compare the characters bitwise in this UnicodeString to - * the characters in text. - * @param text The UnicodeString to compare to this one. - * @return The result of bitwise character comparison: 0 if this - * contains the same characters as text, -1 if the characters in - * this are bitwise less than the characters in text, +1 if the - * characters in this are bitwise greater than the characters - * in text. - * @stable ICU 2.0 - */ - inline int8_t compare(const UnicodeString& text) const; - - /** - * Compare the characters bitwise in the range - * [start, start + length) with the characters - * in the entire string text. - * (The parameters "start" and "length" are not applied to the other text "text".) - * @param start the offset at which the compare operation begins - * @param length the number of characters of text to compare. - * @param text the other text to be compared against this string. 
- * @return The result of bitwise character comparison: 0 if this - * contains the same characters as text, -1 if the characters in - * this are bitwise less than the characters in text, +1 if the - * characters in this are bitwise greater than the characters - * in text. - * @stable ICU 2.0 - */ - inline int8_t compare(int32_t start, - int32_t length, - const UnicodeString& text) const; - - /** - * Compare the characters bitwise in the range - * [start, start + length) with the characters - * in srcText in the range - * [srcStart, srcStart + srcLength). - * @param start the offset at which the compare operation begins - * @param length the number of characters in this to compare. - * @param srcText the text to be compared - * @param srcStart the offset into srcText to start comparison - * @param srcLength the number of characters in src to compare - * @return The result of bitwise character comparison: 0 if this - * contains the same characters as srcText, -1 if the characters in - * this are bitwise less than the characters in srcText, +1 if the - * characters in this are bitwise greater than the characters - * in srcText. - * @stable ICU 2.0 - */ - inline int8_t compare(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Compare the characters bitwise in this UnicodeString with the first - * srcLength characters in srcChars. - * @param srcChars The characters to compare to this UnicodeString. - * @param srcLength the number of characters in srcChars to compare - * @return The result of bitwise character comparison: 0 if this - * contains the same characters as srcChars, -1 if the characters in - * this are bitwise less than the characters in srcChars, +1 if the - * characters in this are bitwise greater than the characters - * in srcChars. 
- * @stable ICU 2.0 - */ - inline int8_t compare(ConstChar16Ptr srcChars, - int32_t srcLength) const; - - /** - * Compare the characters bitwise in the range - * [start, start + length) with the first - * length characters in srcChars - * @param start the offset at which the compare operation begins - * @param length the number of characters to compare. - * @param srcChars the characters to be compared - * @return The result of bitwise character comparison: 0 if this - * contains the same characters as srcChars, -1 if the characters in - * this are bitwise less than the characters in srcChars, +1 if the - * characters in this are bitwise greater than the characters - * in srcChars. - * @stable ICU 2.0 - */ - inline int8_t compare(int32_t start, - int32_t length, - const char16_t *srcChars) const; - - /** - * Compare the characters bitwise in the range - * [start, start + length) with the characters - * in srcChars in the range - * [srcStart, srcStart + srcLength). - * @param start the offset at which the compare operation begins - * @param length the number of characters in this to compare - * @param srcChars the characters to be compared - * @param srcStart the offset into srcChars to start comparison - * @param srcLength the number of characters in srcChars to compare - * @return The result of bitwise character comparison: 0 if this - * contains the same characters as srcChars, -1 if the characters in - * this are bitwise less than the characters in srcChars, +1 if the - * characters in this are bitwise greater than the characters - * in srcChars. - * @stable ICU 2.0 - */ - inline int8_t compare(int32_t start, - int32_t length, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Compare the characters bitwise in the range - * [start, limit) with the characters - * in srcText in the range - * [srcStart, srcLimit). 
- * @param start the offset at which the compare operation begins - * @param limit the offset immediately following the compare operation - * @param srcText the text to be compared - * @param srcStart the offset into srcText to start comparison - * @param srcLimit the offset into srcText to limit comparison - * @return The result of bitwise character comparison: 0 if this - * contains the same characters as srcText, -1 if the characters in - * this are bitwise less than the characters in srcText, +1 if the - * characters in this are bitwise greater than the characters - * in srcText. - * @stable ICU 2.0 - */ - inline int8_t compareBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. - * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param text Another string to compare this one to. - * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrder(const UnicodeString& text) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. 
- * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcText Another string to compare this one to. - * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrder(int32_t start, - int32_t length, - const UnicodeString& srcText) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. - * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcText Another string to compare this one to. - * @param srcStart The start offset in that string at which the compare operation begins. 
- * @param srcLength The number of code units from that string to compare. - * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrder(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. - * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param srcChars A pointer to another string to compare this one to. - * @param srcLength The number of code units from that string to compare. - * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars, - int32_t srcLength) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. - * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. 
- * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcChars A pointer to another string to compare this one to. - * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrder(int32_t start, - int32_t length, - const char16_t *srcChars) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. - * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcChars A pointer to another string to compare this one to. - * @param srcStart The start offset in that string at which the compare operation begins. - * @param srcLength The number of code units from that string to compare. 
- * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrder(int32_t start, - int32_t length, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. - * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param start The start offset in this string at which the compare operation begins. - * @param limit The offset after the last code unit from this string to compare. - * @param srcText Another string to compare this one to. - * @param srcStart The start offset in that string at which the compare operation begins. - * @param srcLimit The offset after the last code unit from that string to compare. - * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrderBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). - * - * @param text Another string to compare this one to. 
- * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcText Another string to compare this one to. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompare(int32_t start, - int32_t length, - const UnicodeString& srcText, - uint32_t options) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcText Another string to compare this one to. - * @param srcStart The start offset in that string at which the compare operation begins. 
- * @param srcLength The number of code units from that string to compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompare(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). - * - * @param srcChars A pointer to another string to compare this one to. - * @param srcLength The number of code units from that string to compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompare(ConstChar16Ptr srcChars, - int32_t srcLength, - uint32_t options) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcChars A pointer to another string to compare this one to. 
- * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompare(int32_t start, - int32_t length, - const char16_t *srcChars, - uint32_t options) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcChars A pointer to another string to compare this one to. - * @param srcStart The start offset in that string at which the compare operation begins. - * @param srcLength The number of code units from that string to compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompare(int32_t start, - int32_t length, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)). - * - * @param start The start offset in this string at which the compare operation begins. 
- * @param limit The offset after the last code unit from this string to compare. - * @param srcText Another string to compare this one to. - * @param srcStart The start offset in that string at which the compare operation begins. - * @param srcLimit The offset after the last code unit from that string to compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompareBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit, - uint32_t options) const; - - /** - * Determine if this starts with the characters in text - * @param text The text to match. - * @return TRUE if this starts with the characters in text, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool startsWith(const UnicodeString& text) const; - - /** - * Determine if this starts with the characters in srcText - * in the range [srcStart, srcStart + srcLength). - * @param srcText The text to match. - * @param srcStart the offset into srcText to start matching - * @param srcLength the number of characters in srcText to match - * @return TRUE if this starts with the characters in text, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool startsWith(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Determine if this starts with the characters in srcChars - * @param srcChars The characters to match. 
- * @param srcLength the number of characters in srcChars - * @return TRUE if this starts with the characters in srcChars, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool startsWith(ConstChar16Ptr srcChars, - int32_t srcLength) const; - - /** - * Determine if this starts with the characters in srcChars - * in the range [srcStart, srcStart + srcLength). - * @param srcChars The characters to match. - * @param srcStart the offset into srcChars to start matching - * @param srcLength the number of characters in srcChars to match - * @return TRUE if this starts with the characters in srcChars, FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool startsWith(const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Determine if this ends with the characters in text - * @param text The text to match. - * @return TRUE if this ends with the characters in text, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool endsWith(const UnicodeString& text) const; - - /** - * Determine if this ends with the characters in srcText - * in the range [srcStart, srcStart + srcLength). - * @param srcText The text to match. - * @param srcStart the offset into srcText to start matching - * @param srcLength the number of characters in srcText to match - * @return TRUE if this ends with the characters in srcText, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool endsWith(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Determine if this ends with the characters in srcChars - * @param srcChars The characters to match. - * @param srcLength the number of characters in srcChars - * @return TRUE if this ends with the characters in srcChars, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool endsWith(ConstChar16Ptr srcChars, - int32_t srcLength) const; - - /** - * Determine if this ends with the characters in srcChars - * in the range [srcStart, srcStart + srcLength). 
- * @param srcChars The characters to match. - * @param srcStart the offset into srcChars to start matching - * @param srcLength the number of characters in srcChars to match - * @return TRUE if this ends with the characters in srcChars, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool endsWith(const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength) const; - - - /* Searching - bitwise only */ - - /** - * Locate in this the first occurrence of the characters in text, - * using bitwise comparison. - * @param text The text to search for. - * @return The offset into this of the start of text, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(const UnicodeString& text) const; - - /** - * Locate in this the first occurrence of the characters in text - * starting at offset start, using bitwise comparison. - * @param text The text to search for. - * @param start The offset at which searching will start. - * @return The offset into this of the start of text, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(const UnicodeString& text, - int32_t start) const; - - /** - * Locate in this the first occurrence in the range - * [start, start + length) of the characters - * in text, using bitwise comparison. - * @param text The text to search for. - * @param start The offset at which searching will start. - * @param length The number of characters to search - * @return The offset into this of the start of text, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(const UnicodeString& text, - int32_t start, - int32_t length) const; - - /** - * Locate in this the first occurrence in the range - * [start, start + length) of the characters - * in srcText in the range - * [srcStart, srcStart + srcLength), - * using bitwise comparison. - * @param srcText The text to search for. 
- * @param srcStart the offset into srcText at which - * to start matching - * @param srcLength the number of characters in srcText to match - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of the start of text, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength, - int32_t start, - int32_t length) const; - - /** - * Locate in this the first occurrence of the characters in - * srcChars - * starting at offset start, using bitwise comparison. - * @param srcChars The text to search for. - * @param srcLength the number of characters in srcChars to match - * @param start the offset into this at which to start matching - * @return The offset into this of the start of text, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(const char16_t *srcChars, - int32_t srcLength, - int32_t start) const; - - /** - * Locate in this the first occurrence in the range - * [start, start + length) of the characters - * in srcChars, using bitwise comparison. - * @param srcChars The text to search for. - * @param srcLength the number of characters in srcChars - * @param start The offset at which searching will start. - * @param length The number of characters to search - * @return The offset into this of the start of srcChars, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(ConstChar16Ptr srcChars, - int32_t srcLength, - int32_t start, - int32_t length) const; - - /** - * Locate in this the first occurrence in the range - * [start, start + length) of the characters - * in srcChars in the range - * [srcStart, srcStart + srcLength), - * using bitwise comparison. - * @param srcChars The text to search for. 
- * @param srcStart the offset into srcChars at which - * to start matching - * @param srcLength the number of characters in srcChars to match - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of the start of text, - * or -1 if not found. - * @stable ICU 2.0 - */ - int32_t indexOf(const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength, - int32_t start, - int32_t length) const; - - /** - * Locate in this the first occurrence of the BMP code point c, - * using bitwise comparison. - * @param c The code unit to search for. - * @return The offset into this of c, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(char16_t c) const; - - /** - * Locate in this the first occurrence of the code point c, - * using bitwise comparison. - * - * @param c The code point to search for. - * @return The offset into this of c, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(UChar32 c) const; - - /** - * Locate in this the first occurrence of the BMP code point c, - * starting at offset start, using bitwise comparison. - * @param c The code unit to search for. - * @param start The offset at which searching will start. - * @return The offset into this of c, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(char16_t c, - int32_t start) const; - - /** - * Locate in this the first occurrence of the code point c - * starting at offset start, using bitwise comparison. - * - * @param c The code point to search for. - * @param start The offset at which searching will start. - * @return The offset into this of c, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(UChar32 c, - int32_t start) const; - - /** - * Locate in this the first occurrence of the BMP code point c - * in the range [start, start + length), - * using bitwise comparison. - * @param c The code unit to search for. 
- * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of c, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(char16_t c, - int32_t start, - int32_t length) const; - - /** - * Locate in this the first occurrence of the code point c - * in the range [start, start + length), - * using bitwise comparison. - * - * @param c The code point to search for. - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of c, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(UChar32 c, - int32_t start, - int32_t length) const; - - /** - * Locate in this the last occurrence of the characters in text, - * using bitwise comparison. - * @param text The text to search for. - * @return The offset into this of the start of text, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(const UnicodeString& text) const; - - /** - * Locate in this the last occurrence of the characters in text - * starting at offset start, using bitwise comparison. - * @param text The text to search for. - * @param start The offset at which searching will start. - * @return The offset into this of the start of text, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(const UnicodeString& text, - int32_t start) const; - - /** - * Locate in this the last occurrence in the range - * [start, start + length) of the characters - * in text, using bitwise comparison. - * @param text The text to search for. - * @param start The offset at which searching will start. - * @param length The number of characters to search - * @return The offset into this of the start of text, - * or -1 if not found. 
- * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(const UnicodeString& text, - int32_t start, - int32_t length) const; - - /** - * Locate in this the last occurrence in the range - * [start, start + length) of the characters - * in srcText in the range - * [srcStart, srcStart + srcLength), - * using bitwise comparison. - * @param srcText The text to search for. - * @param srcStart the offset into srcText at which - * to start matching - * @param srcLength the number of characters in srcText to match - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of the start of text, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength, - int32_t start, - int32_t length) const; - - /** - * Locate in this the last occurrence of the characters in srcChars - * starting at offset start, using bitwise comparison. - * @param srcChars The text to search for. - * @param srcLength the number of characters in srcChars to match - * @param start the offset into this at which to start matching - * @return The offset into this of the start of text, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(const char16_t *srcChars, - int32_t srcLength, - int32_t start) const; - - /** - * Locate in this the last occurrence in the range - * [start, start + length) of the characters - * in srcChars, using bitwise comparison. - * @param srcChars The text to search for. - * @param srcLength the number of characters in srcChars - * @param start The offset at which searching will start. - * @param length The number of characters to search - * @return The offset into this of the start of srcChars, - * or -1 if not found. 
- * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(ConstChar16Ptr srcChars, - int32_t srcLength, - int32_t start, - int32_t length) const; - - /** - * Locate in this the last occurrence in the range - * [start, start + length) of the characters - * in srcChars in the range - * [srcStart, srcStart + srcLength), - * using bitwise comparison. - * @param srcChars The text to search for. - * @param srcStart the offset into srcChars at which - * to start matching - * @param srcLength the number of characters in srcChars to match - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of the start of text, - * or -1 if not found. - * @stable ICU 2.0 - */ - int32_t lastIndexOf(const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength, - int32_t start, - int32_t length) const; - - /** - * Locate in this the last occurrence of the BMP code point c, - * using bitwise comparison. - * @param c The code unit to search for. - * @return The offset into this of c, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(char16_t c) const; - - /** - * Locate in this the last occurrence of the code point c, - * using bitwise comparison. - * - * @param c The code point to search for. - * @return The offset into this of c, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(UChar32 c) const; - - /** - * Locate in this the last occurrence of the BMP code point c - * starting at offset start, using bitwise comparison. - * @param c The code unit to search for. - * @param start The offset at which searching will start. - * @return The offset into this of c, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(char16_t c, - int32_t start) const; - - /** - * Locate in this the last occurrence of the code point c - * starting at offset start, using bitwise comparison. - * - * @param c The code point to search for. 
- * @param start The offset at which searching will start. - * @return The offset into this of c, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(UChar32 c, - int32_t start) const; - - /** - * Locate in this the last occurrence of the BMP code point c - * in the range [start, start + length), - * using bitwise comparison. - * @param c The code unit to search for. - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of c, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(char16_t c, - int32_t start, - int32_t length) const; - - /** - * Locate in this the last occurrence of the code point c - * in the range [start, start + length), - * using bitwise comparison. - * - * @param c The code point to search for. - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of c, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(UChar32 c, - int32_t start, - int32_t length) const; - - - /* Character access */ - - /** - * Return the code unit at offset offset. - * If the offset is not valid (0..length()-1) then U+ffff is returned. - * @param offset a valid offset into the text - * @return the code unit at offset offset - * or 0xffff if the offset is not valid for this string - * @stable ICU 2.0 - */ - inline char16_t charAt(int32_t offset) const; - - /** - * Return the code unit at offset offset. - * If the offset is not valid (0..length()-1) then U+ffff is returned. - * @param offset a valid offset into the text - * @return the code unit at offset offset - * @stable ICU 2.0 - */ - inline char16_t operator[] (int32_t offset) const; - - /** - * Return the code point that contains the code unit - * at offset offset. - * If the offset is not valid (0..length()-1) then U+ffff is returned. 
- * @param offset a valid offset into the text - * that indicates the text offset of any of the code units - * that will be assembled into a code point (21-bit value) and returned - * @return the code point of text at offset - * or 0xffff if the offset is not valid for this string - * @stable ICU 2.0 - */ - UChar32 char32At(int32_t offset) const; - - /** - * Adjust a random-access offset so that - * it points to the beginning of a Unicode character. - * The offset that is passed in points to - * any code unit of a code point, - * while the returned offset will point to the first code unit - * of the same code point. - * In UTF-16, if the input offset points to a second surrogate - * of a surrogate pair, then the returned offset will point - * to the first surrogate. - * @param offset a valid offset into one code point of the text - * @return offset of the first code unit of the same code point - * @see U16_SET_CP_START - * @stable ICU 2.0 - */ - int32_t getChar32Start(int32_t offset) const; - - /** - * Adjust a random-access offset so that - * it points behind a Unicode character. - * The offset that is passed in points behind - * any code unit of a code point, - * while the returned offset will point behind the last code unit - * of the same code point. - * In UTF-16, if the input offset points behind the first surrogate - * (i.e., to the second surrogate) - * of a surrogate pair, then the returned offset will point - * behind the second surrogate (i.e., to the code unit that follows the pair). - * @param offset a valid offset after any code unit of a code point of the text - * @return offset of the first code unit after the same code point - * @see U16_SET_CP_LIMIT - * @stable ICU 2.0 - */ - int32_t getChar32Limit(int32_t offset) const; - - /** - * Move the code unit index along the string by delta code points. 
- * Interpret the input index as a code unit-based offset into the string, - * move the index forward or backward by delta code points, and - * return the resulting index. - * The input index should point to the first code unit of a code point, - * if there is more than one. - * - * Both input and output indexes are code unit-based as for all - * string indexes/offsets in ICU (and other libraries, like MBCS char*). - * If delta<0 then the index is moved backward (toward the start of the string). - * If delta>0 then the index is moved forward (toward the end of the string). - * - * This behaves like CharacterIterator::move32(delta, kCurrent). - * - * Behavior for out-of-bounds indexes: - * moveIndex32 pins the input index to 0..length(), i.e., - * if the input index<0 then it is pinned to 0; - * if it is index>length() then it is pinned to length(). - * Afterwards, the index is moved by delta code points - * forward or backward, - * but no further backward than to 0 and no further forward than to length(). - * The resulting index return value will be in between 0 and length(), inclusively. - * - * Examples: - *
-   * // s has code points 'a' U+10000 'b' U+10ffff U+2029
-   * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
-   *
-   * // initial index: position of U+10000
-   * int32_t index=1;
-   *
-   * // the following examples will all result in index==4, position of U+10ffff
-   *
-   * // skip 2 code points from some position in the string
-   * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
-   *
-   * // go to the 3rd code point from the start of s (0-based)
-   * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
-   *
-   * // go to the next-to-last code point of s
-   * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
-   * 
- * - * @param index input code unit index - * @param delta (signed) code point count to move the index forward or backward - * in the string - * @return the resulting code unit index - * @stable ICU 2.0 - */ - int32_t moveIndex32(int32_t index, int32_t delta) const; - - /* Substring extraction */ - - /** - * Copy the characters in the range - * [start, start + length) into the array dst, - * beginning at dstStart. - * If the string aliases to dst itself as an external buffer, - * then extract() will not copy the contents. - * - * @param start offset of first character which will be copied into the array - * @param length the number of characters to extract - * @param dst array in which to copy characters. The length of dst - * must be at least (dstStart + length). - * @param dstStart the offset in dst where the first character - * will be extracted - * @stable ICU 2.0 - */ - inline void extract(int32_t start, - int32_t length, - Char16Ptr dst, - int32_t dstStart = 0) const; - - /** - * Copy the contents of the string into dest. - * This is a convenience function that - * checks if there is enough space in dest, - * extracts the entire string if possible, - * and NUL-terminates dest if possible. - * - * If the string fits into dest but cannot be NUL-terminated - * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. - * If the string itself does not fit into dest - * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. - * - * If the string aliases to dest itself as an external buffer, - * then extract() will not copy the contents. - * - * @param dest Destination string buffer. - * @param destCapacity Number of char16_ts available at dest. - * @param errorCode ICU error code. 
- * @return length() - * @stable ICU 2.0 - */ - int32_t - extract(Char16Ptr dest, int32_t destCapacity, - UErrorCode &errorCode) const; - - /** - * Copy the characters in the range - * [start, start + length) into the UnicodeString - * target. - * @param start offset of first character which will be copied - * @param length the number of characters to extract - * @param target UnicodeString into which to copy characters. - * @return A reference to target - * @stable ICU 2.0 - */ - inline void extract(int32_t start, - int32_t length, - UnicodeString& target) const; - - /** - * Copy the characters in the range [start, limit) - * into the array dst, beginning at dstStart. - * @param start offset of first character which will be copied into the array - * @param limit offset immediately following the last character to be copied - * @param dst array in which to copy characters. The length of dst - * must be at least (dstStart + (limit - start)). - * @param dstStart the offset in dst where the first character - * will be extracted - * @stable ICU 2.0 - */ - inline void extractBetween(int32_t start, - int32_t limit, - char16_t *dst, - int32_t dstStart = 0) const; - - /** - * Copy the characters in the range [start, limit) - * into the UnicodeString target. Replaceable API. - * @param start offset of first character which will be copied - * @param limit offset immediately following the last character to be copied - * @param target UnicodeString into which to copy characters. - * @return A reference to target - * @stable ICU 2.0 - */ - virtual void extractBetween(int32_t start, - int32_t limit, - UnicodeString& target) const; - - /** - * Copy the characters in the range - * [start, start + startLength) into an array of characters. - * All characters must be invariant (see utypes.h). - * Use US_INV as the last, signature-distinguishing parameter. 
- * - * This function does not write any more than targetCapacity - * characters but returns the length of the entire output string - * so that one can allocate a larger buffer and call the function again - * if necessary. - * The output string is NUL-terminated if possible. - * - * @param start offset of first character which will be copied - * @param startLength the number of characters to extract - * @param target the target buffer for extraction, can be NULL - * if targetLength is 0 - * @param targetCapacity the length of the target buffer - * @param inv Signature-distinguishing paramater, use US_INV. - * @return the output string length, not including the terminating NUL - * @stable ICU 3.2 - */ - int32_t extract(int32_t start, - int32_t startLength, - char *target, - int32_t targetCapacity, - enum EInvariant inv) const; - -#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION - - /** - * Copy the characters in the range - * [start, start + length) into an array of characters - * in the platform's default codepage. - * This function does not write any more than targetLength - * characters but returns the length of the entire output string - * so that one can allocate a larger buffer and call the function again - * if necessary. - * The output string is NUL-terminated if possible. - * - * @param start offset of first character which will be copied - * @param startLength the number of characters to extract - * @param target the target buffer for extraction - * @param targetLength the length of the target buffer - * If target is NULL, then the number of bytes required for - * target is returned. - * @return the output string length, not including the terminating NUL - * @stable ICU 2.0 - */ - int32_t extract(int32_t start, - int32_t startLength, - char *target, - uint32_t targetLength) const; - -#endif - -#if !UCONFIG_NO_CONVERSION - - /** - * Copy the characters in the range - * [start, start + length) into an array of characters - * in a specified codepage. 
- * The output string is NUL-terminated. - * - * Recommendation: For invariant-character strings use - * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const - * because it avoids object code dependencies of UnicodeString on - * the conversion code. - * - * @param start offset of first character which will be copied - * @param startLength the number of characters to extract - * @param target the target buffer for extraction - * @param codepage the desired codepage for the characters. 0 has - * the special meaning of the default codepage - * If codepage is an empty string (""), - * then a simple conversion is performed on the codepage-invariant - * subset ("invariant characters") of the platform encoding. See utypes.h. - * If target is NULL, then the number of bytes required for - * target is returned. It is assumed that the target is big enough - * to fit all of the characters. - * @return the output string length, not including the terminating NUL - * @stable ICU 2.0 - */ - inline int32_t extract(int32_t start, - int32_t startLength, - char *target, - const char *codepage = 0) const; - - /** - * Copy the characters in the range - * [start, start + length) into an array of characters - * in a specified codepage. - * This function does not write any more than targetLength - * characters but returns the length of the entire output string - * so that one can allocate a larger buffer and call the function again - * if necessary. - * The output string is NUL-terminated if possible. - * - * Recommendation: For invariant-character strings use - * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const - * because it avoids object code dependencies of UnicodeString on - * the conversion code. 
- * - * @param start offset of first character which will be copied - * @param startLength the number of characters to extract - * @param target the target buffer for extraction - * @param targetLength the length of the target buffer - * @param codepage the desired codepage for the characters. 0 has - * the special meaning of the default codepage - * If codepage is an empty string (""), - * then a simple conversion is performed on the codepage-invariant - * subset ("invariant characters") of the platform encoding. See utypes.h. - * If target is NULL, then the number of bytes required for - * target is returned. - * @return the output string length, not including the terminating NUL - * @stable ICU 2.0 - */ - int32_t extract(int32_t start, - int32_t startLength, - char *target, - uint32_t targetLength, - const char *codepage) const; - - /** - * Convert the UnicodeString into a codepage string using an existing UConverter. - * The output string is NUL-terminated if possible. - * - * This function avoids the overhead of opening and closing a converter if - * multiple strings are extracted. - * - * @param dest destination string buffer, can be NULL if destCapacity==0 - * @param destCapacity the number of chars available at dest - * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), - * or NULL for the default converter - * @param errorCode normal ICU error code - * @return the length of the output string, not counting the terminating NUL; - * if the length is greater than destCapacity, then the string will not fit - * and a buffer of the indicated length would need to be passed in - * @stable ICU 2.0 - */ - int32_t extract(char *dest, int32_t destCapacity, - UConverter *cnv, - UErrorCode &errorCode) const; - -#endif - - /** - * Create a temporary substring for the specified range. 
- * Unlike the substring constructor and setTo() functions, - * the object returned here will be a read-only alias (using getBuffer()) - * rather than copying the text. - * As a result, this substring operation is much faster but requires - * that the original string not be modified or deleted during the lifetime - * of the returned substring object. - * @param start offset of the first character visible in the substring - * @param length length of the substring - * @return a read-only alias UnicodeString object for the substring - * @stable ICU 4.4 - */ - UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; - - /** - * Create a temporary substring for the specified range. - * Same as tempSubString(start, length) except that the substring range - * is specified as a (start, limit) pair (with an exclusive limit index) - * rather than a (start, length) pair. - * @param start offset of the first character visible in the substring - * @param limit offset immediately following the last character visible in the substring - * @return a read-only alias UnicodeString object for the substring - * @stable ICU 4.4 - */ - inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; - - /** - * Convert the UnicodeString to UTF-8 and write the result - * to a ByteSink. This is called by toUTF8String(). - * Unpaired surrogates are replaced with U+FFFD. - * Calls u_strToUTF8WithSub(). - * - * @param sink A ByteSink to which the UTF-8 version of the string is written. - * sink.Flush() is called at the end. - * @stable ICU 4.2 - * @see toUTF8String - */ - void toUTF8(ByteSink &sink) const; - - /** - * Convert the UnicodeString to UTF-8 and append the result - * to a standard string. - * Unpaired surrogates are replaced with U+FFFD. - * Calls toUTF8(). - * - * @param result A standard string (or a compatible object) - * to which the UTF-8 version of the string is appended. - * @return The string object. 
- * @stable ICU 4.2 - * @see toUTF8 - */ - template - StringClass &toUTF8String(StringClass &result) const { - StringByteSink sbs(&result, length()); - toUTF8(sbs); - return result; - } - - /** - * Convert the UnicodeString to UTF-32. - * Unpaired surrogates are replaced with U+FFFD. - * Calls u_strToUTF32WithSub(). - * - * @param utf32 destination string buffer, can be NULL if capacity==0 - * @param capacity the number of UChar32s available at utf32 - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The length of the UTF-32 string. - * @see fromUTF32 - * @stable ICU 4.2 - */ - int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; - - /* Length operations */ - - /** - * Return the length of the UnicodeString object. - * The length is the number of char16_t code units are in the UnicodeString. - * If you want the number of code points, please use countChar32(). - * @return the length of the UnicodeString object - * @see countChar32 - * @stable ICU 2.0 - */ - inline int32_t length(void) const; - - /** - * Count Unicode code points in the length char16_t code units of the string. - * A code point may occupy either one or two char16_t code units. - * Counting code points involves reading all code units. - * - * This functions is basically the inverse of moveIndex32(). - * - * @param start the index of the first code unit to check - * @param length the number of char16_t code units to check - * @return the number of code points in the specified code units - * @see length - * @stable ICU 2.0 - */ - int32_t - countChar32(int32_t start=0, int32_t length=INT32_MAX) const; - - /** - * Check if the length char16_t code units of the string - * contain more Unicode code points than a certain number. 
- * This is more efficient than counting all code points in this part of the string - * and comparing that number with a threshold. - * This function may not need to scan the string at all if the length - * falls within a certain range, and - * never needs to count more than 'number+1' code points. - * Logically equivalent to (countChar32(start, length)>number). - * A Unicode code point may occupy either one or two char16_t code units. - * - * @param start the index of the first code unit to check (0 for the entire string) - * @param length the number of char16_t code units to check - * (use INT32_MAX for the entire string; remember that start/length - * values are pinned) - * @param number The number of code points in the (sub)string is compared against - * the 'number' parameter. - * @return Boolean value for whether the string contains more Unicode code points - * than 'number'. Same as (u_countChar32(s, length)>number). - * @see countChar32 - * @see u_strHasMoreChar32Than - * @stable ICU 2.4 - */ - UBool - hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; - - /** - * Determine if this string is empty. - * @return TRUE if this string contains 0 characters, FALSE otherwise. - * @stable ICU 2.0 - */ - inline UBool isEmpty(void) const; - - /** - * Return the capacity of the internal buffer of the UnicodeString object. - * This is useful together with the getBuffer functions. - * See there for details. - * - * @return the number of char16_ts available in the internal buffer - * @see getBuffer - * @stable ICU 2.0 - */ - inline int32_t getCapacity(void) const; - - /* Other operations */ - - /** - * Generate a hash code for this object. - * @return The hash code of this UnicodeString. - * @stable ICU 2.0 - */ - inline int32_t hashCode(void) const; - - /** - * Determine if this object contains a valid string. - * A bogus string has no value. 
It is different from an empty string, - * although in both cases isEmpty() returns TRUE and length() returns 0. - * setToBogus() and isBogus() can be used to indicate that no string value is available. - * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and - * length() returns 0. - * - * @return TRUE if the string is bogus/invalid, FALSE otherwise - * @see setToBogus() - * @stable ICU 2.0 - */ - inline UBool isBogus(void) const; - - - //======================================== - // Write operations - //======================================== - - /* Assignment operations */ - - /** - * Assignment operator. Replace the characters in this UnicodeString - * with the characters from srcText. - * - * Starting with ICU 2.4, the assignment operator and the copy constructor - * allocate a new buffer and copy the buffer contents even for readonly aliases. - * By contrast, the fastCopyFrom() function implements the old, - * more efficient but less safe behavior - * of making this string also a readonly alias to the same buffer. - * - * If the source object has an "open" buffer from getBuffer(minCapacity), - * then the copy is an empty string. - * - * @param srcText The text containing the characters to replace - * @return a reference to this - * @stable ICU 2.0 - * @see fastCopyFrom - */ - UnicodeString &operator=(const UnicodeString &srcText); - - /** - * Almost the same as the assignment operator. - * Replace the characters in this UnicodeString - * with the characters from srcText. - * - * This function works the same as the assignment operator - * for all strings except for ones that are readonly aliases. - * - * Starting with ICU 2.4, the assignment operator and the copy constructor - * allocate a new buffer and copy the buffer contents even for readonly aliases. - * This function implements the old, more efficient but less safe behavior - * of making this string also a readonly alias to the same buffer. 
- * - * The fastCopyFrom function must be used only if it is known that the lifetime of - * this UnicodeString does not exceed the lifetime of the aliased buffer - * including its contents, for example for strings from resource bundles - * or aliases to string constants. - * - * If the source object has an "open" buffer from getBuffer(minCapacity), - * then the copy is an empty string. - * - * @param src The text containing the characters to replace. - * @return a reference to this - * @stable ICU 2.4 - */ - UnicodeString &fastCopyFrom(const UnicodeString &src); - - /** - * Move assignment operator; might leave src in bogus state. - * This string will have the same contents and state that the source string had. - * The behavior is undefined if *this and src are the same object. - * @param src source string - * @return *this - * @stable ICU 56 - */ - UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT { - return moveFrom(src); - } - - // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API - /** - * Move assignment; might leave src in bogus state. - * This string will have the same contents and state that the source string had. - * The behavior is undefined if *this and src are the same object. - * - * Can be called explicitly, does not need C++11 support. - * @param src source string - * @return *this - * @draft ICU 56 - */ - UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT; - - /** - * Swap strings. - * @param other other string - * @stable ICU 56 - */ - void swap(UnicodeString &other) U_NOEXCEPT; - - /** - * Non-member UnicodeString swap function. - * @param s1 will get s2's contents and state - * @param s2 will get s1's contents and state - * @stable ICU 56 - */ - friend U_COMMON_API inline void U_EXPORT2 - swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT { - s1.swap(s2); - } - - /** - * Assignment operator. Replace the characters in this UnicodeString - * with the code unit ch. 
- * @param ch the code unit to replace - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& operator= (char16_t ch); - - /** - * Assignment operator. Replace the characters in this UnicodeString - * with the code point ch. - * @param ch the code point to replace - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& operator= (UChar32 ch); - - /** - * Set the text in the UnicodeString object to the characters - * in srcText in the range - * [srcStart, srcText.length()). - * srcText is not modified. - * @param srcText the source for the new characters - * @param srcStart the offset into srcText where new characters - * will be obtained - * @return a reference to this - * @stable ICU 2.2 - */ - inline UnicodeString& setTo(const UnicodeString& srcText, - int32_t srcStart); - - /** - * Set the text in the UnicodeString object to the characters - * in srcText in the range - * [srcStart, srcStart + srcLength). - * srcText is not modified. - * @param srcText the source for the new characters - * @param srcStart the offset into srcText where new characters - * will be obtained - * @param srcLength the number of characters in srcText in the - * replace string. - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& setTo(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength); - - /** - * Set the text in the UnicodeString object to the characters in - * srcText. - * srcText is not modified. - * @param srcText the source for the new characters - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& setTo(const UnicodeString& srcText); - - /** - * Set the characters in the UnicodeString object to the characters - * in srcChars. srcChars is not modified. - * @param srcChars the source for the new characters - * @param srcLength the number of Unicode characters in srcChars. 
- * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& setTo(const char16_t *srcChars, - int32_t srcLength); - - /** - * Set the characters in the UnicodeString object to the code unit - * srcChar. - * @param srcChar the code unit which becomes the UnicodeString's character - * content - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& setTo(char16_t srcChar); - - /** - * Set the characters in the UnicodeString object to the code point - * srcChar. - * @param srcChar the code point which becomes the UnicodeString's character - * content - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& setTo(UChar32 srcChar); - - /** - * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor. - * The text will be used for the UnicodeString object, but - * it will not be released when the UnicodeString is destroyed. - * This has copy-on-write semantics: - * When the string is modified, then the buffer is first copied into - * newly allocated memory. - * The aliased buffer is never modified. - * - * In an assignment to another UnicodeString, when using the copy constructor - * or the assignment operator, the text will be copied. - * When using fastCopyFrom(), the text will be aliased again, - * so that both strings then alias the same readonly-text. - * - * @param isTerminated specifies if text is NUL-terminated. - * This must be true if textLength==-1. - * @param text The characters to alias for the UnicodeString. - * @param textLength The number of Unicode characters in text to alias. - * If -1, then this constructor will determine the length - * by calling u_strlen(). - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString &setTo(UBool isTerminated, - ConstChar16Ptr text, - int32_t textLength); - - /** - * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor. 
- * The text will be used for the UnicodeString object, but - * it will not be released when the UnicodeString is destroyed. - * This has write-through semantics: - * For as long as the capacity of the buffer is sufficient, write operations - * will directly affect the buffer. When more capacity is necessary, then - * a new buffer will be allocated and the contents copied as with regularly - * constructed strings. - * In an assignment to another UnicodeString, the buffer will be copied. - * The extract(Char16Ptr dst) function detects whether the dst pointer is the same - * as the string buffer itself and will in this case not copy the contents. - * - * @param buffer The characters to alias for the UnicodeString. - * @param buffLength The number of Unicode characters in buffer to alias. - * @param buffCapacity The size of buffer in char16_ts. - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString &setTo(char16_t *buffer, - int32_t buffLength, - int32_t buffCapacity); - - /** - * Make this UnicodeString object invalid. - * The string will test TRUE with isBogus(). - * - * A bogus string has no value. It is different from an empty string. - * It can be used to indicate that no string value is available. - * getBuffer() and getTerminatedBuffer() return NULL, and - * length() returns 0. - * - * This utility function is used throughout the UnicodeString - * implementation to indicate that a UnicodeString operation failed, - * and may be used in other functions, - * especially but not exclusively when such functions do not - * take a UErrorCode for simplicity. - * - * The following methods, and no others, will clear a string object's bogus flag: - * - remove() - * - remove(0, INT32_MAX) - * - truncate(0) - * - operator=() (assignment operator) - * - setTo(...) - * - * The simplest ways to turn a bogus string into an empty one - * is to use the remove() function. 
- * Examples for other functions that are equivalent to "set to empty string": - * \code - * if(s.isBogus()) { - * s.remove(); // set to an empty string (remove all), or - * s.remove(0, INT32_MAX); // set to an empty string (remove all), or - * s.truncate(0); // set to an empty string (complete truncation), or - * s=UnicodeString(); // assign an empty string, or - * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or - * static const char16_t nul=0; - * s.setTo(&nul, 0); // set to an empty C Unicode string - * } - * \endcode - * - * @see isBogus() - * @stable ICU 2.0 - */ - void setToBogus(); - - /** - * Set the character at the specified offset to the specified character. - * @param offset A valid offset into the text of the character to set - * @param ch The new character - * @return A reference to this - * @stable ICU 2.0 - */ - UnicodeString& setCharAt(int32_t offset, - char16_t ch); - - - /* Append operations */ - - /** - * Append operator. Append the code unit ch to the UnicodeString - * object. - * @param ch the code unit to be appended - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& operator+= (char16_t ch); - - /** - * Append operator. Append the code point ch to the UnicodeString - * object. - * @param ch the code point to be appended - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& operator+= (UChar32 ch); - - /** - * Append operator. Append the characters in srcText to the - * UnicodeString object. srcText is not modified. - * @param srcText the source for the new characters - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& operator+= (const UnicodeString& srcText); - - /** - * Append the characters - * in srcText in the range - * [srcStart, srcStart + srcLength) to the - * UnicodeString object at offset start. srcText - * is not modified. 
- * @param srcText the source for the new characters - * @param srcStart the offset into srcText where new characters - * will be obtained - * @param srcLength the number of characters in srcText in - * the append string - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& append(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength); - - /** - * Append the characters in srcText to the UnicodeString object. - * srcText is not modified. - * @param srcText the source for the new characters - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& append(const UnicodeString& srcText); - - /** - * Append the characters in srcChars in the range - * [srcStart, srcStart + srcLength) to the UnicodeString - * object at offset - * start. srcChars is not modified. - * @param srcChars the source for the new characters - * @param srcStart the offset into srcChars where new characters - * will be obtained - * @param srcLength the number of characters in srcChars in - * the append string; can be -1 if srcChars is NUL-terminated - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& append(const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength); - - /** - * Append the characters in srcChars to the UnicodeString object - * at offset start. srcChars is not modified. - * @param srcChars the source for the new characters - * @param srcLength the number of Unicode characters in srcChars; - * can be -1 if srcChars is NUL-terminated - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& append(ConstChar16Ptr srcChars, - int32_t srcLength); - - /** - * Append the code unit srcChar to the UnicodeString object. - * @param srcChar the code unit to append - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& append(char16_t srcChar); - - /** - * Append the code point srcChar to the UnicodeString object. 
- * @param srcChar the code point to append - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& append(UChar32 srcChar); - - - /* Insert operations */ - - /** - * Insert the characters in srcText in the range - * [srcStart, srcStart + srcLength) into the UnicodeString - * object at offset start. srcText is not modified. - * @param start the offset where the insertion begins - * @param srcText the source for the new characters - * @param srcStart the offset into srcText where new characters - * will be obtained - * @param srcLength the number of characters in srcText in - * the insert string - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& insert(int32_t start, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength); - - /** - * Insert the characters in srcText into the UnicodeString object - * at offset start. srcText is not modified. - * @param start the offset where the insertion begins - * @param srcText the source for the new characters - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& insert(int32_t start, - const UnicodeString& srcText); - - /** - * Insert the characters in srcChars in the range - * [srcStart, srcStart + srcLength) into the UnicodeString - * object at offset start. srcChars is not modified. - * @param start the offset at which the insertion begins - * @param srcChars the source for the new characters - * @param srcStart the offset into srcChars where new characters - * will be obtained - * @param srcLength the number of characters in srcChars - * in the insert string - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& insert(int32_t start, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength); - - /** - * Insert the characters in srcChars into the UnicodeString object - * at offset start. srcChars is not modified. 
- * @param start the offset where the insertion begins - * @param srcChars the source for the new characters - * @param srcLength the number of Unicode characters in srcChars. - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& insert(int32_t start, - ConstChar16Ptr srcChars, - int32_t srcLength); - - /** - * Insert the code unit srcChar into the UnicodeString object at - * offset start. - * @param start the offset at which the insertion occurs - * @param srcChar the code unit to insert - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& insert(int32_t start, - char16_t srcChar); - - /** - * Insert the code point srcChar into the UnicodeString object at - * offset start. - * @param start the offset at which the insertion occurs - * @param srcChar the code point to insert - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& insert(int32_t start, - UChar32 srcChar); - - - /* Replace operations */ - - /** - * Replace the characters in the range - * [start, start + length) with the characters in - * srcText in the range - * [srcStart, srcStart + srcLength). - * srcText is not modified. - * @param start the offset at which the replace operation begins - * @param length the number of characters to replace. The character at - * start + length is not modified. - * @param srcText the source for the new characters - * @param srcStart the offset into srcText where new characters - * will be obtained - * @param srcLength the number of characters in srcText in - * the replace string - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& replace(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength); - - /** - * Replace the characters in the range - * [start, start + length) - * with the characters in srcText. srcText is - * not modified. 
- * @param start the offset at which the replace operation begins - * @param length the number of characters to replace. The character at - * start + length is not modified. - * @param srcText the source for the new characters - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& replace(int32_t start, - int32_t length, - const UnicodeString& srcText); - - /** - * Replace the characters in the range - * [start, start + length) with the characters in - * srcChars in the range - * [srcStart, srcStart + srcLength). srcChars - * is not modified. - * @param start the offset at which the replace operation begins - * @param length the number of characters to replace. The character at - * start + length is not modified. - * @param srcChars the source for the new characters - * @param srcStart the offset into srcChars where new characters - * will be obtained - * @param srcLength the number of characters in srcChars - * in the replace string - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& replace(int32_t start, - int32_t length, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength); - - /** - * Replace the characters in the range - * [start, start + length) with the characters in - * srcChars. srcChars is not modified. - * @param start the offset at which the replace operation begins - * @param length number of characters to replace. The character at - * start + length is not modified. - * @param srcChars the source for the new characters - * @param srcLength the number of Unicode characters in srcChars - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& replace(int32_t start, - int32_t length, - ConstChar16Ptr srcChars, - int32_t srcLength); - - /** - * Replace the characters in the range - * [start, start + length) with the code unit - * srcChar. - * @param start the offset at which the replace operation begins - * @param length the number of characters to replace. 
The character at - * start + length is not modified. - * @param srcChar the new code unit - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& replace(int32_t start, - int32_t length, - char16_t srcChar); - - /** - * Replace the characters in the range - * [start, start + length) with the code point - * srcChar. - * @param start the offset at which the replace operation begins - * @param length the number of characters to replace. The character at - * start + length is not modified. - * @param srcChar the new code point - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); - - /** - * Replace the characters in the range [start, limit) - * with the characters in srcText. srcText is not modified. - * @param start the offset at which the replace operation begins - * @param limit the offset immediately following the replace range - * @param srcText the source for the new characters - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& replaceBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText); - - /** - * Replace the characters in the range [start, limit) - * with the characters in srcText in the range - * [srcStart, srcLimit). srcText is not modified. - * @param start the offset at which the replace operation begins - * @param limit the offset immediately following the replace range - * @param srcText the source for the new characters - * @param srcStart the offset into srcChars where new characters - * will be obtained - * @param srcLimit the offset immediately following the range to copy - * in srcText - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& replaceBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit); - - /** - * Replace a substring of this object with the given text. 
- * @param start the beginning index, inclusive; 0 <= start - * <= limit. - * @param limit the ending index, exclusive; start <= limit - * <= length(). - * @param text the text to replace characters start - * to limit - 1 - * @stable ICU 2.0 - */ - virtual void handleReplaceBetween(int32_t start, - int32_t limit, - const UnicodeString& text); - - /** - * Replaceable API - * @return TRUE if it has MetaData - * @stable ICU 2.4 - */ - virtual UBool hasMetaData() const; - - /** - * Copy a substring of this object, retaining attribute (out-of-band) - * information. This method is used to duplicate or reorder substrings. - * The destination index must not overlap the source range. - * - * @param start the beginning index, inclusive; 0 <= start <= - * limit. - * @param limit the ending index, exclusive; start <= limit <= - * length(). - * @param dest the destination index. The characters from - * start..limit-1 will be copied to dest. - * Implementations of this method may assume that dest <= start || - * dest >= limit. - * @stable ICU 2.0 - */ - virtual void copy(int32_t start, int32_t limit, int32_t dest); - - /* Search and replace operations */ - - /** - * Replace all occurrences of characters in oldText with the characters - * in newText - * @param oldText the text containing the search text - * @param newText the text containing the replacement text - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& findAndReplace(const UnicodeString& oldText, - const UnicodeString& newText); - - /** - * Replace all occurrences of characters in oldText with characters - * in newText - * in the range [start, start + length). 
- * @param start the start of the range in which replace will performed - * @param length the length of the range in which replace will be performed - * @param oldText the text containing the search text - * @param newText the text containing the replacement text - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& findAndReplace(int32_t start, - int32_t length, - const UnicodeString& oldText, - const UnicodeString& newText); - - /** - * Replace all occurrences of characters in oldText in the range - * [oldStart, oldStart + oldLength) with the characters - * in newText in the range - * [newStart, newStart + newLength) - * in the range [start, start + length). - * @param start the start of the range in which replace will performed - * @param length the length of the range in which replace will be performed - * @param oldText the text containing the search text - * @param oldStart the start of the search range in oldText - * @param oldLength the length of the search range in oldText - * @param newText the text containing the replacement text - * @param newStart the start of the replacement range in newText - * @param newLength the length of the replacement range in newText - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& findAndReplace(int32_t start, - int32_t length, - const UnicodeString& oldText, - int32_t oldStart, - int32_t oldLength, - const UnicodeString& newText, - int32_t newStart, - int32_t newLength); - - - /* Remove operations */ - - /** - * Remove all characters from the UnicodeString object. - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& remove(void); - - /** - * Remove the characters in the range - * [start, start + length) from the UnicodeString object. 
- * @param start the offset of the first character to remove - * @param length the number of characters to remove - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& remove(int32_t start, - int32_t length = (int32_t)INT32_MAX); - - /** - * Remove the characters in the range - * [start, limit) from the UnicodeString object. - * @param start the offset of the first character to remove - * @param limit the offset immediately following the range to remove - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& removeBetween(int32_t start, - int32_t limit = (int32_t)INT32_MAX); - - /** - * Retain only the characters in the range - * [start, limit) from the UnicodeString object. - * Removes characters before start and at and after limit. - * @param start the offset of the first character to retain - * @param limit the offset immediately following the range to retain - * @return a reference to this - * @stable ICU 4.4 - */ - inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); - - /* Length operations */ - - /** - * Pad the start of this UnicodeString with the character padChar. - * If the length of this UnicodeString is less than targetLength, - * targetLength - length() copies of padChar will be added to the - * beginning of this UnicodeString. - * @param targetLength the desired length of the string - * @param padChar the character to use for padding. Defaults to - * space (U+0020) - * @return TRUE if the text was padded, FALSE otherwise. - * @stable ICU 2.0 - */ - UBool padLeading(int32_t targetLength, - char16_t padChar = 0x0020); - - /** - * Pad the end of this UnicodeString with the character padChar. - * If the length of this UnicodeString is less than targetLength, - * targetLength - length() copies of padChar will be added to the - * end of this UnicodeString. - * @param targetLength the desired length of the string - * @param padChar the character to use for padding. 
Defaults to - * space (U+0020) - * @return TRUE if the text was padded, FALSE otherwise. - * @stable ICU 2.0 - */ - UBool padTrailing(int32_t targetLength, - char16_t padChar = 0x0020); - - /** - * Truncate this UnicodeString to the targetLength. - * @param targetLength the desired length of this UnicodeString. - * @return TRUE if the text was truncated, FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool truncate(int32_t targetLength); - - /** - * Trims leading and trailing whitespace from this UnicodeString. - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& trim(void); - - - /* Miscellaneous operations */ - - /** - * Reverse this UnicodeString in place. - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& reverse(void); - - /** - * Reverse the range [start, start + length) in - * this UnicodeString. - * @param start the start of the range to reverse - * @param length the number of characters to reverse - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& reverse(int32_t start, - int32_t length); - - /** - * Convert the characters in this to UPPER CASE following the conventions of - * the default locale. - * @return A reference to this. - * @stable ICU 2.0 - */ - UnicodeString& toUpper(void); - - /** - * Convert the characters in this to UPPER CASE following the conventions of - * a specific locale. - * @param locale The locale containing the conventions to use. - * @return A reference to this. - * @stable ICU 2.0 - */ - UnicodeString& toUpper(const Locale& locale); - - /** - * Convert the characters in this to lower case following the conventions of - * the default locale. - * @return A reference to this. - * @stable ICU 2.0 - */ - UnicodeString& toLower(void); - - /** - * Convert the characters in this to lower case following the conventions of - * a specific locale. - * @param locale The locale containing the conventions to use. - * @return A reference to this. 
- * @stable ICU 2.0 - */ - UnicodeString& toLower(const Locale& locale); - -#if !UCONFIG_NO_BREAK_ITERATION - - /** - * Titlecase this string, convenience function using the default locale. - * - * Casing is locale-dependent and context-sensitive. - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. - * - * The titlecase break iterator can be provided to customize for arbitrary - * styles, using rules and dictionaries beyond the standard iterators. - * It may be more efficient to always provide an iterator to avoid - * opening and closing one for each string. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. - * - * This function uses only the setText(), first() and next() methods of the - * provided break iterator. - * - * @param titleIter A break iterator to find the first characters of words - * that are to be titlecased. - * If none is provided (0), then a standard titlecase - * break iterator is opened. - * Otherwise the provided iterator is set to the string's text. - * @return A reference to this. - * @stable ICU 2.1 - */ - UnicodeString &toTitle(BreakIterator *titleIter); - - /** - * Titlecase this string. - * - * Casing is locale-dependent and context-sensitive. - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. - * - * The titlecase break iterator can be provided to customize for arbitrary - * styles, using rules and dictionaries beyond the standard iterators. - * It may be more efficient to always provide an iterator to avoid - * opening and closing one for each string. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. 
- * - * This function uses only the setText(), first() and next() methods of the - * provided break iterator. - * - * @param titleIter A break iterator to find the first characters of words - * that are to be titlecased. - * If none is provided (0), then a standard titlecase - * break iterator is opened. - * Otherwise the provided iterator is set to the string's text. - * @param locale The locale to consider. - * @return A reference to this. - * @stable ICU 2.1 - */ - UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); - - /** - * Titlecase this string, with options. - * - * Casing is locale-dependent and context-sensitive. - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. (This can be modified with options.) - * - * The titlecase break iterator can be provided to customize for arbitrary - * styles, using rules and dictionaries beyond the standard iterators. - * It may be more efficient to always provide an iterator to avoid - * opening and closing one for each string. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. - * - * This function uses only the setText(), first() and next() methods of the - * provided break iterator. - * - * @param titleIter A break iterator to find the first characters of words - * that are to be titlecased. - * If none is provided (0), then a standard titlecase - * break iterator is opened. - * Otherwise the provided iterator is set to the string's text. - * @param locale The locale to consider. - * @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE, - * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, - * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. - * See also ucasemap_open(). - * @return A reference to this. 
- * @stable ICU 3.8 - */ - UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); - -#endif - - /** - * Case-folds the characters in this string. - * - * Case-folding is locale-independent and not context-sensitive, - * but there is an option for whether to include or exclude mappings for dotted I - * and dotless i that are marked with 'T' in CaseFolding.txt. - * - * The result may be longer or shorter than the original. - * - * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I - * @return A reference to this. - * @stable ICU 2.0 - */ - UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); - - //======================================== - // Access to the internal buffer - //======================================== - - /** - * Get a read/write pointer to the internal buffer. - * The buffer is guaranteed to be large enough for at least minCapacity char16_ts, - * writable, and is still owned by the UnicodeString object. - * Calls to getBuffer(minCapacity) must not be nested, and - * must be matched with calls to releaseBuffer(newLength). - * If the string buffer was read-only or shared, - * then it will be reallocated and copied. - * - * An attempted nested call will return 0, and will not further modify the - * state of the UnicodeString object. - * It also returns 0 if the string is bogus. - * - * The actual capacity of the string buffer may be larger than minCapacity. - * getCapacity() returns the actual capacity. - * For many operations, the full capacity should be used to avoid reallocations. - * - * While the buffer is "open" between getBuffer(minCapacity) - * and releaseBuffer(newLength), the following applies: - * - The string length is set to 0. - * - Any read API call on the UnicodeString object will behave like on a 0-length string. - * - Any write API call on the UnicodeString object is disallowed and will have no effect. - * - You can read from and write to the returned buffer. 
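- * - The previous string contents will still be in the buffer; - * if you want to use it, then you need to call length() before getBuffer(minCapacity). - * If the length() was greater than minCapacity, then any contents after minCapacity - * may be lost. - * The buffer contents is not NUL-terminated by getBuffer(). - * If length() < getCapacity() then you can terminate it by writing a NUL - * at index length(). - * - You must call releaseBuffer(newLength) before and in order to - * return to normal UnicodeString operation. - * - * @param minCapacity the minimum number of char16_ts that are to be available - * in the buffer, starting at the returned pointer; - * default to the current string capacity if minCapacity==-1 - * @return a writable pointer to the internal string buffer, - * or nullptr if an error occurs (nested calls, out of memory) - * - * @see releaseBuffer - * @see getTerminatedBuffer() - * @stable ICU 2.0 - */ - char16_t *getBuffer(int32_t minCapacity); - - /** - * Release a read/write buffer on a UnicodeString object with an - * "open" getBuffer(minCapacity). - * This function must be called in a matched pair with getBuffer(minCapacity). - * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". - * - * It will set the string length to newLength, at most to the current capacity. - * If newLength==-1 then it will set the length according to the - * first NUL in the buffer, or to the capacity if there is no NUL. - * - * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. - * - * @param newLength the new length of the UnicodeString object; - * defaults to the current capacity if newLength is greater than that; - * if newLength==-1, it defaults to u_strlen(buffer) but not more than - * the current capacity of the string - * - * @see getBuffer(int32_t minCapacity) - * @stable ICU 2.0 - */ - void releaseBuffer(int32_t newLength=-1); - - /** - * Get a read-only pointer to the internal buffer. - * This can be called at any time on a valid UnicodeString. - * - * It returns 0 if the string is bogus, or - * during an "open" getBuffer(minCapacity). - * - * It can be called as many times as desired. - * The pointer that it returns will remain valid until the UnicodeString object is modified, - * at which time the pointer is semantically invalidated and must not be used any more. - * - * The capacity of the buffer can be determined with getCapacity(). - * The part after length() may or may not be initialized and valid, - * depending on the history of the UnicodeString object. - * - * The buffer contents is (probably) not NUL-terminated. - * You can check if it is with - * (s.length() < s.getCapacity() && buffer[s.length()]==0). - * (See getTerminatedBuffer().) - * - * The buffer may reside in read-only memory. Its contents must not - * be modified. - * - * @return a read-only pointer to the internal string buffer, - * or nullptr if the string is empty or bogus - * - * @see getBuffer(int32_t minCapacity) - * @see getTerminatedBuffer() - * @stable ICU 2.0 - */ - inline const char16_t *getBuffer() const; - - /** - * Get a read-only pointer to the internal buffer, - * making sure that it is NUL-terminated. - * This can be called at any time on a valid UnicodeString. - * - * It returns 0 if the string is bogus, or - * during an "open" getBuffer(minCapacity), or if the buffer cannot - * be NUL-terminated (because memory allocation failed). - * - * It can be called as many times as desired. - * The pointer that it returns will remain valid until the UnicodeString object is modified, - * at which time the pointer is semantically invalidated and must not be used any more. - * - * The capacity of the buffer can be determined with getCapacity(). - * The part after length()+1 may or may not be initialized and valid, - * depending on the history of the UnicodeString object. - * - * The buffer contents is guaranteed to be NUL-terminated. - * getTerminatedBuffer() may reallocate the buffer if a terminating NUL - * is written. - * For this reason, this function is not const, unlike getBuffer(). - * Note that a UnicodeString may also contain NUL characters as part of its contents. - * - * The buffer may reside in read-only memory. Its contents must not - * be modified. 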
- * - * @return a read-only pointer to the internal string buffer, - * or 0 if the string is empty or bogus - * - * @see getBuffer(int32_t minCapacity) - * @see getBuffer() - * @stable ICU 2.2 - */ - const char16_t *getTerminatedBuffer(); - - //======================================== - // Constructors - //======================================== - - /** Construct an empty UnicodeString. - * @stable ICU 2.0 - */ - inline UnicodeString(); - - /** - * Construct a UnicodeString with capacity to hold capacity char16_ts - * @param capacity the number of char16_ts this UnicodeString should hold - * before a resize is necessary; if count is greater than 0 and count - * code points c take up more space than capacity, then capacity is adjusted - * accordingly. - * @param c is used to initially fill the string - * @param count specifies how many code points c are to be written in the - * string - * @stable ICU 2.0 - */ - UnicodeString(int32_t capacity, UChar32 c, int32_t count); - - /** - * Single char16_t (code unit) constructor. - * - * It is recommended to mark this constructor "explicit" by - * -DUNISTR_FROM_CHAR_EXPLICIT=explicit - * on the compiler command line or similar. - * @param ch the character to place in the UnicodeString - * @stable ICU 2.0 - */ - UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch); - - /** - * Single UChar32 (code point) constructor. - * - * It is recommended to mark this constructor "explicit" by - * -DUNISTR_FROM_CHAR_EXPLICIT=explicit - * on the compiler command line or similar. - * @param ch the character to place in the UnicodeString - * @stable ICU 2.0 - */ - UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); - - /** - * char16_t* constructor. - * - * It is recommended to mark this constructor "explicit" by - * -DUNISTR_FROM_STRING_EXPLICIT=explicit - * on the compiler command line or similar. - * @param text The characters to place in the UnicodeString. text - * must be NULL (U+0000) terminated. 
- * @stable ICU 2.0 - */ - UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); - -#if !U_CHAR16_IS_TYPEDEF - /** - * uint16_t * constructor. - * Delegates to UnicodeString(const char16_t *). - * - * It is recommended to mark this constructor "explicit" by - * -DUNISTR_FROM_STRING_EXPLICIT=explicit - * on the compiler command line or similar. - * @param text NUL-terminated UTF-16 string - * @stable ICU 59 - */ - UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : - UnicodeString(ConstChar16Ptr(text)) {} -#endif - -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - /** - * wchar_t * constructor. - * (Only defined if U_SIZEOF_WCHAR_T==2.) - * Delegates to UnicodeString(const char16_t *). - * - * It is recommended to mark this constructor "explicit" by - * -DUNISTR_FROM_STRING_EXPLICIT=explicit - * on the compiler command line or similar. - * @param text NUL-terminated UTF-16 string - * @stable ICU 59 - */ - UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : - UnicodeString(ConstChar16Ptr(text)) {} -#endif - - /** - * nullptr_t constructor. - * Effectively the same as the default constructor, makes an empty string object. - * - * It is recommended to mark this constructor "explicit" by - * -DUNISTR_FROM_STRING_EXPLICIT=explicit - * on the compiler command line or similar. - * @param text nullptr - * @stable ICU 59 - */ - UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); - - /** - * char16_t* constructor. - * @param text The characters to place in the UnicodeString. - * @param textLength The number of Unicode characters in text - * to copy. - * @stable ICU 2.0 - */ - UnicodeString(const char16_t *text, - int32_t textLength); - -#if !U_CHAR16_IS_TYPEDEF - /** - * uint16_t * constructor. - * Delegates to UnicodeString(const char16_t *, int32_t). 
- * @param text UTF-16 string - * @param length string length - * @stable ICU 59 - */ - UnicodeString(const uint16_t *text, int32_t length) : - UnicodeString(ConstChar16Ptr(text), length) {} -#endif - -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - /** - * wchar_t * constructor. - * (Only defined if U_SIZEOF_WCHAR_T==2.) - * Delegates to UnicodeString(const char16_t *, int32_t). - * @param text NUL-terminated UTF-16 string - * @param length string length - * @stable ICU 59 - */ - UnicodeString(const wchar_t *text, int32_t length) : - UnicodeString(ConstChar16Ptr(text), length) {} -#endif - - /** - * nullptr_t constructor. - * Effectively the same as the default constructor, makes an empty string object. - * @param text nullptr - * @param length ignored - * @stable ICU 59 - */ - inline UnicodeString(const std::nullptr_t text, int32_t length); - - /** - * Readonly-aliasing char16_t* constructor. - * The text will be used for the UnicodeString object, but - * it will not be released when the UnicodeString is destroyed. - * This has copy-on-write semantics: - * When the string is modified, then the buffer is first copied into - * newly allocated memory. - * The aliased buffer is never modified. - * - * In an assignment to another UnicodeString, when using the copy constructor - * or the assignment operator, the text will be copied. - * When using fastCopyFrom(), the text will be aliased again, - * so that both strings then alias the same readonly-text. - * - * @param isTerminated specifies if text is NUL-terminated. - * This must be true if textLength==-1. - * @param text The characters to alias for the UnicodeString. - * @param textLength The number of Unicode characters in text to alias. - * If -1, then this constructor will determine the length - * by calling u_strlen(). - * @stable ICU 2.0 - */ - UnicodeString(UBool isTerminated, - ConstChar16Ptr text, - int32_t textLength); - - /** - * Writable-aliasing char16_t* constructor. 
- * The text will be used for the UnicodeString object, but - * it will not be released when the UnicodeString is destroyed. - * This has write-through semantics: - * For as long as the capacity of the buffer is sufficient, write operations - * will directly affect the buffer. When more capacity is necessary, then - * a new buffer will be allocated and the contents copied as with regularly - * constructed strings. - * In an assignment to another UnicodeString, the buffer will be copied. - * The extract(Char16Ptr dst) function detects whether the dst pointer is the same - * as the string buffer itself and will in this case not copy the contents. - * - * @param buffer The characters to alias for the UnicodeString. - * @param buffLength The number of Unicode characters in buffer to alias. - * @param buffCapacity The size of buffer in char16_ts. - * @stable ICU 2.0 - */ - UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); - -#if !U_CHAR16_IS_TYPEDEF - /** - * Writable-aliasing uint16_t * constructor. - * Delegates to UnicodeString(const char16_t *, int32_t, int32_t). - * @param buffer writable buffer of/for UTF-16 text - * @param buffLength length of the current buffer contents - * @param buffCapacity buffer capacity - * @stable ICU 59 - */ - UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : - UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} -#endif - -#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) - /** - * Writable-aliasing wchar_t * constructor. - * (Only defined if U_SIZEOF_WCHAR_T==2.) - * Delegates to UnicodeString(const char16_t *, int32_t, int32_t). 
- * @param buffer writable buffer of/for UTF-16 text - * @param buffLength length of the current buffer contents - * @param buffCapacity buffer capacity - * @stable ICU 59 - */ - UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : - UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} -#endif - - /** - * Writable-aliasing nullptr_t constructor. - * Effectively the same as the default constructor, makes an empty string object. - * @param buffer nullptr - * @param buffLength ignored - * @param buffCapacity ignored - * @stable ICU 59 - */ - inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); - -#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION - - /** - * char* constructor. - * Uses the default converter (and thus depends on the ICU conversion code) - * unless U_CHARSET_IS_UTF8 is set to 1. - * - * For ASCII (really "invariant character") strings it is more efficient to use - * the constructor that takes a US_INV (for its enum EInvariant). - * For ASCII (invariant-character) string literals, see UNICODE_STRING and - * UNICODE_STRING_SIMPLE. - * - * It is recommended to mark this constructor "explicit" by - * -DUNISTR_FROM_STRING_EXPLICIT=explicit - * on the compiler command line or similar. - * @param codepageData an array of bytes, null-terminated, - * in the platform's default codepage. - * @stable ICU 2.0 - * @see UNICODE_STRING - * @see UNICODE_STRING_SIMPLE - */ - UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); - - /** - * char* constructor. - * Uses the default converter (and thus depends on the ICU conversion code) - * unless U_CHARSET_IS_UTF8 is set to 1. - * @param codepageData an array of bytes in the platform's default codepage. - * @param dataLength The number of bytes in codepageData. - * @stable ICU 2.0 - */ - UnicodeString(const char *codepageData, int32_t dataLength); - -#endif - -#if !UCONFIG_NO_CONVERSION - - /** - * char* constructor. 
- * @param codepageData an array of bytes, null-terminated - * @param codepage the encoding of codepageData. The special - * value 0 for codepage indicates that the text is in the - * platform's default codepage. - * - * If codepage is an empty string (""), - * then a simple conversion is performed on the codepage-invariant - * subset ("invariant characters") of the platform encoding. See utypes.h. - * Recommendation: For invariant-character strings use the constructor - * UnicodeString(const char *src, int32_t length, enum EInvariant inv) - * because it avoids object code dependencies of UnicodeString on - * the conversion code. - * - * @stable ICU 2.0 - */ - UnicodeString(const char *codepageData, const char *codepage); - - /** - * char* constructor. - * @param codepageData an array of bytes. - * @param dataLength The number of bytes in codepageData. - * @param codepage the encoding of codepageData. The special - * value 0 for codepage indicates that the text is in the - * platform's default codepage. - * If codepage is an empty string (""), - * then a simple conversion is performed on the codepage-invariant - * subset ("invariant characters") of the platform encoding. See utypes.h. - * Recommendation: For invariant-character strings use the constructor - * UnicodeString(const char *src, int32_t length, enum EInvariant inv) - * because it avoids object code dependencies of UnicodeString on - * the conversion code. - * - * @stable ICU 2.0 - */ - UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); - - /** - * char * / UConverter constructor. - * This constructor uses an existing UConverter object to - * convert the codepage string to Unicode and construct a UnicodeString - * from that. - * - * The converter is reset at first. - * If the error code indicates a failure before this constructor is called, - * or if an error occurs during conversion or construction, - * then the string will be bogus. 
- * - * This function avoids the overhead of opening and closing a converter if - * multiple strings are constructed. - * - * @param src input codepage string - * @param srcLength length of the input string, can be -1 for NUL-terminated strings - * @param cnv converter object (ucnv_resetToUnicode() will be called), - * can be NULL for the default converter - * @param errorCode normal ICU error code - * @stable ICU 2.0 - */ - UnicodeString( - const char *src, int32_t srcLength, - UConverter *cnv, - UErrorCode &errorCode); - -#endif - - /** - * Constructs a Unicode string from an invariant-character char * string. - * About invariant characters see utypes.h. - * This constructor has no runtime dependency on conversion code and is - * therefore recommended over ones taking a charset name string - * (where the empty string "" indicates invariant-character conversion). - * - * Use the macro US_INV as the third, signature-distinguishing parameter. - * - * For example: - * \code - * void fn(const char *s) { - * UnicodeString ustr(s, -1, US_INV); - * // use ustr ... - * } - * \endcode - * - * @param src String using only invariant characters. - * @param length Length of src, or -1 if NUL-terminated. - * @param inv Signature-distinguishing paramater, use US_INV. - * - * @see US_INV - * @stable ICU 3.2 - */ - UnicodeString(const char *src, int32_t length, enum EInvariant inv); - - - /** - * Copy constructor. - * - * Starting with ICU 2.4, the assignment operator and the copy constructor - * allocate a new buffer and copy the buffer contents even for readonly aliases. - * By contrast, the fastCopyFrom() function implements the old, - * more efficient but less safe behavior - * of making this string also a readonly alias to the same buffer. - * - * If the source object has an "open" buffer from getBuffer(minCapacity), - * then the copy is an empty string. - * - * @param that The UnicodeString object to copy. 
- * @stable ICU 2.0 - * @see fastCopyFrom - */ - UnicodeString(const UnicodeString& that); - - /** - * Move constructor; might leave src in bogus state. - * This string will have the same contents and state that the source string had. - * @param src source string - * @stable ICU 56 - */ - UnicodeString(UnicodeString &&src) U_NOEXCEPT; - - /** - * 'Substring' constructor from tail of source string. - * @param src The UnicodeString object to copy. - * @param srcStart The offset into src at which to start copying. - * @stable ICU 2.2 - */ - UnicodeString(const UnicodeString& src, int32_t srcStart); - - /** - * 'Substring' constructor from subrange of source string. - * @param src The UnicodeString object to copy. - * @param srcStart The offset into src at which to start copying. - * @param srcLength The number of characters from src to copy. - * @stable ICU 2.2 - */ - UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); - - /** - * Clone this object, an instance of a subclass of Replaceable. - * Clones can be used concurrently in multiple threads. - * If a subclass does not implement clone(), or if an error occurs, - * then NULL is returned. - * The clone functions in all subclasses return a pointer to a Replaceable - * because some compilers do not support covariant (same-as-this) - * return types; cast to the appropriate subclass if necessary. - * The caller must delete the clone. - * - * @return a clone of this object - * - * @see Replaceable::clone - * @see getDynamicClassID - * @stable ICU 2.6 - */ - virtual Replaceable *clone() const; - - /** Destructor. - * @stable ICU 2.0 - */ - virtual ~UnicodeString(); - - /** - * Create a UnicodeString from a UTF-8 string. - * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. - * Calls u_strFromUTF8WithSub(). - * - * @param utf8 UTF-8 input string. 
- * Note that a StringPiece can be implicitly constructed - * from a std::string or a NUL-terminated const char * string. - * @return A UnicodeString with equivalent UTF-16 contents. - * @see toUTF8 - * @see toUTF8String - * @stable ICU 4.2 - */ - static UnicodeString fromUTF8(StringPiece utf8); - - /** - * Create a UnicodeString from a UTF-32 string. - * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. - * Calls u_strFromUTF32WithSub(). - * - * @param utf32 UTF-32 input string. Must not be NULL. - * @param length Length of the input string, or -1 if NUL-terminated. - * @return A UnicodeString with equivalent UTF-16 contents. - * @see toUTF32 - * @stable ICU 4.2 - */ - static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); - - /* Miscellaneous operations */ - - /** - * Unescape a string of characters and return a string containing - * the result. The following escape sequences are recognized: - * - * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] - * \\Uhhhhhhhh 8 hex digits - * \\xhh 1-2 hex digits - * \\ooo 1-3 octal digits; o in [0-7] - * \\cX control-X; X is masked with 0x1F - * - * as well as the standard ANSI C escapes: - * - * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, - * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, - * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C - * - * Anything else following a backslash is generically escaped. For - * example, "[a\\-z]" returns "[a-z]". - * - * If an escape sequence is ill-formed, this method returns an empty - * string. An example of an ill-formed sequence is "\\u" followed by - * fewer than 4 hex digits. - * - * This function is similar to u_unescape() but not identical to it. - * The latter takes a source char*, so it does escape recognition - * and also invariant conversion. - * - * @return a string with backslash escapes interpreted, or an - * empty string on error. 
- * @see UnicodeString#unescapeAt() - * @see u_unescape() - * @see u_unescapeAt() - * @stable ICU 2.0 - */ - UnicodeString unescape() const; - - /** - * Unescape a single escape sequence and return the represented - * character. See unescape() for a listing of the recognized escape - * sequences. The character at offset-1 is assumed (without - * checking) to be a backslash. If the escape sequence is - * ill-formed, or the offset is out of range, U_SENTINEL=-1 is - * returned. - * - * @param offset an input output parameter. On input, it is the - * offset into this string where the escape sequence is located, - * after the initial backslash. On output, it is advanced after the - * last character parsed. On error, it is not advanced at all. - * @return the character represented by the escape sequence at - * offset, or U_SENTINEL=-1 on error. - * @see UnicodeString#unescape() - * @see u_unescape() - * @see u_unescapeAt() - * @stable ICU 2.0 - */ - UChar32 unescapeAt(int32_t &offset) const; - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - * - * @stable ICU 2.2 - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - * - * @stable ICU 2.2 - */ - virtual UClassID getDynamicClassID() const; - - //======================================== - // Implementation methods - //======================================== - -protected: - /** - * Implement Replaceable::getLength() (see jitterbug 1027). - * @stable ICU 2.4 - */ - virtual int32_t getLength() const; - - /** - * The change in Replaceable to use virtual getCharAt() allows - * UnicodeString::charAt() to be inline again (see jitterbug 709). - * @stable ICU 2.4 - */ - virtual char16_t getCharAt(int32_t offset) const; - - /** - * The change in Replaceable to use virtual getChar32At() allows - * UnicodeString::char32At() to be inline again (see jitterbug 709). 
- * @stable ICU 2.4 - */ - virtual UChar32 getChar32At(int32_t offset) const; - -private: - // For char* constructors. Could be made public. - UnicodeString &setToUTF8(StringPiece utf8); - // For extract(char*). - // We could make a toUTF8(target, capacity, errorCode) public but not - // this version: New API will be cleaner if we make callers create substrings - // rather than having start+length on every method, - // and it should take a UErrorCode&. - int32_t - toUTF8(int32_t start, int32_t len, - char *target, int32_t capacity) const; - - /** - * Internal string contents comparison, called by operator==. - * Requires: this & text not bogus and have same lengths. - */ - UBool doEquals(const UnicodeString &text, int32_t len) const; - - inline int8_t - doCompare(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const; - - int8_t doCompare(int32_t start, - int32_t length, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength) const; - - inline int8_t - doCompareCodePointOrder(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const; - - int8_t doCompareCodePointOrder(int32_t start, - int32_t length, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength) const; - - inline int8_t - doCaseCompare(int32_t start, - int32_t length, - const UnicodeString &srcText, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const; - - int8_t - doCaseCompare(int32_t start, - int32_t length, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const; - - int32_t doIndexOf(char16_t c, - int32_t start, - int32_t length) const; - - int32_t doIndexOf(UChar32 c, - int32_t start, - int32_t length) const; - - int32_t doLastIndexOf(char16_t c, - int32_t start, - int32_t length) const; - - int32_t doLastIndexOf(UChar32 c, - int32_t start, - int32_t length) const; - - void doExtract(int32_t start, - int32_t 
length, - char16_t *dst, - int32_t dstStart) const; - - inline void doExtract(int32_t start, - int32_t length, - UnicodeString& target) const; - - inline char16_t doCharAt(int32_t offset) const; - - UnicodeString& doReplace(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength); - - UnicodeString& doReplace(int32_t start, - int32_t length, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength); - - UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength); - UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); - - UnicodeString& doReverse(int32_t start, - int32_t length); - - // calculate hash code - int32_t doHashCode(void) const; - - // get pointer to start of array - // these do not check for kOpenGetBuffer, unlike the public getBuffer() function - inline char16_t* getArrayStart(void); - inline const char16_t* getArrayStart(void) const; - - inline UBool hasShortLength() const; - inline int32_t getShortLength() const; - - // A UnicodeString object (not necessarily its current buffer) - // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). - inline UBool isWritable() const; - - // Is the current buffer writable? - inline UBool isBufferWritable() const; - - // None of the following does releaseArray(). 
- inline void setZeroLength(); - inline void setShortLength(int32_t len); - inline void setLength(int32_t len); - inline void setToEmpty(); - inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags - - // allocate the array; result may be the stack buffer - // sets refCount to 1 if appropriate - // sets fArray, fCapacity, and flags - // sets length to 0 - // returns boolean for success or failure - UBool allocate(int32_t capacity); - - // release the array if owned - void releaseArray(void); - - // turn a bogus string into an empty one - void unBogus(); - - // implements assigment operator, copy constructor, and fastCopyFrom() - UnicodeString ©From(const UnicodeString &src, UBool fastCopy=FALSE); - - // Copies just the fields without memory management. - void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT; - - // Pin start and limit to acceptable values. - inline void pinIndex(int32_t& start) const; - inline void pinIndices(int32_t& start, - int32_t& length) const; - -#if !UCONFIG_NO_CONVERSION - - /* Internal extract() using UConverter. */ - int32_t doExtract(int32_t start, int32_t length, - char *dest, int32_t destCapacity, - UConverter *cnv, - UErrorCode &errorCode) const; - - /* - * Real constructor for converting from codepage data. - * It assumes that it is called with !fRefCounted. - * - * If codepage==0, then the default converter - * is used for the platform encoding. - * If codepage is an empty string (""), - * then a simple conversion is performed on the codepage-invariant - * subset ("invariant characters") of the platform encoding. See utypes.h. - */ - void doCodepageCreate(const char *codepageData, - int32_t dataLength, - const char *codepage); - - /* - * Worker function for creating a UnicodeString from - * a codepage string using a UConverter. 
- */ - void - doCodepageCreate(const char *codepageData, - int32_t dataLength, - UConverter *converter, - UErrorCode &status); - -#endif - - /* - * This function is called when write access to the array - * is necessary. - * - * We need to make a copy of the array if - * the buffer is read-only, or - * the buffer is refCounted (shared), and refCount>1, or - * the buffer is too small. - * - * Return FALSE if memory could not be allocated. - */ - UBool cloneArrayIfNeeded(int32_t newCapacity = -1, - int32_t growCapacity = -1, - UBool doCopyArray = TRUE, - int32_t **pBufferToDelete = 0, - UBool forceClone = FALSE); - - /** - * Common function for UnicodeString case mappings. - * The stringCaseMapper has the same type UStringCaseMapper - * as in ustr_imp.h for ustrcase_map(). - */ - UnicodeString & - caseMap(int32_t caseLocale, uint32_t options, -#if !UCONFIG_NO_BREAK_ITERATION - BreakIterator *iter, -#endif - UStringCaseMapper *stringCaseMapper); - - // ref counting - void addRef(void); - int32_t removeRef(void); - int32_t refCount(void) const; - - // constants - enum { - /** - * Size of stack buffer for short strings. - * Must be at least U16_MAX_LENGTH for the single-code point constructor to work. 
- * @see UNISTR_OBJECT_SIZE - */ - US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR, - kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index) - kInvalidHashCode=0, // invalid hash code - kEmptyHashCode=1, // hash code for empty string - - // bit flag values for fLengthAndFlags - kIsBogus=1, // this string is bogus, i.e., not valid or NULL - kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields - kRefCounted=4, // there is a refCount field before the characters in fArray - kBufferIsReadonly=8,// do not write to this buffer - kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), - // and releaseBuffer(newLength) must be called - kAllStorageFlags=0x1f, - - kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long - kLength1=1<127; else undefined - int32_t fCapacity; // capacity of fArray (in char16_ts) - // array pointer last to minimize padding for machines with P128 data model - // or pointer sizes that are not a power of 2 - char16_t *fArray; // the Unicode data - } fFields; - } fUnion; -}; - -/** - * Create a new UnicodeString with the concatenation of two others. - * - * @param s1 The first string to be copied to the new one. - * @param s2 The second string to be copied to the new one, after s1. 
- * @return UnicodeString(s1).append(s2) - * @stable ICU 2.8 - */ -U_COMMON_API UnicodeString U_EXPORT2 -operator+ (const UnicodeString &s1, const UnicodeString &s2); - -//======================================== -// Inline members -//======================================== - -//======================================== -// Privates -//======================================== - -inline void -UnicodeString::pinIndex(int32_t& start) const -{ - // pin index - if(start < 0) { - start = 0; - } else if(start > length()) { - start = length(); - } -} - -inline void -UnicodeString::pinIndices(int32_t& start, - int32_t& _length) const -{ - // pin indices - int32_t len = length(); - if(start < 0) { - start = 0; - } else if(start > len) { - start = len; - } - if(_length < 0) { - _length = 0; - } else if(_length > (len - start)) { - _length = (len - start); - } -} - -inline char16_t* -UnicodeString::getArrayStart() { - return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? - fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; -} - -inline const char16_t* -UnicodeString::getArrayStart() const { - return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
- fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; -} - -//======================================== -// Default constructor -//======================================== - -inline -UnicodeString::UnicodeString() { - fUnion.fStackFields.fLengthAndFlags=kShortString; -} - -inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) { - fUnion.fStackFields.fLengthAndFlags=kShortString; -} - -inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) { - fUnion.fStackFields.fLengthAndFlags=kShortString; -} - -inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) { - fUnion.fStackFields.fLengthAndFlags=kShortString; -} - -//======================================== -// Read-only implementation methods -//======================================== -inline UBool -UnicodeString::hasShortLength() const { - return fUnion.fFields.fLengthAndFlags>=0; -} - -inline int32_t -UnicodeString::getShortLength() const { - // fLengthAndFlags must be non-negative -> short length >= 0 - // and arithmetic or logical shift does not matter. - return fUnion.fFields.fLengthAndFlags>>kLengthShift; -} - -inline int32_t -UnicodeString::length() const { - return hasShortLength() ? getShortLength() : fUnion.fFields.fLength; -} - -inline int32_t -UnicodeString::getCapacity() const { - return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
- US_STACKBUF_SIZE : fUnion.fFields.fCapacity; -} - -inline int32_t -UnicodeString::hashCode() const -{ return doHashCode(); } - -inline UBool -UnicodeString::isBogus() const -{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); } - -inline UBool -UnicodeString::isWritable() const -{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); } - -inline UBool -UnicodeString::isBufferWritable() const -{ - return (UBool)( - !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && - (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); -} - -inline const char16_t * -UnicodeString::getBuffer() const { - if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { - return nullptr; - } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { - return fUnion.fStackFields.fBuffer; - } else { - return fUnion.fFields.fArray; - } -} - -//======================================== -// Read-only alias methods -//======================================== -inline int8_t -UnicodeString::doCompare(int32_t start, - int32_t thisLength, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const -{ - if(srcText.isBogus()) { - return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise - } else { - srcText.pinIndices(srcStart, srcLength); - return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); - } -} - -inline UBool -UnicodeString::operator== (const UnicodeString& text) const -{ - if(isBogus()) { - return text.isBogus(); - } else { - int32_t len = length(), textLength = text.length(); - return !text.isBogus() && len == textLength && doEquals(text, len); - } -} - -inline UBool -UnicodeString::operator!= (const UnicodeString& text) const -{ return (! 
operator==(text)); } - -inline UBool -UnicodeString::operator> (const UnicodeString& text) const -{ return doCompare(0, length(), text, 0, text.length()) == 1; } - -inline UBool -UnicodeString::operator< (const UnicodeString& text) const -{ return doCompare(0, length(), text, 0, text.length()) == -1; } - -inline UBool -UnicodeString::operator>= (const UnicodeString& text) const -{ return doCompare(0, length(), text, 0, text.length()) != -1; } - -inline UBool -UnicodeString::operator<= (const UnicodeString& text) const -{ return doCompare(0, length(), text, 0, text.length()) != 1; } - -inline int8_t -UnicodeString::compare(const UnicodeString& text) const -{ return doCompare(0, length(), text, 0, text.length()); } - -inline int8_t -UnicodeString::compare(int32_t start, - int32_t _length, - const UnicodeString& srcText) const -{ return doCompare(start, _length, srcText, 0, srcText.length()); } - -inline int8_t -UnicodeString::compare(ConstChar16Ptr srcChars, - int32_t srcLength) const -{ return doCompare(0, length(), srcChars, 0, srcLength); } - -inline int8_t -UnicodeString::compare(int32_t start, - int32_t _length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const -{ return doCompare(start, _length, srcText, srcStart, srcLength); } - -inline int8_t -UnicodeString::compare(int32_t start, - int32_t _length, - const char16_t *srcChars) const -{ return doCompare(start, _length, srcChars, 0, _length); } - -inline int8_t -UnicodeString::compare(int32_t start, - int32_t _length, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength) const -{ return doCompare(start, _length, srcChars, srcStart, srcLength); } - -inline int8_t -UnicodeString::compareBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit) const -{ return doCompare(start, limit - start, - srcText, srcStart, srcLimit - srcStart); } - -inline int8_t -UnicodeString::doCompareCodePointOrder(int32_t start, - int32_t 
thisLength, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const -{ - if(srcText.isBogus()) { - return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise - } else { - srcText.pinIndices(srcStart, srcLength); - return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); - } -} - -inline int8_t -UnicodeString::compareCodePointOrder(const UnicodeString& text) const -{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); } - -inline int8_t -UnicodeString::compareCodePointOrder(int32_t start, - int32_t _length, - const UnicodeString& srcText) const -{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } - -inline int8_t -UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars, - int32_t srcLength) const -{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } - -inline int8_t -UnicodeString::compareCodePointOrder(int32_t start, - int32_t _length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const -{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } - -inline int8_t -UnicodeString::compareCodePointOrder(int32_t start, - int32_t _length, - const char16_t *srcChars) const -{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } - -inline int8_t -UnicodeString::compareCodePointOrder(int32_t start, - int32_t _length, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength) const -{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } - -inline int8_t -UnicodeString::compareCodePointOrderBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit) const -{ return doCompareCodePointOrder(start, limit - start, - srcText, srcStart, srcLimit - srcStart); } - -inline int8_t -UnicodeString::doCaseCompare(int32_t start, - int32_t thisLength, - const UnicodeString &srcText, - int32_t 
srcStart, - int32_t srcLength, - uint32_t options) const -{ - if(srcText.isBogus()) { - return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise - } else { - srcText.pinIndices(srcStart, srcLength); - return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); - } -} - -inline int8_t -UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { - return doCaseCompare(0, length(), text, 0, text.length(), options); -} - -inline int8_t -UnicodeString::caseCompare(int32_t start, - int32_t _length, - const UnicodeString &srcText, - uint32_t options) const { - return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); -} - -inline int8_t -UnicodeString::caseCompare(ConstChar16Ptr srcChars, - int32_t srcLength, - uint32_t options) const { - return doCaseCompare(0, length(), srcChars, 0, srcLength, options); -} - -inline int8_t -UnicodeString::caseCompare(int32_t start, - int32_t _length, - const UnicodeString &srcText, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const { - return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); -} - -inline int8_t -UnicodeString::caseCompare(int32_t start, - int32_t _length, - const char16_t *srcChars, - uint32_t options) const { - return doCaseCompare(start, _length, srcChars, 0, _length, options); -} - -inline int8_t -UnicodeString::caseCompare(int32_t start, - int32_t _length, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const { - return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); -} - -inline int8_t -UnicodeString::caseCompareBetween(int32_t start, - int32_t limit, - const UnicodeString &srcText, - int32_t srcStart, - int32_t srcLimit, - uint32_t options) const { - return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); -} - -inline int32_t -UnicodeString::indexOf(const UnicodeString& srcText, - int32_t srcStart, - 
int32_t srcLength, - int32_t start, - int32_t _length) const -{ - if(!srcText.isBogus()) { - srcText.pinIndices(srcStart, srcLength); - if(srcLength > 0) { - return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); - } - } - return -1; -} - -inline int32_t -UnicodeString::indexOf(const UnicodeString& text) const -{ return indexOf(text, 0, text.length(), 0, length()); } - -inline int32_t -UnicodeString::indexOf(const UnicodeString& text, - int32_t start) const { - pinIndex(start); - return indexOf(text, 0, text.length(), start, length() - start); -} - -inline int32_t -UnicodeString::indexOf(const UnicodeString& text, - int32_t start, - int32_t _length) const -{ return indexOf(text, 0, text.length(), start, _length); } - -inline int32_t -UnicodeString::indexOf(const char16_t *srcChars, - int32_t srcLength, - int32_t start) const { - pinIndex(start); - return indexOf(srcChars, 0, srcLength, start, length() - start); -} - -inline int32_t -UnicodeString::indexOf(ConstChar16Ptr srcChars, - int32_t srcLength, - int32_t start, - int32_t _length) const -{ return indexOf(srcChars, 0, srcLength, start, _length); } - -inline int32_t -UnicodeString::indexOf(char16_t c, - int32_t start, - int32_t _length) const -{ return doIndexOf(c, start, _length); } - -inline int32_t -UnicodeString::indexOf(UChar32 c, - int32_t start, - int32_t _length) const -{ return doIndexOf(c, start, _length); } - -inline int32_t -UnicodeString::indexOf(char16_t c) const -{ return doIndexOf(c, 0, length()); } - -inline int32_t -UnicodeString::indexOf(UChar32 c) const -{ return indexOf(c, 0, length()); } - -inline int32_t -UnicodeString::indexOf(char16_t c, - int32_t start) const { - pinIndex(start); - return doIndexOf(c, start, length() - start); -} - -inline int32_t -UnicodeString::indexOf(UChar32 c, - int32_t start) const { - pinIndex(start); - return indexOf(c, start, length() - start); -} - -inline int32_t -UnicodeString::lastIndexOf(ConstChar16Ptr srcChars, - int32_t srcLength, 
- int32_t start, - int32_t _length) const -{ return lastIndexOf(srcChars, 0, srcLength, start, _length); } - -inline int32_t -UnicodeString::lastIndexOf(const char16_t *srcChars, - int32_t srcLength, - int32_t start) const { - pinIndex(start); - return lastIndexOf(srcChars, 0, srcLength, start, length() - start); -} - -inline int32_t -UnicodeString::lastIndexOf(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength, - int32_t start, - int32_t _length) const -{ - if(!srcText.isBogus()) { - srcText.pinIndices(srcStart, srcLength); - if(srcLength > 0) { - return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); - } - } - return -1; -} - -inline int32_t -UnicodeString::lastIndexOf(const UnicodeString& text, - int32_t start, - int32_t _length) const -{ return lastIndexOf(text, 0, text.length(), start, _length); } - -inline int32_t -UnicodeString::lastIndexOf(const UnicodeString& text, - int32_t start) const { - pinIndex(start); - return lastIndexOf(text, 0, text.length(), start, length() - start); -} - -inline int32_t -UnicodeString::lastIndexOf(const UnicodeString& text) const -{ return lastIndexOf(text, 0, text.length(), 0, length()); } - -inline int32_t -UnicodeString::lastIndexOf(char16_t c, - int32_t start, - int32_t _length) const -{ return doLastIndexOf(c, start, _length); } - -inline int32_t -UnicodeString::lastIndexOf(UChar32 c, - int32_t start, - int32_t _length) const { - return doLastIndexOf(c, start, _length); -} - -inline int32_t -UnicodeString::lastIndexOf(char16_t c) const -{ return doLastIndexOf(c, 0, length()); } - -inline int32_t -UnicodeString::lastIndexOf(UChar32 c) const { - return lastIndexOf(c, 0, length()); -} - -inline int32_t -UnicodeString::lastIndexOf(char16_t c, - int32_t start) const { - pinIndex(start); - return doLastIndexOf(c, start, length() - start); -} - -inline int32_t -UnicodeString::lastIndexOf(UChar32 c, - int32_t start) const { - pinIndex(start); - return lastIndexOf(c, start, length() - 
start); -} - -inline UBool -UnicodeString::startsWith(const UnicodeString& text) const -{ return compare(0, text.length(), text, 0, text.length()) == 0; } - -inline UBool -UnicodeString::startsWith(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const -{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } - -inline UBool -UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { - if(srcLength < 0) { - srcLength = u_strlen(toUCharPtr(srcChars)); - } - return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; -} - -inline UBool -UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { - if(srcLength < 0) { - srcLength = u_strlen(toUCharPtr(srcChars)); - } - return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; -} - -inline UBool -UnicodeString::endsWith(const UnicodeString& text) const -{ return doCompare(length() - text.length(), text.length(), - text, 0, text.length()) == 0; } - -inline UBool -UnicodeString::endsWith(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const { - srcText.pinIndices(srcStart, srcLength); - return doCompare(length() - srcLength, srcLength, - srcText, srcStart, srcLength) == 0; -} - -inline UBool -UnicodeString::endsWith(ConstChar16Ptr srcChars, - int32_t srcLength) const { - if(srcLength < 0) { - srcLength = u_strlen(toUCharPtr(srcChars)); - } - return doCompare(length() - srcLength, srcLength, - srcChars, 0, srcLength) == 0; -} - -inline UBool -UnicodeString::endsWith(const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength) const { - if(srcLength < 0) { - srcLength = u_strlen(toUCharPtr(srcChars + srcStart)); - } - return doCompare(length() - srcLength, srcLength, - srcChars, srcStart, srcLength) == 0; -} - -//======================================== -// replace -//======================================== -inline UnicodeString& -UnicodeString::replace(int32_t start, - int32_t _length, - 
const UnicodeString& srcText) -{ return doReplace(start, _length, srcText, 0, srcText.length()); } - -inline UnicodeString& -UnicodeString::replace(int32_t start, - int32_t _length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) -{ return doReplace(start, _length, srcText, srcStart, srcLength); } - -inline UnicodeString& -UnicodeString::replace(int32_t start, - int32_t _length, - ConstChar16Ptr srcChars, - int32_t srcLength) -{ return doReplace(start, _length, srcChars, 0, srcLength); } - -inline UnicodeString& -UnicodeString::replace(int32_t start, - int32_t _length, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength) -{ return doReplace(start, _length, srcChars, srcStart, srcLength); } - -inline UnicodeString& -UnicodeString::replace(int32_t start, - int32_t _length, - char16_t srcChar) -{ return doReplace(start, _length, &srcChar, 0, 1); } - -inline UnicodeString& -UnicodeString::replaceBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText) -{ return doReplace(start, limit - start, srcText, 0, srcText.length()); } - -inline UnicodeString& -UnicodeString::replaceBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit) -{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } - -inline UnicodeString& -UnicodeString::findAndReplace(const UnicodeString& oldText, - const UnicodeString& newText) -{ return findAndReplace(0, length(), oldText, 0, oldText.length(), - newText, 0, newText.length()); } - -inline UnicodeString& -UnicodeString::findAndReplace(int32_t start, - int32_t _length, - const UnicodeString& oldText, - const UnicodeString& newText) -{ return findAndReplace(start, _length, oldText, 0, oldText.length(), - newText, 0, newText.length()); } - -// ============================ -// extract -// ============================ -inline void -UnicodeString::doExtract(int32_t start, - int32_t _length, - UnicodeString& target) 
const -{ target.replace(0, target.length(), *this, start, _length); } - -inline void -UnicodeString::extract(int32_t start, - int32_t _length, - Char16Ptr target, - int32_t targetStart) const -{ doExtract(start, _length, target, targetStart); } - -inline void -UnicodeString::extract(int32_t start, - int32_t _length, - UnicodeString& target) const -{ doExtract(start, _length, target); } - -#if !UCONFIG_NO_CONVERSION - -inline int32_t -UnicodeString::extract(int32_t start, - int32_t _length, - char *dst, - const char *codepage) const - -{ - // This dstSize value will be checked explicitly - return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); -} - -#endif - -inline void -UnicodeString::extractBetween(int32_t start, - int32_t limit, - char16_t *dst, - int32_t dstStart) const { - pinIndex(start); - pinIndex(limit); - doExtract(start, limit - start, dst, dstStart); -} - -inline UnicodeString -UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { - return tempSubString(start, limit - start); -} - -inline char16_t -UnicodeString::doCharAt(int32_t offset) const -{ - if((uint32_t)offset < (uint32_t)length()) { - return getArrayStart()[offset]; - } else { - return kInvalidUChar; - } -} - -inline char16_t -UnicodeString::charAt(int32_t offset) const -{ return doCharAt(offset); } - -inline char16_t -UnicodeString::operator[] (int32_t offset) const -{ return doCharAt(offset); } - -inline UBool -UnicodeString::isEmpty() const { - // Arithmetic or logical right shift does not matter: only testing for 0. 
- return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0; -} - -//======================================== -// Write implementation methods -//======================================== -inline void -UnicodeString::setZeroLength() { - fUnion.fFields.fLengthAndFlags &= kAllStorageFlags; -} - -inline void -UnicodeString::setShortLength(int32_t len) { - // requires 0 <= len <= kMaxShortLength - fUnion.fFields.fLengthAndFlags = - (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift)); -} - -inline void -UnicodeString::setLength(int32_t len) { - if(len <= kMaxShortLength) { - setShortLength(len); - } else { - fUnion.fFields.fLengthAndFlags |= kLengthIsLarge; - fUnion.fFields.fLength = len; - } -} - -inline void -UnicodeString::setToEmpty() { - fUnion.fFields.fLengthAndFlags = kShortString; -} - -inline void -UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) { - setLength(len); - fUnion.fFields.fArray = array; - fUnion.fFields.fCapacity = capacity; -} - -inline UnicodeString& -UnicodeString::operator= (char16_t ch) -{ return doReplace(0, length(), &ch, 0, 1); } - -inline UnicodeString& -UnicodeString::operator= (UChar32 ch) -{ return replace(0, length(), ch); } - -inline UnicodeString& -UnicodeString::setTo(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) -{ - unBogus(); - return doReplace(0, length(), srcText, srcStart, srcLength); -} - -inline UnicodeString& -UnicodeString::setTo(const UnicodeString& srcText, - int32_t srcStart) -{ - unBogus(); - srcText.pinIndex(srcStart); - return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); -} - -inline UnicodeString& -UnicodeString::setTo(const UnicodeString& srcText) -{ - return copyFrom(srcText); -} - -inline UnicodeString& -UnicodeString::setTo(const char16_t *srcChars, - int32_t srcLength) -{ - unBogus(); - return doReplace(0, length(), srcChars, 0, srcLength); -} - -inline UnicodeString& -UnicodeString::setTo(char16_t 
srcChar) -{ - unBogus(); - return doReplace(0, length(), &srcChar, 0, 1); -} - -inline UnicodeString& -UnicodeString::setTo(UChar32 srcChar) -{ - unBogus(); - return replace(0, length(), srcChar); -} - -inline UnicodeString& -UnicodeString::append(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) -{ return doAppend(srcText, srcStart, srcLength); } - -inline UnicodeString& -UnicodeString::append(const UnicodeString& srcText) -{ return doAppend(srcText, 0, srcText.length()); } - -inline UnicodeString& -UnicodeString::append(const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength) -{ return doAppend(srcChars, srcStart, srcLength); } - -inline UnicodeString& -UnicodeString::append(ConstChar16Ptr srcChars, - int32_t srcLength) -{ return doAppend(srcChars, 0, srcLength); } - -inline UnicodeString& -UnicodeString::append(char16_t srcChar) -{ return doAppend(&srcChar, 0, 1); } - -inline UnicodeString& -UnicodeString::operator+= (char16_t ch) -{ return doAppend(&ch, 0, 1); } - -inline UnicodeString& -UnicodeString::operator+= (UChar32 ch) { - return append(ch); -} - -inline UnicodeString& -UnicodeString::operator+= (const UnicodeString& srcText) -{ return doAppend(srcText, 0, srcText.length()); } - -inline UnicodeString& -UnicodeString::insert(int32_t start, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) -{ return doReplace(start, 0, srcText, srcStart, srcLength); } - -inline UnicodeString& -UnicodeString::insert(int32_t start, - const UnicodeString& srcText) -{ return doReplace(start, 0, srcText, 0, srcText.length()); } - -inline UnicodeString& -UnicodeString::insert(int32_t start, - const char16_t *srcChars, - int32_t srcStart, - int32_t srcLength) -{ return doReplace(start, 0, srcChars, srcStart, srcLength); } - -inline UnicodeString& -UnicodeString::insert(int32_t start, - ConstChar16Ptr srcChars, - int32_t srcLength) -{ return doReplace(start, 0, srcChars, 0, srcLength); } - -inline UnicodeString& 
-UnicodeString::insert(int32_t start, - char16_t srcChar) -{ return doReplace(start, 0, &srcChar, 0, 1); } - -inline UnicodeString& -UnicodeString::insert(int32_t start, - UChar32 srcChar) -{ return replace(start, 0, srcChar); } - - -inline UnicodeString& -UnicodeString::remove() -{ - // remove() of a bogus string makes the string empty and non-bogus - if(isBogus()) { - setToEmpty(); - } else { - setZeroLength(); - } - return *this; -} - -inline UnicodeString& -UnicodeString::remove(int32_t start, - int32_t _length) -{ - if(start <= 0 && _length == INT32_MAX) { - // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus - return remove(); - } - return doReplace(start, _length, NULL, 0, 0); -} - -inline UnicodeString& -UnicodeString::removeBetween(int32_t start, - int32_t limit) -{ return doReplace(start, limit - start, NULL, 0, 0); } - -inline UnicodeString & -UnicodeString::retainBetween(int32_t start, int32_t limit) { - truncate(limit); - return doReplace(0, start, NULL, 0, 0); -} - -inline UBool -UnicodeString::truncate(int32_t targetLength) -{ - if(isBogus() && targetLength == 0) { - // truncate(0) of a bogus string makes the string empty and non-bogus - unBogus(); - return FALSE; - } else if((uint32_t)targetLength < (uint32_t)length()) { - setLength(targetLength); - return TRUE; - } else { - return FALSE; - } -} - -inline UnicodeString& -UnicodeString::reverse() -{ return doReverse(0, length()); } - -inline UnicodeString& -UnicodeString::reverse(int32_t start, - int32_t _length) -{ return doReverse(start, _length); } - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/unorm.h b/deps/node/deps/icu-small/source/common/unicode/unorm.h deleted file mode 100644 index 3839de12..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/unorm.h +++ /dev/null @@ -1,472 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (c) 1996-2016, International Business Machines Corporation -* and others. All Rights Reserved. -******************************************************************************* -* File unorm.h -* -* Created by: Vladimir Weinstein 12052000 -* -* Modification history : -* -* Date Name Description -* 02/01/01 synwee Added normalization quickcheck enum and method. -*/ -#ifndef UNORM_H -#define UNORM_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/uiter.h" -#include "unicode/unorm2.h" - -/** - * \file - * \brief C API: Unicode Normalization - * - * Old Unicode normalization API. - * - * This API has been replaced by the unorm2.h API and is only available - * for backward compatibility. The functions here simply delegate to the - * unorm2.h functions, for example unorm2_getInstance() and unorm2_normalize(). - * There is one exception: The new API does not provide a replacement for unorm_compare(). - * Its declaration has been moved to unorm2.h. - * - * unorm_normalize transforms Unicode text into an equivalent composed or - * decomposed form, allowing for easier sorting and searching of text. - * unorm_normalize supports the standard normalization forms described in - * - * Unicode Standard Annex #15: Unicode Normalization Forms. - * - * Characters with accents or other adornments can be encoded in - * several different ways in Unicode. For example, take the character A-acute. 
- * In Unicode, this can be encoded as a single character (the - * "composed" form): - * - * \code - * 00C1 LATIN CAPITAL LETTER A WITH ACUTE - * \endcode - * - * or as two separate characters (the "decomposed" form): - * - * \code - * 0041 LATIN CAPITAL LETTER A - * 0301 COMBINING ACUTE ACCENT - * \endcode - * - * To a user of your program, however, both of these sequences should be - * treated as the same "user-level" character "A with acute accent". When you are searching or - * comparing text, you must ensure that these two sequences are treated - * equivalently. In addition, you must handle characters with more than one - * accent. Sometimes the order of a character's combining accents is - * significant, while in other cases accent sequences in different orders are - * really equivalent. - * - * Similarly, the string "ffi" can be encoded as three separate letters: - * - * \code - * 0066 LATIN SMALL LETTER F - * 0066 LATIN SMALL LETTER F - * 0069 LATIN SMALL LETTER I - * \endcode - * - * or as the single character - * - * \code - * FB03 LATIN SMALL LIGATURE FFI - * \endcode - * - * The ffi ligature is not a distinct semantic character, and strictly speaking - * it shouldn't be in Unicode at all, but it was included for compatibility - * with existing character sets that already provided it. The Unicode standard - * identifies such characters by giving them "compatibility" decompositions - * into the corresponding semantic characters. When sorting and searching, you - * will often want to use these mappings. - * - * unorm_normalize helps solve these problems by transforming text into the - * canonical composed and decomposed forms as shown in the first example above. - * In addition, you can have it perform compatibility decompositions so that - * you can treat compatibility characters the same as their equivalents. 
- * Finally, unorm_normalize rearranges accents into the proper canonical - * order, so that you do not have to worry about accent rearrangement on your - * own. - * - * Form FCD, "Fast C or D", is also designed for collation. - * It allows to work on strings that are not necessarily normalized - * with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed - * characters and their decomposed equivalents the same. - * - * It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings - * may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical - * themselves. - * - * The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character, - * results in a string that is canonically ordered. This means that precomposed characters are allowed for as long - * as their decompositions do not need canonical reordering. - * - * Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts - - * already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will - * return UNORM_YES for most strings in practice. - * - * unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD. - * - * For more details on FCD see the collation design document: - * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm - * - * ICU collation performs either NFD or FCD normalization automatically if normalization - * is turned on for the collator object. - * Beyond collation and string search, normalized strings may be useful for string equivalence comparisons, - * transliteration/transcription, unique representations, etc. - * - * The W3C generally recommends to exchange texts in NFC. 
- * Note also that most legacy character encodings use only precomposed forms and often do not - * encode any combining marks by themselves. For conversion to such character encodings the - * Unicode text needs to be normalized to NFC. - * For more usage examples, see the Unicode Standard Annex. - */ - -// Do not conditionalize the following enum with #ifndef U_HIDE_DEPRECATED_API, -// it is needed for layout of Normalizer object. -/** - * Constants for normalization modes. - * @deprecated ICU 56 Use unorm2.h instead. - */ -typedef enum { - /** No decomposition/composition. @deprecated ICU 56 Use unorm2.h instead. */ - UNORM_NONE = 1, - /** Canonical decomposition. @deprecated ICU 56 Use unorm2.h instead. */ - UNORM_NFD = 2, - /** Compatibility decomposition. @deprecated ICU 56 Use unorm2.h instead. */ - UNORM_NFKD = 3, - /** Canonical decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */ - UNORM_NFC = 4, - /** Default normalization. @deprecated ICU 56 Use unorm2.h instead. */ - UNORM_DEFAULT = UNORM_NFC, - /** Compatibility decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */ - UNORM_NFKC =5, - /** "Fast C or D" form. @deprecated ICU 56 Use unorm2.h instead. */ - UNORM_FCD = 6, - - /** One more than the highest normalization mode constant. @deprecated ICU 56 Use unorm2.h instead. */ - UNORM_MODE_COUNT -} UNormalizationMode; - -#ifndef U_HIDE_DEPRECATED_API - -/** - * Constants for options flags for normalization. - * Use 0 for default options, - * including normalization according to the Unicode version - * that is currently supported by ICU (see u_getUnicodeVersion). - * @deprecated ICU 56 Use unorm2.h instead. - */ -enum { - /** - * Options bit set value to select Unicode 3.2 normalization - * (except NormalizationCorrections). - * At most one Unicode version can be selected at a time. - * @deprecated ICU 56 Use unorm2.h instead. 
- */ - UNORM_UNICODE_3_2=0x20 -}; - -/** - * Lowest-order bit number of unorm_compare() options bits corresponding to - * normalization options bits. - * - * The options parameter for unorm_compare() uses most bits for - * itself and for various comparison and folding flags. - * The most significant bits, however, are shifted down and passed on - * to the normalization implementation. - * (That is, from unorm_compare(..., options, ...), - * options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the - * internal normalization functions.) - * - * @see unorm_compare - * @deprecated ICU 56 Use unorm2.h instead. - */ -#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 - -/** - * Normalize a string. - * The string will be normalized according the specified normalization mode - * and options. - * The source and result buffers must not be the same, nor overlap. - * - * @param source The string to normalize. - * @param sourceLength The length of source, or -1 if NUL-terminated. - * @param mode The normalization mode; one of UNORM_NONE, - * UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT. - * @param options The normalization options, ORed together (0 for no options). - * @param result A pointer to a buffer to receive the result string. - * The result string is NUL-terminated if possible. - * @param resultLength The maximum size of result. - * @param status A pointer to a UErrorCode to receive any errors. - * @return The total buffer size needed; if greater than resultLength, - * the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR. - * @deprecated ICU 56 Use unorm2.h instead. - */ -U_DEPRECATED int32_t U_EXPORT2 -unorm_normalize(const UChar *source, int32_t sourceLength, - UNormalizationMode mode, int32_t options, - UChar *result, int32_t resultLength, - UErrorCode *status); - -/** - * Performing quick check on a string, to quickly determine if the string is - * in a particular normalization format. 
- * Three types of result can be returned UNORM_YES, UNORM_NO or - * UNORM_MAYBE. Result UNORM_YES indicates that the argument - * string is in the desired normalized format, UNORM_NO determines that - * argument string is not in the desired normalized format. A - * UNORM_MAYBE result indicates that a more thorough check is required, - * the user may have to put the string in its normalized form and compare the - * results. - * - * @param source string for determining if it is in a normalized format - * @param sourcelength length of source to test, or -1 if NUL-terminated - * @param mode which normalization form to test for - * @param status a pointer to a UErrorCode to receive any errors - * @return UNORM_YES, UNORM_NO or UNORM_MAYBE - * - * @see unorm_isNormalized - * @deprecated ICU 56 Use unorm2.h instead. - */ -U_DEPRECATED UNormalizationCheckResult U_EXPORT2 -unorm_quickCheck(const UChar *source, int32_t sourcelength, - UNormalizationMode mode, - UErrorCode *status); - -/** - * Performing quick check on a string; same as unorm_quickCheck but - * takes an extra options parameter like most normalization functions. - * - * @param src String that is to be tested if it is in a normalization format. - * @param srcLength Length of source to test, or -1 if NUL-terminated. - * @param mode Which normalization form to test for. - * @param options The normalization options, ORed together (0 for no options). - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return UNORM_YES, UNORM_NO or UNORM_MAYBE - * - * @see unorm_quickCheck - * @see unorm_isNormalized - * @deprecated ICU 56 Use unorm2.h instead. - */ -U_DEPRECATED UNormalizationCheckResult U_EXPORT2 -unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, - UNormalizationMode mode, int32_t options, - UErrorCode *pErrorCode); - -/** - * Test if a string is in a given normalization form. 
- * This is semantically equivalent to source.equals(normalize(source, mode)) . - * - * Unlike unorm_quickCheck(), this function returns a definitive result, - * never a "maybe". - * For NFD, NFKD, and FCD, both functions work exactly the same. - * For NFC and NFKC where quickCheck may return "maybe", this function will - * perform further tests to arrive at a TRUE/FALSE result. - * - * @param src String that is to be tested if it is in a normalization format. - * @param srcLength Length of source to test, or -1 if NUL-terminated. - * @param mode Which normalization form to test for. - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Boolean value indicating whether the source string is in the - * "mode" normalization form. - * - * @see unorm_quickCheck - * @deprecated ICU 56 Use unorm2.h instead. - */ -U_DEPRECATED UBool U_EXPORT2 -unorm_isNormalized(const UChar *src, int32_t srcLength, - UNormalizationMode mode, - UErrorCode *pErrorCode); - -/** - * Test if a string is in a given normalization form; same as unorm_isNormalized but - * takes an extra options parameter like most normalization functions. - * - * @param src String that is to be tested if it is in a normalization format. - * @param srcLength Length of source to test, or -1 if NUL-terminated. - * @param mode Which normalization form to test for. - * @param options The normalization options, ORed together (0 for no options). - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Boolean value indicating whether the source string is in the - * "mode/options" normalization form. - * - * @see unorm_quickCheck - * @see unorm_isNormalized - * @deprecated ICU 56 Use unorm2.h instead. 
- */ -U_DEPRECATED UBool U_EXPORT2 -unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, - UNormalizationMode mode, int32_t options, - UErrorCode *pErrorCode); - -/** - * Iterative normalization forward. - * This function (together with unorm_previous) is somewhat - * similar to the C++ Normalizer class (see its non-static functions). - * - * Iterative normalization is useful when only a small portion of a longer - * string/text needs to be processed. - * - * For example, the likelihood may be high that processing the first 10% of some - * text will be sufficient to find certain data. - * Another example: When one wants to concatenate two normalized strings and get a - * normalized result, it is much more efficient to normalize just a small part of - * the result around the concatenation place instead of re-normalizing everything. - * - * The input text is an instance of the C character iteration API UCharIterator. - * It may wrap around a simple string, a CharacterIterator, a Replaceable, or any - * other kind of text object. - * - * If a buffer overflow occurs, then the caller needs to reset the iterator to the - * old index and call the function again with a larger buffer - if the caller cares - * for the actual output. - * Regardless of the output buffer, the iterator will always be moved to the next - * normalization boundary. - * - * This function (like unorm_previous) serves two purposes: - * - * 1) To find the next boundary so that the normalization of the part of the text - * from the current position to that boundary does not affect and is not affected - * by the part of the text beyond that boundary. - * - * 2) To normalize the text up to the boundary. - * - * The second step is optional, per the doNormalize parameter. - * It is omitted for operations like string concatenation, where the two adjacent - * string ends need to be normalized together. 
- * In such a case, the output buffer will just contain a copy of the text up to the - * boundary. - * - * pNeededToNormalize is an output-only parameter. Its output value is only defined - * if normalization was requested (doNormalize) and successful (especially, no - * buffer overflow). - * It is useful for operations like a normalizing transliterator, where one would - * not want to replace a piece of text if it is not modified. - * - * If doNormalize==TRUE and pNeededToNormalize!=NULL then *pNeeded... is set TRUE - * if the normalization was necessary. - * - * If doNormalize==FALSE then *pNeededToNormalize will be set to FALSE. - * - * If the buffer overflows, then *pNeededToNormalize will be undefined; - * essentially, whenever U_FAILURE is true (like in buffer overflows), this result - * will be undefined. - * - * @param src The input text in the form of a C character iterator. - * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. - * @param destCapacity The number of UChars that fit into dest. - * @param mode The normalization mode. - * @param options The normalization options, ORed together (0 for no options). - * @param doNormalize Indicates if the source text up to the next boundary - * is to be normalized (TRUE) or just copied (FALSE). - * @param pNeededToNormalize Output flag indicating if the normalization resulted in - * different text from the input. - * Not defined if an error occurs including buffer overflow. - * Always FALSE if !doNormalize. - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Length of output (number of UChars) when successful or buffer overflow. - * - * @see unorm_previous - * @see unorm_normalize - * - * @deprecated ICU 56 Use unorm2.h instead. 
- */ -U_DEPRECATED int32_t U_EXPORT2 -unorm_next(UCharIterator *src, - UChar *dest, int32_t destCapacity, - UNormalizationMode mode, int32_t options, - UBool doNormalize, UBool *pNeededToNormalize, - UErrorCode *pErrorCode); - -/** - * Iterative normalization backward. - * This function (together with unorm_next) is somewhat - * similar to the C++ Normalizer class (see its non-static functions). - * For all details see unorm_next. - * - * @param src The input text in the form of a C character iterator. - * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. - * @param destCapacity The number of UChars that fit into dest. - * @param mode The normalization mode. - * @param options The normalization options, ORed together (0 for no options). - * @param doNormalize Indicates if the source text up to the next boundary - * is to be normalized (TRUE) or just copied (FALSE). - * @param pNeededToNormalize Output flag indicating if the normalization resulted in - * different text from the input. - * Not defined if an error occurs including buffer overflow. - * Always FALSE if !doNormalize. - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Length of output (number of UChars) when successful or buffer overflow. - * - * @see unorm_next - * @see unorm_normalize - * - * @deprecated ICU 56 Use unorm2.h instead. - */ -U_DEPRECATED int32_t U_EXPORT2 -unorm_previous(UCharIterator *src, - UChar *dest, int32_t destCapacity, - UNormalizationMode mode, int32_t options, - UBool doNormalize, UBool *pNeededToNormalize, - UErrorCode *pErrorCode); - -/** - * Concatenate normalized strings, making sure that the result is normalized as well. 
- * - * If both the left and the right strings are in - * the normalization form according to "mode/options", - * then the result will be - * - * \code - * dest=normalize(left+right, mode, options) - * \endcode - * - * With the input strings already being normalized, - * this function will use unorm_next() and unorm_previous() - * to find the adjacent end pieces of the input strings. - * Only the concatenation of these end pieces will be normalized and - * then concatenated with the remaining parts of the input strings. - * - * It is allowed to have dest==left to avoid copying the entire left string. - * - * @param left Left source string, may be same as dest. - * @param leftLength Length of left source string, or -1 if NUL-terminated. - * @param right Right source string. Must not be the same as dest, nor overlap. - * @param rightLength Length of right source string, or -1 if NUL-terminated. - * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. - * @param destCapacity The number of UChars that fit into dest. - * @param mode The normalization mode. - * @param options The normalization options, ORed together (0 for no options). - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Length of output (number of UChars) when successful or buffer overflow. - * - * @see unorm_normalize - * @see unorm_next - * @see unorm_previous - * - * @deprecated ICU 56 Use unorm2.h instead. 
- */ -U_DEPRECATED int32_t U_EXPORT2 -unorm_concatenate(const UChar *left, int32_t leftLength, - const UChar *right, int32_t rightLength, - UChar *dest, int32_t destCapacity, - UNormalizationMode mode, int32_t options, - UErrorCode *pErrorCode); - -#endif /* U_HIDE_DEPRECATED_API */ -#endif /* #if !UCONFIG_NO_NORMALIZATION */ -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/unorm2.h b/deps/node/deps/icu-small/source/common/unicode/unorm2.h deleted file mode 100644 index a9bd02f2..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/unorm2.h +++ /dev/null @@ -1,603 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2009-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: unorm2.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009dec15 -* created by: Markus W. Scherer -*/ - -#ifndef __UNORM2_H__ -#define __UNORM2_H__ - -/** - * \file - * \brief C API: New API for Unicode Normalization. - * - * Unicode normalization functionality for standard Unicode normalization or - * for using custom mapping tables. - * All instances of UNormalizer2 are unmodifiable/immutable. - * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller. - * For more details see the Normalizer2 C++ class. - */ - -#include "unicode/utypes.h" -#include "unicode/localpointer.h" -#include "unicode/stringoptions.h" -#include "unicode/uset.h" - -/** - * Constants for normalization modes. 
- * For details about standard Unicode normalization forms - * and about the algorithms which are also used with custom mapping tables - * see http://www.unicode.org/unicode/reports/tr15/ - * @stable ICU 4.4 - */ -typedef enum { - /** - * Decomposition followed by composition. - * Same as standard NFC when using an "nfc" instance. - * Same as standard NFKC when using an "nfkc" instance. - * For details about standard Unicode normalization forms - * see http://www.unicode.org/unicode/reports/tr15/ - * @stable ICU 4.4 - */ - UNORM2_COMPOSE, - /** - * Map, and reorder canonically. - * Same as standard NFD when using an "nfc" instance. - * Same as standard NFKD when using an "nfkc" instance. - * For details about standard Unicode normalization forms - * see http://www.unicode.org/unicode/reports/tr15/ - * @stable ICU 4.4 - */ - UNORM2_DECOMPOSE, - /** - * "Fast C or D" form. - * If a string is in this form, then further decomposition without reordering - * would yield the same form as DECOMPOSE. - * Text in "Fast C or D" form can be processed efficiently with data tables - * that are "canonically closed", that is, that provide equivalent data for - * equivalent text, without having to be fully normalized. - * Not a standard Unicode normalization form. - * Not a unique form: Different FCD strings can be canonically equivalent. - * For details see http://www.unicode.org/notes/tn5/#FCD - * @stable ICU 4.4 - */ - UNORM2_FCD, - /** - * Compose only contiguously. - * Also known as "FCC" or "Fast C Contiguous". - * The result will often but not always be in NFC. - * The result will conform to FCD which is useful for processing. - * Not a standard Unicode normalization form. - * For details see http://www.unicode.org/notes/tn5/#FCC - * @stable ICU 4.4 - */ - UNORM2_COMPOSE_CONTIGUOUS -} UNormalization2Mode; - -/** - * Result values for normalization quick check functions. 
- * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms - * @stable ICU 2.0 - */ -typedef enum UNormalizationCheckResult { - /** - * The input string is not in the normalization form. - * @stable ICU 2.0 - */ - UNORM_NO, - /** - * The input string is in the normalization form. - * @stable ICU 2.0 - */ - UNORM_YES, - /** - * The input string may or may not be in the normalization form. - * This value is only returned for composition forms like NFC and FCC, - * when a backward-combining character is found for which the surrounding text - * would have to be analyzed further. - * @stable ICU 2.0 - */ - UNORM_MAYBE -} UNormalizationCheckResult; - -/** - * Opaque C service object type for the new normalization API. - * @stable ICU 4.4 - */ -struct UNormalizer2; -typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */ - -#if !UCONFIG_NO_NORMALIZATION - -/** - * Returns a UNormalizer2 instance for Unicode NFC normalization. - * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ -U_STABLE const UNormalizer2 * U_EXPORT2 -unorm2_getNFCInstance(UErrorCode *pErrorCode); - -/** - * Returns a UNormalizer2 instance for Unicode NFD normalization. - * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. 
(See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ -U_STABLE const UNormalizer2 * U_EXPORT2 -unorm2_getNFDInstance(UErrorCode *pErrorCode); - -/** - * Returns a UNormalizer2 instance for Unicode NFKC normalization. - * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ -U_STABLE const UNormalizer2 * U_EXPORT2 -unorm2_getNFKCInstance(UErrorCode *pErrorCode); - -/** - * Returns a UNormalizer2 instance for Unicode NFKD normalization. - * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ -U_STABLE const UNormalizer2 * U_EXPORT2 -unorm2_getNFKDInstance(UErrorCode *pErrorCode); - -/** - * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization. - * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ -U_STABLE const UNormalizer2 * U_EXPORT2 -unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); - -/** - * Returns a UNormalizer2 instance which uses the specified data file - * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) - * and which composes or decomposes text according to the specified mode. - * Returns an unmodifiable singleton instance. Do not delete it. - * - * Use packageName=NULL for data files that are part of ICU's own data. - * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. - * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. - * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. - * - * @param packageName NULL for ICU built-in data, otherwise application data package name - * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file - * @param mode normalization mode (compose or decompose etc.) - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested UNormalizer2, if successful - * @stable ICU 4.4 - */ -U_STABLE const UNormalizer2 * U_EXPORT2 -unorm2_getInstance(const char *packageName, - const char *name, - UNormalization2Mode mode, - UErrorCode *pErrorCode); - -/** - * Constructs a filtered normalizer wrapping any UNormalizer2 instance - * and a filter set. - * Both are aliased and must not be modified or deleted while this object - * is used. - * The filter set should be frozen; otherwise the performance will suffer greatly. - * @param norm2 wrapped UNormalizer2 instance - * @param filterSet USet which determines the characters to be normalized - * @param pErrorCode Standard ICU error code. 
Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested UNormalizer2, if successful - * @stable ICU 4.4 - */ -U_STABLE UNormalizer2 * U_EXPORT2 -unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode); - -/** - * Closes a UNormalizer2 instance from unorm2_openFiltered(). - * Do not close instances from unorm2_getInstance()! - * @param norm2 UNormalizer2 instance to be closed - * @stable ICU 4.4 - */ -U_STABLE void U_EXPORT2 -unorm2_close(UNormalizer2 *norm2); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUNormalizer2Pointer - * "Smart pointer" class, closes a UNormalizer2 via unorm2_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close); - -U_NAMESPACE_END - -#endif - -/** - * Writes the normalized form of the source string to the destination string - * (replacing its contents) and returns the length of the destination string. - * The source and destination strings must be different buffers. - * @param norm2 UNormalizer2 instance - * @param src source string - * @param length length of the source string, or -1 if NUL-terminated - * @param dest destination string; its contents is replaced with normalized src - * @param capacity number of UChars that can be written to dest - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return dest - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -unorm2_normalize(const UNormalizer2 *norm2, - const UChar *src, int32_t length, - UChar *dest, int32_t capacity, - UErrorCode *pErrorCode); -/** - * Appends the normalized form of the second string to the first string - * (merging them at the boundary) and returns the length of the first string. - * The result is normalized if the first string was normalized. - * The first and second strings must be different buffers. - * @param norm2 UNormalizer2 instance - * @param first string, should be normalized - * @param firstLength length of the first string, or -1 if NUL-terminated - * @param firstCapacity number of UChars that can be written to first - * @param second string, will be normalized - * @param secondLength length of the source string, or -1 if NUL-terminated - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return first - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, - UChar *first, int32_t firstLength, int32_t firstCapacity, - const UChar *second, int32_t secondLength, - UErrorCode *pErrorCode); -/** - * Appends the second string to the first string - * (merging them at the boundary) and returns the length of the first string. - * The result is normalized if both the strings were normalized. - * The first and second strings must be different buffers. 
- * @param norm2 UNormalizer2 instance - * @param first string, should be normalized - * @param firstLength length of the first string, or -1 if NUL-terminated - * @param firstCapacity number of UChars that can be written to first - * @param second string, should be normalized - * @param secondLength length of the source string, or -1 if NUL-terminated - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return first - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -unorm2_append(const UNormalizer2 *norm2, - UChar *first, int32_t firstLength, int32_t firstCapacity, - const UChar *second, int32_t secondLength, - UErrorCode *pErrorCode); - -/** - * Gets the decomposition mapping of c. - * Roughly equivalent to normalizing the String form of c - * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function - * returns a negative value and does not write a string - * if c does not have a decomposition mapping in this instance's data. - * This function is independent of the mode of the UNormalizer2. - * @param norm2 UNormalizer2 instance - * @param c code point - * @param decomposition String buffer which will be set to c's - * decomposition mapping, if there is one. - * @param capacity number of UChars that can be written to decomposition - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value - * @stable ICU 4.6 - */ -U_STABLE int32_t U_EXPORT2 -unorm2_getDecomposition(const UNormalizer2 *norm2, - UChar32 c, UChar *decomposition, int32_t capacity, - UErrorCode *pErrorCode); - -/** - * Gets the raw decomposition mapping of c. - * - * This is similar to the unorm2_getDecomposition() function but returns the - * raw decomposition mapping as specified in UnicodeData.txt or - * (for custom data) in the mapping files processed by the gennorm2 tool. - * By contrast, unorm2_getDecomposition() returns the processed, - * recursively-decomposed version of this mapping. - * - * When used on a standard NFKC Normalizer2 instance, - * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. - * - * When used on a standard NFC Normalizer2 instance, - * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); - * in this case, the result contains either one or two code points (=1..4 UChars). - * - * This function is independent of the mode of the UNormalizer2. - * @param norm2 UNormalizer2 instance - * @param c code point - * @param decomposition String buffer which will be set to c's - * raw decomposition mapping, if there is one. - * @param capacity number of UChars that can be written to decomposition - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value - * @stable ICU 49 - */ -U_STABLE int32_t U_EXPORT2 -unorm2_getRawDecomposition(const UNormalizer2 *norm2, - UChar32 c, UChar *decomposition, int32_t capacity, - UErrorCode *pErrorCode); - -/** - * Performs pairwise composition of a & b and returns the composite if there is one. - * - * Returns a composite code point c only if c has a two-way mapping to a+b. - * In standard Unicode normalization, this means that - * c has a canonical decomposition to a+b - * and c does not have the Full_Composition_Exclusion property. - * - * This function is independent of the mode of the UNormalizer2. - * @param norm2 UNormalizer2 instance - * @param a A (normalization starter) code point. - * @param b Another code point. - * @return The non-negative composite code point if there is one; otherwise a negative value. - * @stable ICU 49 - */ -U_STABLE UChar32 U_EXPORT2 -unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b); - -/** - * Gets the combining class of c. - * The default implementation returns 0 - * but all standard implementations return the Unicode Canonical_Combining_Class value. - * @param norm2 UNormalizer2 instance - * @param c code point - * @return c's combining class - * @stable ICU 49 - */ -U_STABLE uint8_t U_EXPORT2 -unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c); - -/** - * Tests if the string is normalized. - * Internally, in cases where the quickCheck() method would return "maybe" - * (which is only possible for the two COMPOSE modes) this method - * resolves to "yes" or "no" to provide a definitive result, - * at the cost of doing more work in those cases. - * @param norm2 UNormalizer2 instance - * @param s input string - * @param length length of the string, or -1 if NUL-terminated - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. 
Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return TRUE if s is normalized - * @stable ICU 4.4 - */ -U_STABLE UBool U_EXPORT2 -unorm2_isNormalized(const UNormalizer2 *norm2, - const UChar *s, int32_t length, - UErrorCode *pErrorCode); - -/** - * Tests if the string is normalized. - * For the two COMPOSE modes, the result could be "maybe" in cases that - * would take a little more work to resolve definitively. - * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster - * combination of quick check + normalization, to avoid - * re-checking the "yes" prefix. - * @param norm2 UNormalizer2 instance - * @param s input string - * @param length length of the string, or -1 if NUL-terminated - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return UNormalizationCheckResult - * @stable ICU 4.4 - */ -U_STABLE UNormalizationCheckResult U_EXPORT2 -unorm2_quickCheck(const UNormalizer2 *norm2, - const UChar *s, int32_t length, - UErrorCode *pErrorCode); - -/** - * Returns the end of the normalized substring of the input string. - * In other words, with end=spanQuickCheckYes(s, ec); - * the substring UnicodeString(s, 0, end) - * will pass the quick check with a "yes" result. - * - * The returned end index is usually one or more characters before the - * "no" or "maybe" character: The end index is at a normalization boundary. - * (See the class documentation for more about normalization boundaries.) - * - * When the goal is a normalized string and most input strings are expected - * to be normalized already, then call this method, - * and if it returns a prefix shorter than the input string, - * copy that prefix and use normalizeSecondAndAppend() for the remainder. 
- * @param norm2 UNormalizer2 instance - * @param s input string - * @param length length of the string, or -1 if NUL-terminated - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return "yes" span end index - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, - const UChar *s, int32_t length, - UErrorCode *pErrorCode); - -/** - * Tests if the character always has a normalization boundary before it, - * regardless of context. - * For details see the Normalizer2 base class documentation. - * @param norm2 UNormalizer2 instance - * @param c character to test - * @return TRUE if c has a normalization boundary before it - * @stable ICU 4.4 - */ -U_STABLE UBool U_EXPORT2 -unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c); - -/** - * Tests if the character always has a normalization boundary after it, - * regardless of context. - * For details see the Normalizer2 base class documentation. - * @param norm2 UNormalizer2 instance - * @param c character to test - * @return TRUE if c has a normalization boundary after it - * @stable ICU 4.4 - */ -U_STABLE UBool U_EXPORT2 -unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c); - -/** - * Tests if the character is normalization-inert. - * For details see the Normalizer2 base class documentation. - * @param norm2 UNormalizer2 instance - * @param c character to test - * @return TRUE if c is normalization-inert - * @stable ICU 4.4 - */ -U_STABLE UBool U_EXPORT2 -unorm2_isInert(const UNormalizer2 *norm2, UChar32 c); - -/** - * Compares two strings for canonical equivalence. - * Further options include case-insensitive comparison and - * code point order (as opposed to code unit order). 
- * - * Canonical equivalence between two strings is defined as their normalized - * forms (NFD or NFC) being identical. - * This function compares strings incrementally instead of normalizing - * (and optionally case-folding) both strings entirely, - * improving performance significantly. - * - * Bulk normalization is only necessary if the strings do not fulfill the FCD - * conditions. Only in this case, and only if the strings are relatively long, - * is memory allocated temporarily. - * For FCD strings and short non-FCD strings there is no memory allocation. - * - * Semantically, this is equivalent to - * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2)))) - * where code point order and foldCase are all optional. - * - * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match - * the case folding must be performed first, then the normalization. - * - * @param s1 First source string. - * @param length1 Length of first source string, or -1 if NUL-terminated. - * - * @param s2 Second source string. - * @param length2 Length of second source string, or -1 if NUL-terminated. - * - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Case-sensitive comparison in code unit order, and the input strings - * are quick-checked for FCD. - * - * - UNORM_INPUT_IS_FCD - * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. - * If not set, the function will quickCheck for FCD - * and normalize if necessary. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_COMPARE_IGNORE_CASE - * Set to compare strings case-insensitively using case folding, - * instead of case-sensitively. - * If set, then the following case folding options are used. 
- * - * - Options as used with case-insensitive comparisons, currently: - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * (see u_strCaseCompare for details) - * - * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT - * - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return <0 or 0 or >0 as usual for string comparisons - * - * @see unorm_normalize - * @see UNORM_FCD - * @see u_strCompare - * @see u_strCaseCompare - * - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -unorm_compare(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode); - -#endif /* !UCONFIG_NO_NORMALIZATION */ -#endif /* __UNORM2_H__ */ diff --git a/deps/node/deps/icu-small/source/common/unicode/uobject.h b/deps/node/deps/icu-small/source/common/unicode/uobject.h deleted file mode 100644 index f7a7b6ed..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uobject.h +++ /dev/null @@ -1,319 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2002-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: uobject.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jun26 -* created by: Markus W. Scherer -*/ - -#ifndef __UOBJECT_H__ -#define __UOBJECT_H__ - -#include "unicode/utypes.h" - -/** - * \file - * \brief C++ API: Common ICU base class UObject. 
- */ - -/** - * \def U_NO_THROW - * Define this to define the throw() specification so - * certain functions do not throw any exceptions - * - * UMemory operator new methods should have the throw() specification - * appended to them, so that the compiler adds the additional NULL check - * before calling constructors. Without, if operator new returns NULL the - * constructor is still called, and if the constructor references member - * data, (which it typically does), the result is a segmentation violation. - * - * @stable ICU 4.2 - */ -#ifndef U_NO_THROW -#define U_NO_THROW throw() -#endif - -/*===========================================================================*/ -/* UClassID-based RTTI */ -/*===========================================================================*/ - -/** - * UClassID is used to identify classes without using the compiler's RTTI. - * This was used before C++ compilers consistently supported RTTI. - * ICU 4.6 requires compiler RTTI to be turned on. - * - * Each class hierarchy which needs - * to implement polymorphic clone() or operator==() defines two methods, - * described in detail below. UClassID values can be compared using - * operator==(). Nothing else should be done with them. - * - * \par - * In class hierarchies that implement "poor man's RTTI", - * each concrete subclass implements getDynamicClassID() in the same way: - * - * \code - * class Derived { - * public: - * virtual UClassID getDynamicClassID() const - * { return Derived::getStaticClassID(); } - * } - * \endcode - * - * Each concrete class implements getStaticClassID() as well, which allows - * clients to test for a specific type. 
- * - * \code - * class Derived { - * public: - * static UClassID U_EXPORT2 getStaticClassID(); - * private: - * static char fgClassID; - * } - * - * // In Derived.cpp: - * UClassID Derived::getStaticClassID() - * { return (UClassID)&Derived::fgClassID; } - * char Derived::fgClassID = 0; // Value is irrelevant - * \endcode - * @stable ICU 2.0 - */ -typedef void* UClassID; - -U_NAMESPACE_BEGIN - -/** - * UMemory is the common ICU base class. - * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4). - * - * This is primarily to make it possible and simple to override the - * C++ memory management by adding new/delete operators to this base class. - * - * To override ALL ICU memory management, including that from plain C code, - * replace the allocation functions declared in cmemory.h - * - * UMemory does not contain any virtual functions. - * Common "boilerplate" functions are defined in UObject. - * - * @stable ICU 2.4 - */ -class U_COMMON_API UMemory { -public: - -/* test versions for debugging shaper heap memory problems */ -#ifdef SHAPER_MEMORY_DEBUG - static void * NewArray(int size, int count); - static void * GrowArray(void * array, int newSize ); - static void FreeArray(void * array ); -#endif - -#if U_OVERRIDE_CXX_ALLOCATION - /** - * Override for ICU4C C++ memory management. - * simple, non-class types are allocated using the macros in common/cmemory.h - * (uprv_malloc(), uprv_free(), uprv_realloc()); - * they or something else could be used here to implement C++ new/delete - * for ICU4C C++ classes - * @stable ICU 2.4 - */ - static void * U_EXPORT2 operator new(size_t size) U_NO_THROW; - - /** - * Override for ICU4C C++ memory management. - * See new(). - * @stable ICU 2.4 - */ - static void * U_EXPORT2 operator new[](size_t size) U_NO_THROW; - - /** - * Override for ICU4C C++ memory management. 
- * simple, non-class types are allocated using the macros in common/cmemory.h - * (uprv_malloc(), uprv_free(), uprv_realloc()); - * they or something else could be used here to implement C++ new/delete - * for ICU4C C++ classes - * @stable ICU 2.4 - */ - static void U_EXPORT2 operator delete(void *p) U_NO_THROW; - - /** - * Override for ICU4C C++ memory management. - * See delete(). - * @stable ICU 2.4 - */ - static void U_EXPORT2 operator delete[](void *p) U_NO_THROW; - -#if U_HAVE_PLACEMENT_NEW - /** - * Override for ICU4C C++ memory management for STL. - * See new(). - * @stable ICU 2.6 - */ - static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NO_THROW { return ptr; } - - /** - * Override for ICU4C C++ memory management for STL. - * See delete(). - * @stable ICU 2.6 - */ - static inline void U_EXPORT2 operator delete(void *, void *) U_NO_THROW {} -#endif /* U_HAVE_PLACEMENT_NEW */ -#if U_HAVE_DEBUG_LOCATION_NEW - /** - * This method overrides the MFC debug version of the operator new - * - * @param size The requested memory size - * @param file The file where the allocation was requested - * @param line The line where the allocation was requested - */ - static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NO_THROW; - /** - * This method provides a matching delete for the MFC debug new - * - * @param p The pointer to the allocated memory - * @param file The file where the allocation was requested - * @param line The line where the allocation was requested - */ - static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NO_THROW; -#endif /* U_HAVE_DEBUG_LOCATION_NEW */ -#endif /* U_OVERRIDE_CXX_ALLOCATION */ - - /* - * Assignment operator not declared. The compiler will provide one - * which does nothing since this class does not contain any data members. - * API/code coverage may show the assignment operator as present and - * untested - ignore. 
- * Subclasses need this assignment operator if they use compiler-provided - * assignment operators of their own. An alternative to not declaring one - * here would be to declare and empty-implement a protected or public one. - UMemory &UMemory::operator=(const UMemory &); - */ -}; - -/** - * UObject is the common ICU "boilerplate" class. - * UObject inherits UMemory (starting with ICU 2.4), - * and all other public ICU C++ classes - * are derived from UObject (starting with ICU 2.2). - * - * UObject contains common virtual functions, in particular a virtual destructor. - * - * The clone() function is not available in UObject because it is not - * implemented by all ICU classes. - * Many ICU services provide a clone() function for their class trees, - * defined on the service's C++ base class, and all subclasses within that - * service class tree return a pointer to the service base class - * (which itself is a subclass of UObject). - * This is because some compilers do not support covariant (same-as-this) - * return types; cast to the appropriate subclass if necessary. - * - * @stable ICU 2.2 - */ -class U_COMMON_API UObject : public UMemory { -public: - /** - * Destructor. - * - * @stable ICU 2.2 - */ - virtual ~UObject(); - - /** - * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class. - * The base class implementation returns a dummy value. - * - * Use compiler RTTI rather than ICU's "poor man's RTTI". - * Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI". - * - * @stable ICU 2.2 - */ - virtual UClassID getDynamicClassID() const; - -protected: - // the following functions are protected to prevent instantiation and - // direct use of UObject itself - - // default constructor - // inline UObject() {} - - // copy constructor - // inline UObject(const UObject &other) {} - -#if 0 - // TODO Sometime in the future. Implement operator==(). 
- // (This comment inserted in 2.2) - // some or all of the following "boilerplate" functions may be made public - // in a future ICU4C release when all subclasses implement them - - // assignment operator - // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74) - // commented out because the implementation is the same as a compiler's default - // UObject &operator=(const UObject &other) { return *this; } - - // comparison operators - virtual inline UBool operator==(const UObject &other) const { return this==&other; } - inline UBool operator!=(const UObject &other) const { return !operator==(other); } - - // clone() commented out from the base class: - // some compilers do not support co-variant return types - // (i.e., subclasses would have to return UObject * as well, instead of SubClass *) - // see also UObject class documentation. - // virtual UObject *clone() const; -#endif - - /* - * Assignment operator not declared. The compiler will provide one - * which does nothing since this class does not contain any data members. - * API/code coverage may show the assignment operator as present and - * untested - ignore. - * Subclasses need this assignment operator if they use compiler-provided - * assignment operators of their own. An alternative to not declaring one - * here would be to declare and empty-implement a protected or public one. - UObject &UObject::operator=(const UObject &); - */ -}; - -#ifndef U_HIDE_INTERNAL_API -/** - * This is a simple macro to add ICU RTTI to an ICU object implementation. - * This does not go into the header. This should only be used in *.cpp files. - * - * @param myClass The name of the class that needs RTTI defined. 
- * @internal - */ -#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \ - UClassID U_EXPORT2 myClass::getStaticClassID() { \ - static char classID = 0; \ - return (UClassID)&classID; \ - } \ - UClassID myClass::getDynamicClassID() const \ - { return myClass::getStaticClassID(); } - - -/** - * This macro adds ICU RTTI to an ICU abstract class implementation. - * This macro should be invoked in *.cpp files. The corresponding - * header should declare getStaticClassID. - * - * @param myClass The name of the class that needs RTTI defined. - * @internal - */ -#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \ - UClassID U_EXPORT2 myClass::getStaticClassID() { \ - static char classID = 0; \ - return (UClassID)&classID; \ - } - -#endif /* U_HIDE_INTERNAL_API */ - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/urename.h b/deps/node/deps/icu-small/source/common/unicode/urename.h deleted file mode 100644 index 5812173e..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/urename.h +++ /dev/null @@ -1,1864 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* -* file name: urename.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* Created by: Perl script tools/genren.pl written by Vladimir Weinstein -* -* Contains data for renaming ICU exports. -* Gets included by umachine.h -* -* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT -* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN! -*/ - -#ifndef URENAME_H -#define URENAME_H - -/* U_DISABLE_RENAMING can be defined in the following ways: - * - when running configure, e.g. 
- * runConfigureICU Linux --disable-renaming - * - by changing the default setting of U_DISABLE_RENAMING in uconfig.h - */ - -#include "unicode/uconfig.h" - -#if !U_DISABLE_RENAMING - -// Disable Renaming for Visual Studio's IntelliSense feature, so that 'Go-to-Definition' (F12) will work. -#if !(defined(_MSC_VER) && defined(__INTELLISENSE__)) - -/* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give - the platform a chance to define it first. - Normally (if utypes.h or umachine.h was included first) this will not be necessary as it will already be defined. - */ - -#ifndef U_ICU_ENTRY_POINT_RENAME -#include "unicode/umachine.h" -#endif - -/* If we still don't have U_ICU_ENTRY_POINT_RENAME use the default. */ -#ifndef U_ICU_ENTRY_POINT_RENAME -#include "unicode/uvernum.h" -#endif - -/* Error out before the following defines cause very strange and unexpected code breakage */ -#ifndef U_ICU_ENTRY_POINT_RENAME -#error U_ICU_ENTRY_POINT_RENAME is not defined - cannot continue. Consider defining U_DISABLE_RENAMING if renaming should not be used. 
-#endif - - -/* C exports renaming data */ - -#define T_CString_int64ToString U_ICU_ENTRY_POINT_RENAME(T_CString_int64ToString) -#define T_CString_integerToString U_ICU_ENTRY_POINT_RENAME(T_CString_integerToString) -#define T_CString_stringToInteger U_ICU_ENTRY_POINT_RENAME(T_CString_stringToInteger) -#define T_CString_toLowerCase U_ICU_ENTRY_POINT_RENAME(T_CString_toLowerCase) -#define T_CString_toUpperCase U_ICU_ENTRY_POINT_RENAME(T_CString_toUpperCase) -#define UCNV_FROM_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_ESCAPE) -#define UCNV_FROM_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SKIP) -#define UCNV_FROM_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_STOP) -#define UCNV_FROM_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SUBSTITUTE) -#define UCNV_TO_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_ESCAPE) -#define UCNV_TO_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SKIP) -#define UCNV_TO_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_STOP) -#define UCNV_TO_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SUBSTITUTE) -#define UDataMemory_createNewInstance U_ICU_ENTRY_POINT_RENAME(UDataMemory_createNewInstance) -#define UDataMemory_init U_ICU_ENTRY_POINT_RENAME(UDataMemory_init) -#define UDataMemory_isLoaded U_ICU_ENTRY_POINT_RENAME(UDataMemory_isLoaded) -#define UDataMemory_normalizeDataPointer U_ICU_ENTRY_POINT_RENAME(UDataMemory_normalizeDataPointer) -#define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) -#define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) -#define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) -#define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) -#define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) -#define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) -#define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) -#define _IMAPData 
U_ICU_ENTRY_POINT_RENAME(_IMAPData) -#define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData) -#define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data) -#define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1) -#define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11) -#define _LMBCSData16 U_ICU_ENTRY_POINT_RENAME(_LMBCSData16) -#define _LMBCSData17 U_ICU_ENTRY_POINT_RENAME(_LMBCSData17) -#define _LMBCSData18 U_ICU_ENTRY_POINT_RENAME(_LMBCSData18) -#define _LMBCSData19 U_ICU_ENTRY_POINT_RENAME(_LMBCSData19) -#define _LMBCSData2 U_ICU_ENTRY_POINT_RENAME(_LMBCSData2) -#define _LMBCSData3 U_ICU_ENTRY_POINT_RENAME(_LMBCSData3) -#define _LMBCSData4 U_ICU_ENTRY_POINT_RENAME(_LMBCSData4) -#define _LMBCSData5 U_ICU_ENTRY_POINT_RENAME(_LMBCSData5) -#define _LMBCSData6 U_ICU_ENTRY_POINT_RENAME(_LMBCSData6) -#define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8) -#define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data) -#define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData) -#define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData) -#define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) -#define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data) -#define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) -#define _UTF16v2Data U_ICU_ENTRY_POINT_RENAME(_UTF16v2Data) -#define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) -#define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) -#define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) -#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) -#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) -#define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup) -#define checkImpl U_ICU_ENTRY_POINT_RENAME(checkImpl) -#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) -#define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup) -#define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats) -#define gTimeZoneFilesInitOnce 
U_ICU_ENTRY_POINT_RENAME(gTimeZoneFilesInitOnce) -#define izrule_clone U_ICU_ENTRY_POINT_RENAME(izrule_clone) -#define izrule_close U_ICU_ENTRY_POINT_RENAME(izrule_close) -#define izrule_equals U_ICU_ENTRY_POINT_RENAME(izrule_equals) -#define izrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(izrule_getDSTSavings) -#define izrule_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(izrule_getDynamicClassID) -#define izrule_getFinalStart U_ICU_ENTRY_POINT_RENAME(izrule_getFinalStart) -#define izrule_getFirstStart U_ICU_ENTRY_POINT_RENAME(izrule_getFirstStart) -#define izrule_getName U_ICU_ENTRY_POINT_RENAME(izrule_getName) -#define izrule_getNextStart U_ICU_ENTRY_POINT_RENAME(izrule_getNextStart) -#define izrule_getPreviousStart U_ICU_ENTRY_POINT_RENAME(izrule_getPreviousStart) -#define izrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(izrule_getRawOffset) -#define izrule_getStaticClassID U_ICU_ENTRY_POINT_RENAME(izrule_getStaticClassID) -#define izrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(izrule_isEquivalentTo) -#define izrule_open U_ICU_ENTRY_POINT_RENAME(izrule_open) -#define locale_getKeywords U_ICU_ENTRY_POINT_RENAME(locale_getKeywords) -#define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart) -#define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default) -#define locale_set_default U_ICU_ENTRY_POINT_RENAME(locale_set_default) -#define pl_addFontRun U_ICU_ENTRY_POINT_RENAME(pl_addFontRun) -#define pl_addLocaleRun U_ICU_ENTRY_POINT_RENAME(pl_addLocaleRun) -#define pl_addValueRun U_ICU_ENTRY_POINT_RENAME(pl_addValueRun) -#define pl_close U_ICU_ENTRY_POINT_RENAME(pl_close) -#define pl_closeFontRuns U_ICU_ENTRY_POINT_RENAME(pl_closeFontRuns) -#define pl_closeLine U_ICU_ENTRY_POINT_RENAME(pl_closeLine) -#define pl_closeLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_closeLocaleRuns) -#define pl_closeValueRuns U_ICU_ENTRY_POINT_RENAME(pl_closeValueRuns) -#define pl_countLineRuns U_ICU_ENTRY_POINT_RENAME(pl_countLineRuns) -#define pl_create 
U_ICU_ENTRY_POINT_RENAME(pl_create) -#define pl_getAscent U_ICU_ENTRY_POINT_RENAME(pl_getAscent) -#define pl_getDescent U_ICU_ENTRY_POINT_RENAME(pl_getDescent) -#define pl_getFontRunCount U_ICU_ENTRY_POINT_RENAME(pl_getFontRunCount) -#define pl_getFontRunFont U_ICU_ENTRY_POINT_RENAME(pl_getFontRunFont) -#define pl_getFontRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLastLimit) -#define pl_getFontRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLimit) -#define pl_getLeading U_ICU_ENTRY_POINT_RENAME(pl_getLeading) -#define pl_getLineAscent U_ICU_ENTRY_POINT_RENAME(pl_getLineAscent) -#define pl_getLineDescent U_ICU_ENTRY_POINT_RENAME(pl_getLineDescent) -#define pl_getLineLeading U_ICU_ENTRY_POINT_RENAME(pl_getLineLeading) -#define pl_getLineVisualRun U_ICU_ENTRY_POINT_RENAME(pl_getLineVisualRun) -#define pl_getLineWidth U_ICU_ENTRY_POINT_RENAME(pl_getLineWidth) -#define pl_getLocaleRunCount U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunCount) -#define pl_getLocaleRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLastLimit) -#define pl_getLocaleRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLimit) -#define pl_getLocaleRunLocale U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLocale) -#define pl_getParagraphLevel U_ICU_ENTRY_POINT_RENAME(pl_getParagraphLevel) -#define pl_getTextDirection U_ICU_ENTRY_POINT_RENAME(pl_getTextDirection) -#define pl_getValueRunCount U_ICU_ENTRY_POINT_RENAME(pl_getValueRunCount) -#define pl_getValueRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLastLimit) -#define pl_getValueRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLimit) -#define pl_getValueRunValue U_ICU_ENTRY_POINT_RENAME(pl_getValueRunValue) -#define pl_getVisualRunAscent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunAscent) -#define pl_getVisualRunDescent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDescent) -#define pl_getVisualRunDirection U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDirection) -#define pl_getVisualRunFont U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunFont) -#define 
pl_getVisualRunGlyphCount U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphCount) -#define pl_getVisualRunGlyphToCharMap U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphToCharMap) -#define pl_getVisualRunGlyphs U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphs) -#define pl_getVisualRunLeading U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunLeading) -#define pl_getVisualRunPositions U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunPositions) -#define pl_isComplex U_ICU_ENTRY_POINT_RENAME(pl_isComplex) -#define pl_nextLine U_ICU_ENTRY_POINT_RENAME(pl_nextLine) -#define pl_openEmptyFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyFontRuns) -#define pl_openEmptyLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyLocaleRuns) -#define pl_openEmptyValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyValueRuns) -#define pl_openFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openFontRuns) -#define pl_openLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openLocaleRuns) -#define pl_openValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openValueRuns) -#define pl_reflow U_ICU_ENTRY_POINT_RENAME(pl_reflow) -#define pl_resetFontRuns U_ICU_ENTRY_POINT_RENAME(pl_resetFontRuns) -#define pl_resetLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_resetLocaleRuns) -#define pl_resetValueRuns U_ICU_ENTRY_POINT_RENAME(pl_resetValueRuns) -#define res_countArrayItems U_ICU_ENTRY_POINT_RENAME(res_countArrayItems) -#define res_findResource U_ICU_ENTRY_POINT_RENAME(res_findResource) -#define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias) -#define res_getArrayItem U_ICU_ENTRY_POINT_RENAME(res_getArrayItem) -#define res_getBinary U_ICU_ENTRY_POINT_RENAME(res_getBinary) -#define res_getIntVector U_ICU_ENTRY_POINT_RENAME(res_getIntVector) -#define res_getPublicType U_ICU_ENTRY_POINT_RENAME(res_getPublicType) -#define res_getResource U_ICU_ENTRY_POINT_RENAME(res_getResource) -#define res_getString U_ICU_ENTRY_POINT_RENAME(res_getString) -#define res_getTableItemByIndex U_ICU_ENTRY_POINT_RENAME(res_getTableItemByIndex) -#define res_getTableItemByKey 
U_ICU_ENTRY_POINT_RENAME(res_getTableItemByKey) -#define res_load U_ICU_ENTRY_POINT_RENAME(res_load) -#define res_read U_ICU_ENTRY_POINT_RENAME(res_read) -#define res_unload U_ICU_ENTRY_POINT_RENAME(res_unload) -#define u_UCharsToChars U_ICU_ENTRY_POINT_RENAME(u_UCharsToChars) -#define u_austrcpy U_ICU_ENTRY_POINT_RENAME(u_austrcpy) -#define u_austrncpy U_ICU_ENTRY_POINT_RENAME(u_austrncpy) -#define u_caseInsensitivePrefixMatch U_ICU_ENTRY_POINT_RENAME(u_caseInsensitivePrefixMatch) -#define u_catclose U_ICU_ENTRY_POINT_RENAME(u_catclose) -#define u_catgets U_ICU_ENTRY_POINT_RENAME(u_catgets) -#define u_catopen U_ICU_ENTRY_POINT_RENAME(u_catopen) -#define u_charAge U_ICU_ENTRY_POINT_RENAME(u_charAge) -#define u_charDigitValue U_ICU_ENTRY_POINT_RENAME(u_charDigitValue) -#define u_charDirection U_ICU_ENTRY_POINT_RENAME(u_charDirection) -#define u_charFromName U_ICU_ENTRY_POINT_RENAME(u_charFromName) -#define u_charMirror U_ICU_ENTRY_POINT_RENAME(u_charMirror) -#define u_charName U_ICU_ENTRY_POINT_RENAME(u_charName) -#define u_charType U_ICU_ENTRY_POINT_RENAME(u_charType) -#define u_charsToUChars U_ICU_ENTRY_POINT_RENAME(u_charsToUChars) -#define u_cleanup U_ICU_ENTRY_POINT_RENAME(u_cleanup) -#define u_countChar32 U_ICU_ENTRY_POINT_RENAME(u_countChar32) -#define u_digit U_ICU_ENTRY_POINT_RENAME(u_digit) -#define u_enumCharNames U_ICU_ENTRY_POINT_RENAME(u_enumCharNames) -#define u_enumCharTypes U_ICU_ENTRY_POINT_RENAME(u_enumCharTypes) -#define u_errorName U_ICU_ENTRY_POINT_RENAME(u_errorName) -#define u_fadopt U_ICU_ENTRY_POINT_RENAME(u_fadopt) -#define u_fclose U_ICU_ENTRY_POINT_RENAME(u_fclose) -#define u_feof U_ICU_ENTRY_POINT_RENAME(u_feof) -#define u_fflush U_ICU_ENTRY_POINT_RENAME(u_fflush) -#define u_fgetConverter U_ICU_ENTRY_POINT_RENAME(u_fgetConverter) -#define u_fgetNumberFormat U_ICU_ENTRY_POINT_RENAME(u_fgetNumberFormat) -#define u_fgetc U_ICU_ENTRY_POINT_RENAME(u_fgetc) -#define u_fgetcodepage U_ICU_ENTRY_POINT_RENAME(u_fgetcodepage) -#define u_fgetcx 
U_ICU_ENTRY_POINT_RENAME(u_fgetcx) -#define u_fgetfile U_ICU_ENTRY_POINT_RENAME(u_fgetfile) -#define u_fgetlocale U_ICU_ENTRY_POINT_RENAME(u_fgetlocale) -#define u_fgets U_ICU_ENTRY_POINT_RENAME(u_fgets) -#define u_file_read U_ICU_ENTRY_POINT_RENAME(u_file_read) -#define u_file_write U_ICU_ENTRY_POINT_RENAME(u_file_write) -#define u_file_write_flush U_ICU_ENTRY_POINT_RENAME(u_file_write_flush) -#define u_finit U_ICU_ENTRY_POINT_RENAME(u_finit) -#define u_flushDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_flushDefaultConverter) -#define u_foldCase U_ICU_ENTRY_POINT_RENAME(u_foldCase) -#define u_fopen U_ICU_ENTRY_POINT_RENAME(u_fopen) -#define u_fopen_u U_ICU_ENTRY_POINT_RENAME(u_fopen_u) -#define u_forDigit U_ICU_ENTRY_POINT_RENAME(u_forDigit) -#define u_formatMessage U_ICU_ENTRY_POINT_RENAME(u_formatMessage) -#define u_formatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_formatMessageWithError) -#define u_fprintf U_ICU_ENTRY_POINT_RENAME(u_fprintf) -#define u_fprintf_u U_ICU_ENTRY_POINT_RENAME(u_fprintf_u) -#define u_fputc U_ICU_ENTRY_POINT_RENAME(u_fputc) -#define u_fputs U_ICU_ENTRY_POINT_RENAME(u_fputs) -#define u_frewind U_ICU_ENTRY_POINT_RENAME(u_frewind) -#define u_fscanf U_ICU_ENTRY_POINT_RENAME(u_fscanf) -#define u_fscanf_u U_ICU_ENTRY_POINT_RENAME(u_fscanf_u) -#define u_fsetcodepage U_ICU_ENTRY_POINT_RENAME(u_fsetcodepage) -#define u_fsetlocale U_ICU_ENTRY_POINT_RENAME(u_fsetlocale) -#define u_fsettransliterator U_ICU_ENTRY_POINT_RENAME(u_fsettransliterator) -#define u_fstropen U_ICU_ENTRY_POINT_RENAME(u_fstropen) -#define u_fungetc U_ICU_ENTRY_POINT_RENAME(u_fungetc) -#define u_getBidiPairedBracket U_ICU_ENTRY_POINT_RENAME(u_getBidiPairedBracket) -#define u_getBinaryPropertySet U_ICU_ENTRY_POINT_RENAME(u_getBinaryPropertySet) -#define u_getCombiningClass U_ICU_ENTRY_POINT_RENAME(u_getCombiningClass) -#define u_getDataDirectory U_ICU_ENTRY_POINT_RENAME(u_getDataDirectory) -#define u_getDataVersion U_ICU_ENTRY_POINT_RENAME(u_getDataVersion) -#define 
u_getDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_getDefaultConverter) -#define u_getFC_NFKC_Closure U_ICU_ENTRY_POINT_RENAME(u_getFC_NFKC_Closure) -#define u_getISOComment U_ICU_ENTRY_POINT_RENAME(u_getISOComment) -#define u_getIntPropertyMap U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMap) -#define u_getIntPropertyMaxValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMaxValue) -#define u_getIntPropertyMinValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMinValue) -#define u_getIntPropertyValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyValue) -#define u_getMainProperties U_ICU_ENTRY_POINT_RENAME(u_getMainProperties) -#define u_getNumericValue U_ICU_ENTRY_POINT_RENAME(u_getNumericValue) -#define u_getPropertyEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyEnum) -#define u_getPropertyName U_ICU_ENTRY_POINT_RENAME(u_getPropertyName) -#define u_getPropertyValueEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueEnum) -#define u_getPropertyValueName U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueName) -#define u_getTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_getTimeZoneFilesDirectory) -#define u_getUnicodeProperties U_ICU_ENTRY_POINT_RENAME(u_getUnicodeProperties) -#define u_getUnicodeVersion U_ICU_ENTRY_POINT_RENAME(u_getUnicodeVersion) -#define u_getVersion U_ICU_ENTRY_POINT_RENAME(u_getVersion) -#define u_get_stdout U_ICU_ENTRY_POINT_RENAME(u_get_stdout) -#define u_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(u_hasBinaryProperty) -#define u_init U_ICU_ENTRY_POINT_RENAME(u_init) -#define u_isIDIgnorable U_ICU_ENTRY_POINT_RENAME(u_isIDIgnorable) -#define u_isIDPart U_ICU_ENTRY_POINT_RENAME(u_isIDPart) -#define u_isIDStart U_ICU_ENTRY_POINT_RENAME(u_isIDStart) -#define u_isISOControl U_ICU_ENTRY_POINT_RENAME(u_isISOControl) -#define u_isJavaIDPart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDPart) -#define u_isJavaIDStart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDStart) -#define u_isJavaSpaceChar U_ICU_ENTRY_POINT_RENAME(u_isJavaSpaceChar) -#define u_isMirrored U_ICU_ENTRY_POINT_RENAME(u_isMirrored) 
-#define u_isUAlphabetic U_ICU_ENTRY_POINT_RENAME(u_isUAlphabetic) -#define u_isULowercase U_ICU_ENTRY_POINT_RENAME(u_isULowercase) -#define u_isUUppercase U_ICU_ENTRY_POINT_RENAME(u_isUUppercase) -#define u_isUWhiteSpace U_ICU_ENTRY_POINT_RENAME(u_isUWhiteSpace) -#define u_isWhitespace U_ICU_ENTRY_POINT_RENAME(u_isWhitespace) -#define u_isalnum U_ICU_ENTRY_POINT_RENAME(u_isalnum) -#define u_isalnumPOSIX U_ICU_ENTRY_POINT_RENAME(u_isalnumPOSIX) -#define u_isalpha U_ICU_ENTRY_POINT_RENAME(u_isalpha) -#define u_isbase U_ICU_ENTRY_POINT_RENAME(u_isbase) -#define u_isblank U_ICU_ENTRY_POINT_RENAME(u_isblank) -#define u_iscntrl U_ICU_ENTRY_POINT_RENAME(u_iscntrl) -#define u_isdefined U_ICU_ENTRY_POINT_RENAME(u_isdefined) -#define u_isdigit U_ICU_ENTRY_POINT_RENAME(u_isdigit) -#define u_isgraph U_ICU_ENTRY_POINT_RENAME(u_isgraph) -#define u_isgraphPOSIX U_ICU_ENTRY_POINT_RENAME(u_isgraphPOSIX) -#define u_islower U_ICU_ENTRY_POINT_RENAME(u_islower) -#define u_isprint U_ICU_ENTRY_POINT_RENAME(u_isprint) -#define u_isprintPOSIX U_ICU_ENTRY_POINT_RENAME(u_isprintPOSIX) -#define u_ispunct U_ICU_ENTRY_POINT_RENAME(u_ispunct) -#define u_isspace U_ICU_ENTRY_POINT_RENAME(u_isspace) -#define u_istitle U_ICU_ENTRY_POINT_RENAME(u_istitle) -#define u_isupper U_ICU_ENTRY_POINT_RENAME(u_isupper) -#define u_isxdigit U_ICU_ENTRY_POINT_RENAME(u_isxdigit) -#define u_locbund_close U_ICU_ENTRY_POINT_RENAME(u_locbund_close) -#define u_locbund_getNumberFormat U_ICU_ENTRY_POINT_RENAME(u_locbund_getNumberFormat) -#define u_locbund_init U_ICU_ENTRY_POINT_RENAME(u_locbund_init) -#define u_memcasecmp U_ICU_ENTRY_POINT_RENAME(u_memcasecmp) -#define u_memchr U_ICU_ENTRY_POINT_RENAME(u_memchr) -#define u_memchr32 U_ICU_ENTRY_POINT_RENAME(u_memchr32) -#define u_memcmp U_ICU_ENTRY_POINT_RENAME(u_memcmp) -#define u_memcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_memcmpCodePointOrder) -#define u_memcpy U_ICU_ENTRY_POINT_RENAME(u_memcpy) -#define u_memmove U_ICU_ENTRY_POINT_RENAME(u_memmove) -#define 
u_memrchr U_ICU_ENTRY_POINT_RENAME(u_memrchr) -#define u_memrchr32 U_ICU_ENTRY_POINT_RENAME(u_memrchr32) -#define u_memset U_ICU_ENTRY_POINT_RENAME(u_memset) -#define u_parseMessage U_ICU_ENTRY_POINT_RENAME(u_parseMessage) -#define u_parseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_parseMessageWithError) -#define u_printf U_ICU_ENTRY_POINT_RENAME(u_printf) -#define u_printf_parse U_ICU_ENTRY_POINT_RENAME(u_printf_parse) -#define u_printf_u U_ICU_ENTRY_POINT_RENAME(u_printf_u) -#define u_releaseDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_releaseDefaultConverter) -#define u_scanf_parse U_ICU_ENTRY_POINT_RENAME(u_scanf_parse) -#define u_setAtomicIncDecFunctions U_ICU_ENTRY_POINT_RENAME(u_setAtomicIncDecFunctions) -#define u_setDataDirectory U_ICU_ENTRY_POINT_RENAME(u_setDataDirectory) -#define u_setMemoryFunctions U_ICU_ENTRY_POINT_RENAME(u_setMemoryFunctions) -#define u_setMutexFunctions U_ICU_ENTRY_POINT_RENAME(u_setMutexFunctions) -#define u_setTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_setTimeZoneFilesDirectory) -#define u_shapeArabic U_ICU_ENTRY_POINT_RENAME(u_shapeArabic) -#define u_snprintf U_ICU_ENTRY_POINT_RENAME(u_snprintf) -#define u_snprintf_u U_ICU_ENTRY_POINT_RENAME(u_snprintf_u) -#define u_sprintf U_ICU_ENTRY_POINT_RENAME(u_sprintf) -#define u_sprintf_u U_ICU_ENTRY_POINT_RENAME(u_sprintf_u) -#define u_sscanf U_ICU_ENTRY_POINT_RENAME(u_sscanf) -#define u_sscanf_u U_ICU_ENTRY_POINT_RENAME(u_sscanf_u) -#define u_strCaseCompare U_ICU_ENTRY_POINT_RENAME(u_strCaseCompare) -#define u_strCompare U_ICU_ENTRY_POINT_RENAME(u_strCompare) -#define u_strCompareIter U_ICU_ENTRY_POINT_RENAME(u_strCompareIter) -#define u_strFindFirst U_ICU_ENTRY_POINT_RENAME(u_strFindFirst) -#define u_strFindLast U_ICU_ENTRY_POINT_RENAME(u_strFindLast) -#define u_strFoldCase U_ICU_ENTRY_POINT_RENAME(u_strFoldCase) -#define u_strFromJavaModifiedUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromJavaModifiedUTF8WithSub) -#define u_strFromPunycode 
U_ICU_ENTRY_POINT_RENAME(u_strFromPunycode) -#define u_strFromUTF32 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32) -#define u_strFromUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32WithSub) -#define u_strFromUTF8 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8) -#define u_strFromUTF8Lenient U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8Lenient) -#define u_strFromUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8WithSub) -#define u_strFromWCS U_ICU_ENTRY_POINT_RENAME(u_strFromWCS) -#define u_strHasMoreChar32Than U_ICU_ENTRY_POINT_RENAME(u_strHasMoreChar32Than) -#define u_strToJavaModifiedUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToJavaModifiedUTF8) -#define u_strToLower U_ICU_ENTRY_POINT_RENAME(u_strToLower) -#define u_strToPunycode U_ICU_ENTRY_POINT_RENAME(u_strToPunycode) -#define u_strToTitle U_ICU_ENTRY_POINT_RENAME(u_strToTitle) -#define u_strToUTF32 U_ICU_ENTRY_POINT_RENAME(u_strToUTF32) -#define u_strToUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF32WithSub) -#define u_strToUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToUTF8) -#define u_strToUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF8WithSub) -#define u_strToUpper U_ICU_ENTRY_POINT_RENAME(u_strToUpper) -#define u_strToWCS U_ICU_ENTRY_POINT_RENAME(u_strToWCS) -#define u_strcasecmp U_ICU_ENTRY_POINT_RENAME(u_strcasecmp) -#define u_strcat U_ICU_ENTRY_POINT_RENAME(u_strcat) -#define u_strchr U_ICU_ENTRY_POINT_RENAME(u_strchr) -#define u_strchr32 U_ICU_ENTRY_POINT_RENAME(u_strchr32) -#define u_strcmp U_ICU_ENTRY_POINT_RENAME(u_strcmp) -#define u_strcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strcmpCodePointOrder) -#define u_strcmpFold U_ICU_ENTRY_POINT_RENAME(u_strcmpFold) -#define u_strcpy U_ICU_ENTRY_POINT_RENAME(u_strcpy) -#define u_strcspn U_ICU_ENTRY_POINT_RENAME(u_strcspn) -#define u_strlen U_ICU_ENTRY_POINT_RENAME(u_strlen) -#define u_strncasecmp U_ICU_ENTRY_POINT_RENAME(u_strncasecmp) -#define u_strncat U_ICU_ENTRY_POINT_RENAME(u_strncat) -#define u_strncmp U_ICU_ENTRY_POINT_RENAME(u_strncmp) -#define u_strncmpCodePointOrder 
U_ICU_ENTRY_POINT_RENAME(u_strncmpCodePointOrder) -#define u_strncpy U_ICU_ENTRY_POINT_RENAME(u_strncpy) -#define u_strpbrk U_ICU_ENTRY_POINT_RENAME(u_strpbrk) -#define u_strrchr U_ICU_ENTRY_POINT_RENAME(u_strrchr) -#define u_strrchr32 U_ICU_ENTRY_POINT_RENAME(u_strrchr32) -#define u_strrstr U_ICU_ENTRY_POINT_RENAME(u_strrstr) -#define u_strspn U_ICU_ENTRY_POINT_RENAME(u_strspn) -#define u_strstr U_ICU_ENTRY_POINT_RENAME(u_strstr) -#define u_strtok_r U_ICU_ENTRY_POINT_RENAME(u_strtok_r) -#define u_terminateChars U_ICU_ENTRY_POINT_RENAME(u_terminateChars) -#define u_terminateUChar32s U_ICU_ENTRY_POINT_RENAME(u_terminateUChar32s) -#define u_terminateUChars U_ICU_ENTRY_POINT_RENAME(u_terminateUChars) -#define u_terminateWChars U_ICU_ENTRY_POINT_RENAME(u_terminateWChars) -#define u_tolower U_ICU_ENTRY_POINT_RENAME(u_tolower) -#define u_totitle U_ICU_ENTRY_POINT_RENAME(u_totitle) -#define u_toupper U_ICU_ENTRY_POINT_RENAME(u_toupper) -#define u_uastrcpy U_ICU_ENTRY_POINT_RENAME(u_uastrcpy) -#define u_uastrncpy U_ICU_ENTRY_POINT_RENAME(u_uastrncpy) -#define u_unescape U_ICU_ENTRY_POINT_RENAME(u_unescape) -#define u_unescapeAt U_ICU_ENTRY_POINT_RENAME(u_unescapeAt) -#define u_versionFromString U_ICU_ENTRY_POINT_RENAME(u_versionFromString) -#define u_versionFromUString U_ICU_ENTRY_POINT_RENAME(u_versionFromUString) -#define u_versionToString U_ICU_ENTRY_POINT_RENAME(u_versionToString) -#define u_vformatMessage U_ICU_ENTRY_POINT_RENAME(u_vformatMessage) -#define u_vformatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vformatMessageWithError) -#define u_vfprintf U_ICU_ENTRY_POINT_RENAME(u_vfprintf) -#define u_vfprintf_u U_ICU_ENTRY_POINT_RENAME(u_vfprintf_u) -#define u_vfscanf U_ICU_ENTRY_POINT_RENAME(u_vfscanf) -#define u_vfscanf_u U_ICU_ENTRY_POINT_RENAME(u_vfscanf_u) -#define u_vparseMessage U_ICU_ENTRY_POINT_RENAME(u_vparseMessage) -#define u_vparseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vparseMessageWithError) -#define u_vsnprintf U_ICU_ENTRY_POINT_RENAME(u_vsnprintf) 
-#define u_vsnprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsnprintf_u) -#define u_vsprintf U_ICU_ENTRY_POINT_RENAME(u_vsprintf) -#define u_vsprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsprintf_u) -#define u_vsscanf U_ICU_ENTRY_POINT_RENAME(u_vsscanf) -#define u_vsscanf_u U_ICU_ENTRY_POINT_RENAME(u_vsscanf_u) -#define u_writeIdenticalLevelRun U_ICU_ENTRY_POINT_RENAME(u_writeIdenticalLevelRun) -#define ubidi_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ubidi_addPropertyStarts) -#define ubidi_close U_ICU_ENTRY_POINT_RENAME(ubidi_close) -#define ubidi_countParagraphs U_ICU_ENTRY_POINT_RENAME(ubidi_countParagraphs) -#define ubidi_countRuns U_ICU_ENTRY_POINT_RENAME(ubidi_countRuns) -#define ubidi_getBaseDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getBaseDirection) -#define ubidi_getClass U_ICU_ENTRY_POINT_RENAME(ubidi_getClass) -#define ubidi_getClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_getClassCallback) -#define ubidi_getCustomizedClass U_ICU_ENTRY_POINT_RENAME(ubidi_getCustomizedClass) -#define ubidi_getDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getDirection) -#define ubidi_getJoiningGroup U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningGroup) -#define ubidi_getJoiningType U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningType) -#define ubidi_getLength U_ICU_ENTRY_POINT_RENAME(ubidi_getLength) -#define ubidi_getLevelAt U_ICU_ENTRY_POINT_RENAME(ubidi_getLevelAt) -#define ubidi_getLevels U_ICU_ENTRY_POINT_RENAME(ubidi_getLevels) -#define ubidi_getLogicalIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalIndex) -#define ubidi_getLogicalMap U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalMap) -#define ubidi_getLogicalRun U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalRun) -#define ubidi_getMaxValue U_ICU_ENTRY_POINT_RENAME(ubidi_getMaxValue) -#define ubidi_getMemory U_ICU_ENTRY_POINT_RENAME(ubidi_getMemory) -#define ubidi_getMirror U_ICU_ENTRY_POINT_RENAME(ubidi_getMirror) -#define ubidi_getPairedBracket U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracket) -#define ubidi_getPairedBracketType 
U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracketType) -#define ubidi_getParaLevel U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevel) -#define ubidi_getParaLevelAtIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevelAtIndex) -#define ubidi_getParagraph U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraph) -#define ubidi_getParagraphByIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraphByIndex) -#define ubidi_getProcessedLength U_ICU_ENTRY_POINT_RENAME(ubidi_getProcessedLength) -#define ubidi_getReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingMode) -#define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions) -#define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength) -#define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns) -#define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText) -#define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex) -#define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap) -#define ubidi_getVisualRun U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualRun) -#define ubidi_invertMap U_ICU_ENTRY_POINT_RENAME(ubidi_invertMap) -#define ubidi_isBidiControl U_ICU_ENTRY_POINT_RENAME(ubidi_isBidiControl) -#define ubidi_isInverse U_ICU_ENTRY_POINT_RENAME(ubidi_isInverse) -#define ubidi_isJoinControl U_ICU_ENTRY_POINT_RENAME(ubidi_isJoinControl) -#define ubidi_isMirrored U_ICU_ENTRY_POINT_RENAME(ubidi_isMirrored) -#define ubidi_isOrderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_isOrderParagraphsLTR) -#define ubidi_open U_ICU_ENTRY_POINT_RENAME(ubidi_open) -#define ubidi_openSized U_ICU_ENTRY_POINT_RENAME(ubidi_openSized) -#define ubidi_orderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_orderParagraphsLTR) -#define ubidi_reorderLogical U_ICU_ENTRY_POINT_RENAME(ubidi_reorderLogical) -#define ubidi_reorderVisual U_ICU_ENTRY_POINT_RENAME(ubidi_reorderVisual) -#define ubidi_setClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_setClassCallback) -#define ubidi_setContext 
U_ICU_ENTRY_POINT_RENAME(ubidi_setContext) -#define ubidi_setInverse U_ICU_ENTRY_POINT_RENAME(ubidi_setInverse) -#define ubidi_setLine U_ICU_ENTRY_POINT_RENAME(ubidi_setLine) -#define ubidi_setPara U_ICU_ENTRY_POINT_RENAME(ubidi_setPara) -#define ubidi_setReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingMode) -#define ubidi_setReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingOptions) -#define ubidi_writeReordered U_ICU_ENTRY_POINT_RENAME(ubidi_writeReordered) -#define ubidi_writeReverse U_ICU_ENTRY_POINT_RENAME(ubidi_writeReverse) -#define ubiditransform_close U_ICU_ENTRY_POINT_RENAME(ubiditransform_close) -#define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open) -#define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform) -#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode) -#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close) -#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable) -#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current) -#define ubrk_first U_ICU_ENTRY_POINT_RENAME(ubrk_first) -#define ubrk_following U_ICU_ENTRY_POINT_RENAME(ubrk_following) -#define ubrk_getAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_getAvailable) -#define ubrk_getBinaryRules U_ICU_ENTRY_POINT_RENAME(ubrk_getBinaryRules) -#define ubrk_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ubrk_getLocaleByType) -#define ubrk_getRuleStatus U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatus) -#define ubrk_getRuleStatusVec U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatusVec) -#define ubrk_isBoundary U_ICU_ENTRY_POINT_RENAME(ubrk_isBoundary) -#define ubrk_last U_ICU_ENTRY_POINT_RENAME(ubrk_last) -#define ubrk_next U_ICU_ENTRY_POINT_RENAME(ubrk_next) -#define ubrk_open U_ICU_ENTRY_POINT_RENAME(ubrk_open) -#define ubrk_openBinaryRules U_ICU_ENTRY_POINT_RENAME(ubrk_openBinaryRules) -#define ubrk_openRules U_ICU_ENTRY_POINT_RENAME(ubrk_openRules) -#define ubrk_preceding U_ICU_ENTRY_POINT_RENAME(ubrk_preceding) -#define 
ubrk_previous U_ICU_ENTRY_POINT_RENAME(ubrk_previous) -#define ubrk_refreshUText U_ICU_ENTRY_POINT_RENAME(ubrk_refreshUText) -#define ubrk_safeClone U_ICU_ENTRY_POINT_RENAME(ubrk_safeClone) -#define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText) -#define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText) -#define ubrk_swap U_ICU_ENTRY_POINT_RENAME(ubrk_swap) -#define ucache_compareKeys U_ICU_ENTRY_POINT_RENAME(ucache_compareKeys) -#define ucache_deleteKey U_ICU_ENTRY_POINT_RENAME(ucache_deleteKey) -#define ucache_hashKeys U_ICU_ENTRY_POINT_RENAME(ucache_hashKeys) -#define ucal_add U_ICU_ENTRY_POINT_RENAME(ucal_add) -#define ucal_clear U_ICU_ENTRY_POINT_RENAME(ucal_clear) -#define ucal_clearField U_ICU_ENTRY_POINT_RENAME(ucal_clearField) -#define ucal_clone U_ICU_ENTRY_POINT_RENAME(ucal_clone) -#define ucal_close U_ICU_ENTRY_POINT_RENAME(ucal_close) -#define ucal_countAvailable U_ICU_ENTRY_POINT_RENAME(ucal_countAvailable) -#define ucal_equivalentTo U_ICU_ENTRY_POINT_RENAME(ucal_equivalentTo) -#define ucal_get U_ICU_ENTRY_POINT_RENAME(ucal_get) -#define ucal_getAttribute U_ICU_ENTRY_POINT_RENAME(ucal_getAttribute) -#define ucal_getAvailable U_ICU_ENTRY_POINT_RENAME(ucal_getAvailable) -#define ucal_getCanonicalTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getCanonicalTimeZoneID) -#define ucal_getDSTSavings U_ICU_ENTRY_POINT_RENAME(ucal_getDSTSavings) -#define ucal_getDayOfWeekType U_ICU_ENTRY_POINT_RENAME(ucal_getDayOfWeekType) -#define ucal_getDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getDefaultTimeZone) -#define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference) -#define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange) -#define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale) -#define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit) -#define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType) -#define ucal_getMillis 
U_ICU_ENTRY_POINT_RENAME(ucal_getMillis) -#define ucal_getNow U_ICU_ENTRY_POINT_RENAME(ucal_getNow) -#define ucal_getTZDataVersion U_ICU_ENTRY_POINT_RENAME(ucal_getTZDataVersion) -#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName) -#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID) -#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID) -#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate) -#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType) -#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition) -#define ucal_getWindowsTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getWindowsTimeZoneID) -#define ucal_inDaylightTime U_ICU_ENTRY_POINT_RENAME(ucal_inDaylightTime) -#define ucal_isSet U_ICU_ENTRY_POINT_RENAME(ucal_isSet) -#define ucal_isWeekend U_ICU_ENTRY_POINT_RENAME(ucal_isWeekend) -#define ucal_open U_ICU_ENTRY_POINT_RENAME(ucal_open) -#define ucal_openCountryTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openCountryTimeZones) -#define ucal_openTimeZoneIDEnumeration U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZoneIDEnumeration) -#define ucal_openTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZones) -#define ucal_roll U_ICU_ENTRY_POINT_RENAME(ucal_roll) -#define ucal_set U_ICU_ENTRY_POINT_RENAME(ucal_set) -#define ucal_setAttribute U_ICU_ENTRY_POINT_RENAME(ucal_setAttribute) -#define ucal_setDate U_ICU_ENTRY_POINT_RENAME(ucal_setDate) -#define ucal_setDateTime U_ICU_ENTRY_POINT_RENAME(ucal_setDateTime) -#define ucal_setDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setDefaultTimeZone) -#define ucal_setGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_setGregorianChange) -#define ucal_setMillis U_ICU_ENTRY_POINT_RENAME(ucal_setMillis) -#define ucal_setTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setTimeZone) -#define ucase_addCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addCaseClosure) -#define ucase_addPropertyStarts 
U_ICU_ENTRY_POINT_RENAME(ucase_addPropertyStarts) -#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure) -#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold) -#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale) -#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie) -#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType) -#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable) -#define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty) -#define ucase_isCaseSensitive U_ICU_ENTRY_POINT_RENAME(ucase_isCaseSensitive) -#define ucase_isSoftDotted U_ICU_ENTRY_POINT_RENAME(ucase_isSoftDotted) -#define ucase_toFullFolding U_ICU_ENTRY_POINT_RENAME(ucase_toFullFolding) -#define ucase_toFullLower U_ICU_ENTRY_POINT_RENAME(ucase_toFullLower) -#define ucase_toFullTitle U_ICU_ENTRY_POINT_RENAME(ucase_toFullTitle) -#define ucase_toFullUpper U_ICU_ENTRY_POINT_RENAME(ucase_toFullUpper) -#define ucase_tolower U_ICU_ENTRY_POINT_RENAME(ucase_tolower) -#define ucase_totitle U_ICU_ENTRY_POINT_RENAME(ucase_totitle) -#define ucase_toupper U_ICU_ENTRY_POINT_RENAME(ucase_toupper) -#define ucasemap_close U_ICU_ENTRY_POINT_RENAME(ucasemap_close) -#define ucasemap_getBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_getBreakIterator) -#define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale) -#define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions) -#define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle) -#define ucasemap_mapUTF8 U_ICU_ENTRY_POINT_RENAME(ucasemap_mapUTF8) -#define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open) -#define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator) -#define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale) -#define ucasemap_setOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_setOptions) -#define ucasemap_toTitle 
U_ICU_ENTRY_POINT_RENAME(ucasemap_toTitle) -#define ucasemap_utf8FoldCase U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8FoldCase) -#define ucasemap_utf8ToLower U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToLower) -#define ucasemap_utf8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToTitle) -#define ucasemap_utf8ToUpper U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToUpper) -#define uchar_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uchar_addPropertyStarts) -#define uchar_swapNames U_ICU_ENTRY_POINT_RENAME(uchar_swapNames) -#define ucln_cleanupOne U_ICU_ENTRY_POINT_RENAME(ucln_cleanupOne) -#define ucln_common_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_common_registerCleanup) -#define ucln_i18n_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_i18n_registerCleanup) -#define ucln_io_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_io_registerCleanup) -#define ucln_lib_cleanup U_ICU_ENTRY_POINT_RENAME(ucln_lib_cleanup) -#define ucln_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_registerCleanup) -#define ucnv_MBCSFromUChar32 U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUChar32) -#define ucnv_MBCSFromUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUnicodeWithOffsets) -#define ucnv_MBCSGetFilteredUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetFilteredUnicodeSetForUnicode) -#define ucnv_MBCSGetType U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetType) -#define ucnv_MBCSGetUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetUnicodeSetForUnicode) -#define ucnv_MBCSIsLeadByte U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSIsLeadByte) -#define ucnv_MBCSSimpleGetNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSSimpleGetNextUChar) -#define ucnv_MBCSToUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSToUnicodeWithOffsets) -#define ucnv_bld_countAvailableConverters U_ICU_ENTRY_POINT_RENAME(ucnv_bld_countAvailableConverters) -#define ucnv_bld_getAvailableConverter U_ICU_ENTRY_POINT_RENAME(ucnv_bld_getAvailableConverter) -#define ucnv_canCreateConverter U_ICU_ENTRY_POINT_RENAME(ucnv_canCreateConverter) -#define 
ucnv_cbFromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteBytes) -#define ucnv_cbFromUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteSub) -#define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars) -#define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub) -#define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars) -#define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close) -#define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames) -#define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert) -#define ucnv_convertEx U_ICU_ENTRY_POINT_RENAME(ucnv_convertEx) -#define ucnv_countAliases U_ICU_ENTRY_POINT_RENAME(ucnv_countAliases) -#define ucnv_countAvailable U_ICU_ENTRY_POINT_RENAME(ucnv_countAvailable) -#define ucnv_countStandards U_ICU_ENTRY_POINT_RENAME(ucnv_countStandards) -#define ucnv_createAlgorithmicConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createAlgorithmicConverter) -#define ucnv_createConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createConverter) -#define ucnv_createConverterFromPackage U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromPackage) -#define ucnv_createConverterFromSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromSharedData) -#define ucnv_detectUnicodeSignature U_ICU_ENTRY_POINT_RENAME(ucnv_detectUnicodeSignature) -#define ucnv_enableCleanup U_ICU_ENTRY_POINT_RENAME(ucnv_enableCleanup) -#define ucnv_extContinueMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchFromU) -#define ucnv_extContinueMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchToU) -#define ucnv_extGetUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_extGetUnicodeSet) -#define ucnv_extInitialMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchFromU) -#define ucnv_extInitialMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchToU) -#define ucnv_extSimpleMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchFromU) -#define ucnv_extSimpleMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchToU) 
-#define ucnv_fixFileSeparator U_ICU_ENTRY_POINT_RENAME(ucnv_fixFileSeparator) -#define ucnv_flushCache U_ICU_ENTRY_POINT_RENAME(ucnv_flushCache) -#define ucnv_fromAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_fromAlgorithmic) -#define ucnv_fromUChars U_ICU_ENTRY_POINT_RENAME(ucnv_fromUChars) -#define ucnv_fromUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_fromUCountPending) -#define ucnv_fromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_fromUWriteBytes) -#define ucnv_fromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode) -#define ucnv_fromUnicode_UTF8 U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8) -#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8_OFFSETS_LOGIC) -#define ucnv_getAlias U_ICU_ENTRY_POINT_RENAME(ucnv_getAlias) -#define ucnv_getAliases U_ICU_ENTRY_POINT_RENAME(ucnv_getAliases) -#define ucnv_getAvailableName U_ICU_ENTRY_POINT_RENAME(ucnv_getAvailableName) -#define ucnv_getCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_getCCSID) -#define ucnv_getCanonicalName U_ICU_ENTRY_POINT_RENAME(ucnv_getCanonicalName) -#define ucnv_getCompleteUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getCompleteUnicodeSet) -#define ucnv_getDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_getDefaultName) -#define ucnv_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucnv_getDisplayName) -#define ucnv_getFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getFromUCallBack) -#define ucnv_getInvalidChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidChars) -#define ucnv_getInvalidUChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidUChars) -#define ucnv_getMaxCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMaxCharSize) -#define ucnv_getMinCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMinCharSize) -#define ucnv_getName U_ICU_ENTRY_POINT_RENAME(ucnv_getName) -#define ucnv_getNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_getNextUChar) -#define ucnv_getNonSurrogateUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getNonSurrogateUnicodeSet) -#define ucnv_getPlatform U_ICU_ENTRY_POINT_RENAME(ucnv_getPlatform) -#define 
ucnv_getStandard U_ICU_ENTRY_POINT_RENAME(ucnv_getStandard) -#define ucnv_getStandardName U_ICU_ENTRY_POINT_RENAME(ucnv_getStandardName) -#define ucnv_getStarters U_ICU_ENTRY_POINT_RENAME(ucnv_getStarters) -#define ucnv_getSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_getSubstChars) -#define ucnv_getToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getToUCallBack) -#define ucnv_getType U_ICU_ENTRY_POINT_RENAME(ucnv_getType) -#define ucnv_getUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getUnicodeSet) -#define ucnv_incrementRefCount U_ICU_ENTRY_POINT_RENAME(ucnv_incrementRefCount) -#define ucnv_io_countKnownConverters U_ICU_ENTRY_POINT_RENAME(ucnv_io_countKnownConverters) -#define ucnv_io_getConverterName U_ICU_ENTRY_POINT_RENAME(ucnv_io_getConverterName) -#define ucnv_io_stripASCIIForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripASCIIForCompare) -#define ucnv_io_stripEBCDICForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripEBCDICForCompare) -#define ucnv_isAmbiguous U_ICU_ENTRY_POINT_RENAME(ucnv_isAmbiguous) -#define ucnv_isFixedWidth U_ICU_ENTRY_POINT_RENAME(ucnv_isFixedWidth) -#define ucnv_load U_ICU_ENTRY_POINT_RENAME(ucnv_load) -#define ucnv_loadSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_loadSharedData) -#define ucnv_open U_ICU_ENTRY_POINT_RENAME(ucnv_open) -#define ucnv_openAllNames U_ICU_ENTRY_POINT_RENAME(ucnv_openAllNames) -#define ucnv_openCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_openCCSID) -#define ucnv_openPackage U_ICU_ENTRY_POINT_RENAME(ucnv_openPackage) -#define ucnv_openStandardNames U_ICU_ENTRY_POINT_RENAME(ucnv_openStandardNames) -#define ucnv_openU U_ICU_ENTRY_POINT_RENAME(ucnv_openU) -#define ucnv_reset U_ICU_ENTRY_POINT_RENAME(ucnv_reset) -#define ucnv_resetFromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetFromUnicode) -#define ucnv_resetToUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetToUnicode) -#define ucnv_safeClone U_ICU_ENTRY_POINT_RENAME(ucnv_safeClone) -#define ucnv_setDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_setDefaultName) -#define ucnv_setFallback 
U_ICU_ENTRY_POINT_RENAME(ucnv_setFallback) -#define ucnv_setFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setFromUCallBack) -#define ucnv_setSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstChars) -#define ucnv_setSubstString U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstString) -#define ucnv_setToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setToUCallBack) -#define ucnv_swap U_ICU_ENTRY_POINT_RENAME(ucnv_swap) -#define ucnv_swapAliases U_ICU_ENTRY_POINT_RENAME(ucnv_swapAliases) -#define ucnv_toAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_toAlgorithmic) -#define ucnv_toUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUChars) -#define ucnv_toUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_toUCountPending) -#define ucnv_toUWriteCodePoint U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteCodePoint) -#define ucnv_toUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteUChars) -#define ucnv_toUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_toUnicode) -#define ucnv_unload U_ICU_ENTRY_POINT_RENAME(ucnv_unload) -#define ucnv_unloadSharedDataIfReady U_ICU_ENTRY_POINT_RENAME(ucnv_unloadSharedDataIfReady) -#define ucnv_usesFallback U_ICU_ENTRY_POINT_RENAME(ucnv_usesFallback) -#define ucnvsel_close U_ICU_ENTRY_POINT_RENAME(ucnvsel_close) -#define ucnvsel_open U_ICU_ENTRY_POINT_RENAME(ucnvsel_open) -#define ucnvsel_openFromSerialized U_ICU_ENTRY_POINT_RENAME(ucnvsel_openFromSerialized) -#define ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString) -#define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8) -#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize) -#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary) -#define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close) -#define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements) -#define ucol_countAvailable U_ICU_ENTRY_POINT_RENAME(ucol_countAvailable) -#define ucol_equal U_ICU_ENTRY_POINT_RENAME(ucol_equal) -#define ucol_equals U_ICU_ENTRY_POINT_RENAME(ucol_equals) -#define ucol_getAttribute 
U_ICU_ENTRY_POINT_RENAME(ucol_getAttribute) -#define ucol_getAvailable U_ICU_ENTRY_POINT_RENAME(ucol_getAvailable) -#define ucol_getBound U_ICU_ENTRY_POINT_RENAME(ucol_getBound) -#define ucol_getContractions U_ICU_ENTRY_POINT_RENAME(ucol_getContractions) -#define ucol_getContractionsAndExpansions U_ICU_ENTRY_POINT_RENAME(ucol_getContractionsAndExpansions) -#define ucol_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucol_getDisplayName) -#define ucol_getEquivalentReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getEquivalentReorderCodes) -#define ucol_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ucol_getFunctionalEquivalent) -#define ucol_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValues) -#define ucol_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValuesForLocale) -#define ucol_getKeywords U_ICU_ENTRY_POINT_RENAME(ucol_getKeywords) -#define ucol_getLocale U_ICU_ENTRY_POINT_RENAME(ucol_getLocale) -#define ucol_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucol_getLocaleByType) -#define ucol_getMaxExpansion U_ICU_ENTRY_POINT_RENAME(ucol_getMaxExpansion) -#define ucol_getMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_getMaxVariable) -#define ucol_getOffset U_ICU_ENTRY_POINT_RENAME(ucol_getOffset) -#define ucol_getReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getReorderCodes) -#define ucol_getRules U_ICU_ENTRY_POINT_RENAME(ucol_getRules) -#define ucol_getRulesEx U_ICU_ENTRY_POINT_RENAME(ucol_getRulesEx) -#define ucol_getShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_getShortDefinitionString) -#define ucol_getSortKey U_ICU_ENTRY_POINT_RENAME(ucol_getSortKey) -#define ucol_getStrength U_ICU_ENTRY_POINT_RENAME(ucol_getStrength) -#define ucol_getTailoredSet U_ICU_ENTRY_POINT_RENAME(ucol_getTailoredSet) -#define ucol_getUCAVersion U_ICU_ENTRY_POINT_RENAME(ucol_getUCAVersion) -#define ucol_getUnsafeSet U_ICU_ENTRY_POINT_RENAME(ucol_getUnsafeSet) -#define ucol_getVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_getVariableTop) -#define ucol_getVersion 
U_ICU_ENTRY_POINT_RENAME(ucol_getVersion) -#define ucol_greater U_ICU_ENTRY_POINT_RENAME(ucol_greater) -#define ucol_greaterOrEqual U_ICU_ENTRY_POINT_RENAME(ucol_greaterOrEqual) -#define ucol_keyHashCode U_ICU_ENTRY_POINT_RENAME(ucol_keyHashCode) -#define ucol_looksLikeCollationBinary U_ICU_ENTRY_POINT_RENAME(ucol_looksLikeCollationBinary) -#define ucol_mergeSortkeys U_ICU_ENTRY_POINT_RENAME(ucol_mergeSortkeys) -#define ucol_next U_ICU_ENTRY_POINT_RENAME(ucol_next) -#define ucol_nextSortKeyPart U_ICU_ENTRY_POINT_RENAME(ucol_nextSortKeyPart) -#define ucol_normalizeShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_normalizeShortDefinitionString) -#define ucol_open U_ICU_ENTRY_POINT_RENAME(ucol_open) -#define ucol_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ucol_openAvailableLocales) -#define ucol_openBinary U_ICU_ENTRY_POINT_RENAME(ucol_openBinary) -#define ucol_openElements U_ICU_ENTRY_POINT_RENAME(ucol_openElements) -#define ucol_openFromShortString U_ICU_ENTRY_POINT_RENAME(ucol_openFromShortString) -#define ucol_openRules U_ICU_ENTRY_POINT_RENAME(ucol_openRules) -#define ucol_prepareShortStringOpen U_ICU_ENTRY_POINT_RENAME(ucol_prepareShortStringOpen) -#define ucol_previous U_ICU_ENTRY_POINT_RENAME(ucol_previous) -#define ucol_primaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_primaryOrder) -#define ucol_reset U_ICU_ENTRY_POINT_RENAME(ucol_reset) -#define ucol_restoreVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_restoreVariableTop) -#define ucol_safeClone U_ICU_ENTRY_POINT_RENAME(ucol_safeClone) -#define ucol_secondaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_secondaryOrder) -#define ucol_setAttribute U_ICU_ENTRY_POINT_RENAME(ucol_setAttribute) -#define ucol_setMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_setMaxVariable) -#define ucol_setOffset U_ICU_ENTRY_POINT_RENAME(ucol_setOffset) -#define ucol_setReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_setReorderCodes) -#define ucol_setStrength U_ICU_ENTRY_POINT_RENAME(ucol_setStrength) -#define ucol_setText 
U_ICU_ENTRY_POINT_RENAME(ucol_setText) -#define ucol_setVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_setVariableTop) -#define ucol_strcoll U_ICU_ENTRY_POINT_RENAME(ucol_strcoll) -#define ucol_strcollIter U_ICU_ENTRY_POINT_RENAME(ucol_strcollIter) -#define ucol_strcollUTF8 U_ICU_ENTRY_POINT_RENAME(ucol_strcollUTF8) -#define ucol_swap U_ICU_ENTRY_POINT_RENAME(ucol_swap) -#define ucol_swapInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_swapInverseUCA) -#define ucol_tertiaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_tertiaryOrder) -#define ucpmap_get U_ICU_ENTRY_POINT_RENAME(ucpmap_get) -#define ucpmap_getRange U_ICU_ENTRY_POINT_RENAME(ucpmap_getRange) -#define ucptrie_close U_ICU_ENTRY_POINT_RENAME(ucptrie_close) -#define ucptrie_get U_ICU_ENTRY_POINT_RENAME(ucptrie_get) -#define ucptrie_getRange U_ICU_ENTRY_POINT_RENAME(ucptrie_getRange) -#define ucptrie_getType U_ICU_ENTRY_POINT_RENAME(ucptrie_getType) -#define ucptrie_getValueWidth U_ICU_ENTRY_POINT_RENAME(ucptrie_getValueWidth) -#define ucptrie_internalGetRange U_ICU_ENTRY_POINT_RENAME(ucptrie_internalGetRange) -#define ucptrie_internalSmallIndex U_ICU_ENTRY_POINT_RENAME(ucptrie_internalSmallIndex) -#define ucptrie_internalSmallU8Index U_ICU_ENTRY_POINT_RENAME(ucptrie_internalSmallU8Index) -#define ucptrie_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(ucptrie_internalU8PrevIndex) -#define ucptrie_openFromBinary U_ICU_ENTRY_POINT_RENAME(ucptrie_openFromBinary) -#define ucptrie_swap U_ICU_ENTRY_POINT_RENAME(ucptrie_swap) -#define ucptrie_toBinary U_ICU_ENTRY_POINT_RENAME(ucptrie_toBinary) -#define ucsdet_close U_ICU_ENTRY_POINT_RENAME(ucsdet_close) -#define ucsdet_detect U_ICU_ENTRY_POINT_RENAME(ucsdet_detect) -#define ucsdet_detectAll U_ICU_ENTRY_POINT_RENAME(ucsdet_detectAll) -#define ucsdet_enableInputFilter U_ICU_ENTRY_POINT_RENAME(ucsdet_enableInputFilter) -#define ucsdet_getAllDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getAllDetectableCharsets) -#define ucsdet_getConfidence U_ICU_ENTRY_POINT_RENAME(ucsdet_getConfidence) 
-#define ucsdet_getDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getDetectableCharsets) -#define ucsdet_getLanguage U_ICU_ENTRY_POINT_RENAME(ucsdet_getLanguage) -#define ucsdet_getName U_ICU_ENTRY_POINT_RENAME(ucsdet_getName) -#define ucsdet_getUChars U_ICU_ENTRY_POINT_RENAME(ucsdet_getUChars) -#define ucsdet_isInputFilterEnabled U_ICU_ENTRY_POINT_RENAME(ucsdet_isInputFilterEnabled) -#define ucsdet_open U_ICU_ENTRY_POINT_RENAME(ucsdet_open) -#define ucsdet_setDeclaredEncoding U_ICU_ENTRY_POINT_RENAME(ucsdet_setDeclaredEncoding) -#define ucsdet_setDetectableCharset U_ICU_ENTRY_POINT_RENAME(ucsdet_setDetectableCharset) -#define ucsdet_setText U_ICU_ENTRY_POINT_RENAME(ucsdet_setText) -#define ucurr_countCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_countCurrencies) -#define ucurr_forLocale U_ICU_ENTRY_POINT_RENAME(ucurr_forLocale) -#define ucurr_forLocaleAndDate U_ICU_ENTRY_POINT_RENAME(ucurr_forLocaleAndDate) -#define ucurr_getDefaultFractionDigits U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigits) -#define ucurr_getDefaultFractionDigitsForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigitsForUsage) -#define ucurr_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucurr_getKeywordValuesForLocale) -#define ucurr_getName U_ICU_ENTRY_POINT_RENAME(ucurr_getName) -#define ucurr_getNumericCode U_ICU_ENTRY_POINT_RENAME(ucurr_getNumericCode) -#define ucurr_getPluralName U_ICU_ENTRY_POINT_RENAME(ucurr_getPluralName) -#define ucurr_getRoundingIncrement U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrement) -#define ucurr_getRoundingIncrementForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrementForUsage) -#define ucurr_isAvailable U_ICU_ENTRY_POINT_RENAME(ucurr_isAvailable) -#define ucurr_openISOCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_openISOCurrencies) -#define ucurr_register U_ICU_ENTRY_POINT_RENAME(ucurr_register) -#define ucurr_unregister U_ICU_ENTRY_POINT_RENAME(ucurr_unregister) -#define udat_adoptNumberFormat 
U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormat) -#define udat_adoptNumberFormatForFields U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormatForFields) -#define udat_applyPattern U_ICU_ENTRY_POINT_RENAME(udat_applyPattern) -#define udat_applyPatternRelative U_ICU_ENTRY_POINT_RENAME(udat_applyPatternRelative) -#define udat_clone U_ICU_ENTRY_POINT_RENAME(udat_clone) -#define udat_close U_ICU_ENTRY_POINT_RENAME(udat_close) -#define udat_countAvailable U_ICU_ENTRY_POINT_RENAME(udat_countAvailable) -#define udat_countSymbols U_ICU_ENTRY_POINT_RENAME(udat_countSymbols) -#define udat_format U_ICU_ENTRY_POINT_RENAME(udat_format) -#define udat_formatCalendar U_ICU_ENTRY_POINT_RENAME(udat_formatCalendar) -#define udat_formatCalendarForFields U_ICU_ENTRY_POINT_RENAME(udat_formatCalendarForFields) -#define udat_formatForFields U_ICU_ENTRY_POINT_RENAME(udat_formatForFields) -#define udat_get2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_get2DigitYearStart) -#define udat_getAvailable U_ICU_ENTRY_POINT_RENAME(udat_getAvailable) -#define udat_getBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_getBooleanAttribute) -#define udat_getCalendar U_ICU_ENTRY_POINT_RENAME(udat_getCalendar) -#define udat_getContext U_ICU_ENTRY_POINT_RENAME(udat_getContext) -#define udat_getLocaleByType U_ICU_ENTRY_POINT_RENAME(udat_getLocaleByType) -#define udat_getNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormat) -#define udat_getNumberFormatForField U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormatForField) -#define udat_getSymbols U_ICU_ENTRY_POINT_RENAME(udat_getSymbols) -#define udat_isLenient U_ICU_ENTRY_POINT_RENAME(udat_isLenient) -#define udat_open U_ICU_ENTRY_POINT_RENAME(udat_open) -#define udat_parse U_ICU_ENTRY_POINT_RENAME(udat_parse) -#define udat_parseCalendar U_ICU_ENTRY_POINT_RENAME(udat_parseCalendar) -#define udat_registerOpener U_ICU_ENTRY_POINT_RENAME(udat_registerOpener) -#define udat_set2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_set2DigitYearStart) -#define 
udat_setBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_setBooleanAttribute) -#define udat_setCalendar U_ICU_ENTRY_POINT_RENAME(udat_setCalendar) -#define udat_setContext U_ICU_ENTRY_POINT_RENAME(udat_setContext) -#define udat_setLenient U_ICU_ENTRY_POINT_RENAME(udat_setLenient) -#define udat_setNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_setNumberFormat) -#define udat_setSymbols U_ICU_ENTRY_POINT_RENAME(udat_setSymbols) -#define udat_toCalendarDateField U_ICU_ENTRY_POINT_RENAME(udat_toCalendarDateField) -#define udat_toPattern U_ICU_ENTRY_POINT_RENAME(udat_toPattern) -#define udat_toPatternRelativeDate U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeDate) -#define udat_toPatternRelativeTime U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeTime) -#define udat_unregisterOpener U_ICU_ENTRY_POINT_RENAME(udat_unregisterOpener) -#define udata_checkCommonData U_ICU_ENTRY_POINT_RENAME(udata_checkCommonData) -#define udata_close U_ICU_ENTRY_POINT_RENAME(udata_close) -#define udata_closeSwapper U_ICU_ENTRY_POINT_RENAME(udata_closeSwapper) -#define udata_getHeaderSize U_ICU_ENTRY_POINT_RENAME(udata_getHeaderSize) -#define udata_getInfo U_ICU_ENTRY_POINT_RENAME(udata_getInfo) -#define udata_getInfoSize U_ICU_ENTRY_POINT_RENAME(udata_getInfoSize) -#define udata_getLength U_ICU_ENTRY_POINT_RENAME(udata_getLength) -#define udata_getMemory U_ICU_ENTRY_POINT_RENAME(udata_getMemory) -#define udata_getRawMemory U_ICU_ENTRY_POINT_RENAME(udata_getRawMemory) -#define udata_open U_ICU_ENTRY_POINT_RENAME(udata_open) -#define udata_openChoice U_ICU_ENTRY_POINT_RENAME(udata_openChoice) -#define udata_openSwapper U_ICU_ENTRY_POINT_RENAME(udata_openSwapper) -#define udata_openSwapperForInputData U_ICU_ENTRY_POINT_RENAME(udata_openSwapperForInputData) -#define udata_printError U_ICU_ENTRY_POINT_RENAME(udata_printError) -#define udata_readInt16 U_ICU_ENTRY_POINT_RENAME(udata_readInt16) -#define udata_readInt32 U_ICU_ENTRY_POINT_RENAME(udata_readInt32) -#define udata_setAppData 
U_ICU_ENTRY_POINT_RENAME(udata_setAppData) -#define udata_setCommonData U_ICU_ENTRY_POINT_RENAME(udata_setCommonData) -#define udata_setFileAccess U_ICU_ENTRY_POINT_RENAME(udata_setFileAccess) -#define udata_swapDataHeader U_ICU_ENTRY_POINT_RENAME(udata_swapDataHeader) -#define udata_swapInvStringBlock U_ICU_ENTRY_POINT_RENAME(udata_swapInvStringBlock) -#define udatpg_addPattern U_ICU_ENTRY_POINT_RENAME(udatpg_addPattern) -#define udatpg_clone U_ICU_ENTRY_POINT_RENAME(udatpg_clone) -#define udatpg_close U_ICU_ENTRY_POINT_RENAME(udatpg_close) -#define udatpg_getAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemFormat) -#define udatpg_getAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemName) -#define udatpg_getBaseSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getBaseSkeleton) -#define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern) -#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions) -#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat) -#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal) -#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName) -#define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton) -#define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton) -#define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open) -#define udatpg_openBaseSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openBaseSkeletons) -#define udatpg_openEmpty U_ICU_ENTRY_POINT_RENAME(udatpg_openEmpty) -#define udatpg_openSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openSkeletons) -#define udatpg_replaceFieldTypes U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypes) -#define udatpg_replaceFieldTypesWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypesWithOptions) -#define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat) -#define udatpg_setAppendItemName 
U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName) -#define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat) -#define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal) -#define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap) -#define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close) -#define udtitvfmt_format U_ICU_ENTRY_POINT_RENAME(udtitvfmt_format) -#define udtitvfmt_open U_ICU_ENTRY_POINT_RENAME(udtitvfmt_open) -#define uenum_close U_ICU_ENTRY_POINT_RENAME(uenum_close) -#define uenum_count U_ICU_ENTRY_POINT_RENAME(uenum_count) -#define uenum_next U_ICU_ENTRY_POINT_RENAME(uenum_next) -#define uenum_nextDefault U_ICU_ENTRY_POINT_RENAME(uenum_nextDefault) -#define uenum_openCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openCharStringsEnumeration) -#define uenum_openFromStringEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openFromStringEnumeration) -#define uenum_openUCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openUCharStringsEnumeration) -#define uenum_reset U_ICU_ENTRY_POINT_RENAME(uenum_reset) -#define uenum_unext U_ICU_ENTRY_POINT_RENAME(uenum_unext) -#define uenum_unextDefault U_ICU_ENTRY_POINT_RENAME(uenum_unextDefault) -#define ufieldpositer_close U_ICU_ENTRY_POINT_RENAME(ufieldpositer_close) -#define ufieldpositer_next U_ICU_ENTRY_POINT_RENAME(ufieldpositer_next) -#define ufieldpositer_open U_ICU_ENTRY_POINT_RENAME(ufieldpositer_open) -#define ufile_close_translit U_ICU_ENTRY_POINT_RENAME(ufile_close_translit) -#define ufile_fill_uchar_buffer U_ICU_ENTRY_POINT_RENAME(ufile_fill_uchar_buffer) -#define ufile_flush_io U_ICU_ENTRY_POINT_RENAME(ufile_flush_io) -#define ufile_flush_translit U_ICU_ENTRY_POINT_RENAME(ufile_flush_translit) -#define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch) -#define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32) -#define ufmt_64tou U_ICU_ENTRY_POINT_RENAME(ufmt_64tou) -#define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close) -#define ufmt_defaultCPToUnicode 
U_ICU_ENTRY_POINT_RENAME(ufmt_defaultCPToUnicode) -#define ufmt_digitvalue U_ICU_ENTRY_POINT_RENAME(ufmt_digitvalue) -#define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex) -#define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength) -#define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate) -#define ufmt_getDecNumChars U_ICU_ENTRY_POINT_RENAME(ufmt_getDecNumChars) -#define ufmt_getDouble U_ICU_ENTRY_POINT_RENAME(ufmt_getDouble) -#define ufmt_getInt64 U_ICU_ENTRY_POINT_RENAME(ufmt_getInt64) -#define ufmt_getLong U_ICU_ENTRY_POINT_RENAME(ufmt_getLong) -#define ufmt_getObject U_ICU_ENTRY_POINT_RENAME(ufmt_getObject) -#define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType) -#define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars) -#define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric) -#define ufmt_isdigit U_ICU_ENTRY_POINT_RENAME(ufmt_isdigit) -#define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open) -#define ufmt_ptou U_ICU_ENTRY_POINT_RENAME(ufmt_ptou) -#define ufmt_uto64 U_ICU_ENTRY_POINT_RENAME(ufmt_uto64) -#define ufmt_utop U_ICU_ENTRY_POINT_RENAME(ufmt_utop) -#define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance) -#define ugender_getListGender U_ICU_ENTRY_POINT_RENAME(ugender_getListGender) -#define uhash_close U_ICU_ENTRY_POINT_RENAME(uhash_close) -#define uhash_compareCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareCaselessUnicodeString) -#define uhash_compareChars U_ICU_ENTRY_POINT_RENAME(uhash_compareChars) -#define uhash_compareIChars U_ICU_ENTRY_POINT_RENAME(uhash_compareIChars) -#define uhash_compareLong U_ICU_ENTRY_POINT_RENAME(uhash_compareLong) -#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet) -#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars) -#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString) -#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count) -#define 
uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable) -#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet) -#define uhash_equals U_ICU_ENTRY_POINT_RENAME(uhash_equals) -#define uhash_equalsScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_equalsScriptSet) -#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find) -#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get) -#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti) -#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString) -#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars) -#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars) -#define uhash_hashLong U_ICU_ENTRY_POINT_RENAME(uhash_hashLong) -#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet) -#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars) -#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString) -#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget) -#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti) -#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init) -#define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize) -#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput) -#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi) -#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove) -#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei) -#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement) -#define uhash_open U_ICU_ENTRY_POINT_RENAME(uhash_open) -#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize) -#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put) -#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti) -#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove) -#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll) -#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement) -#define uhash_removei 
U_ICU_ENTRY_POINT_RENAME(uhash_removei) -#define uhash_setKeyComparator U_ICU_ENTRY_POINT_RENAME(uhash_setKeyComparator) -#define uhash_setKeyDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setKeyDeleter) -#define uhash_setKeyHasher U_ICU_ENTRY_POINT_RENAME(uhash_setKeyHasher) -#define uhash_setResizePolicy U_ICU_ENTRY_POINT_RENAME(uhash_setResizePolicy) -#define uhash_setValueComparator U_ICU_ENTRY_POINT_RENAME(uhash_setValueComparator) -#define uhash_setValueDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setValueDeleter) -#define uidna_IDNToASCII U_ICU_ENTRY_POINT_RENAME(uidna_IDNToASCII) -#define uidna_IDNToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_IDNToUnicode) -#define uidna_close U_ICU_ENTRY_POINT_RENAME(uidna_close) -#define uidna_compare U_ICU_ENTRY_POINT_RENAME(uidna_compare) -#define uidna_labelToASCII U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII) -#define uidna_labelToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII_UTF8) -#define uidna_labelToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicode) -#define uidna_labelToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicodeUTF8) -#define uidna_nameToASCII U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII) -#define uidna_nameToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII_UTF8) -#define uidna_nameToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicode) -#define uidna_nameToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicodeUTF8) -#define uidna_openUTS46 U_ICU_ENTRY_POINT_RENAME(uidna_openUTS46) -#define uidna_toASCII U_ICU_ENTRY_POINT_RENAME(uidna_toASCII) -#define uidna_toUnicode U_ICU_ENTRY_POINT_RENAME(uidna_toUnicode) -#define uiter_current32 U_ICU_ENTRY_POINT_RENAME(uiter_current32) -#define uiter_getState U_ICU_ENTRY_POINT_RENAME(uiter_getState) -#define uiter_next32 U_ICU_ENTRY_POINT_RENAME(uiter_next32) -#define uiter_previous32 U_ICU_ENTRY_POINT_RENAME(uiter_previous32) -#define uiter_setCharacterIterator U_ICU_ENTRY_POINT_RENAME(uiter_setCharacterIterator) -#define uiter_setReplaceable 
U_ICU_ENTRY_POINT_RENAME(uiter_setReplaceable) -#define uiter_setState U_ICU_ENTRY_POINT_RENAME(uiter_setState) -#define uiter_setString U_ICU_ENTRY_POINT_RENAME(uiter_setString) -#define uiter_setUTF16BE U_ICU_ENTRY_POINT_RENAME(uiter_setUTF16BE) -#define uiter_setUTF8 U_ICU_ENTRY_POINT_RENAME(uiter_setUTF8) -#define uldn_close U_ICU_ENTRY_POINT_RENAME(uldn_close) -#define uldn_getContext U_ICU_ENTRY_POINT_RENAME(uldn_getContext) -#define uldn_getDialectHandling U_ICU_ENTRY_POINT_RENAME(uldn_getDialectHandling) -#define uldn_getLocale U_ICU_ENTRY_POINT_RENAME(uldn_getLocale) -#define uldn_keyDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyDisplayName) -#define uldn_keyValueDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyValueDisplayName) -#define uldn_languageDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_languageDisplayName) -#define uldn_localeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_localeDisplayName) -#define uldn_open U_ICU_ENTRY_POINT_RENAME(uldn_open) -#define uldn_openForContext U_ICU_ENTRY_POINT_RENAME(uldn_openForContext) -#define uldn_regionDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_regionDisplayName) -#define uldn_scriptCodeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptCodeDisplayName) -#define uldn_scriptDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptDisplayName) -#define uldn_variantDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_variantDisplayName) -#define ulist_addItemBeginList U_ICU_ENTRY_POINT_RENAME(ulist_addItemBeginList) -#define ulist_addItemEndList U_ICU_ENTRY_POINT_RENAME(ulist_addItemEndList) -#define ulist_close_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_close_keyword_values_iterator) -#define ulist_containsString U_ICU_ENTRY_POINT_RENAME(ulist_containsString) -#define ulist_count_keyword_values U_ICU_ENTRY_POINT_RENAME(ulist_count_keyword_values) -#define ulist_createEmptyList U_ICU_ENTRY_POINT_RENAME(ulist_createEmptyList) -#define ulist_deleteList U_ICU_ENTRY_POINT_RENAME(ulist_deleteList) -#define ulist_getListFromEnum 
U_ICU_ENTRY_POINT_RENAME(ulist_getListFromEnum) -#define ulist_getListSize U_ICU_ENTRY_POINT_RENAME(ulist_getListSize) -#define ulist_getNext U_ICU_ENTRY_POINT_RENAME(ulist_getNext) -#define ulist_next_keyword_value U_ICU_ENTRY_POINT_RENAME(ulist_next_keyword_value) -#define ulist_removeString U_ICU_ENTRY_POINT_RENAME(ulist_removeString) -#define ulist_resetList U_ICU_ENTRY_POINT_RENAME(ulist_resetList) -#define ulist_reset_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_reset_keyword_values_iterator) -#define ulistfmt_close U_ICU_ENTRY_POINT_RENAME(ulistfmt_close) -#define ulistfmt_format U_ICU_ENTRY_POINT_RENAME(ulistfmt_format) -#define ulistfmt_open U_ICU_ENTRY_POINT_RENAME(ulistfmt_open) -#define uloc_acceptLanguage U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguage) -#define uloc_acceptLanguageFromHTTP U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguageFromHTTP) -#define uloc_addLikelySubtags U_ICU_ENTRY_POINT_RENAME(uloc_addLikelySubtags) -#define uloc_canonicalize U_ICU_ENTRY_POINT_RENAME(uloc_canonicalize) -#define uloc_countAvailable U_ICU_ENTRY_POINT_RENAME(uloc_countAvailable) -#define uloc_forLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_forLanguageTag) -#define uloc_getAvailable U_ICU_ENTRY_POINT_RENAME(uloc_getAvailable) -#define uloc_getBaseName U_ICU_ENTRY_POINT_RENAME(uloc_getBaseName) -#define uloc_getCharacterOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getCharacterOrientation) -#define uloc_getCountry U_ICU_ENTRY_POINT_RENAME(uloc_getCountry) -#define uloc_getCurrentCountryID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentCountryID) -#define uloc_getCurrentLanguageID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentLanguageID) -#define uloc_getDefault U_ICU_ENTRY_POINT_RENAME(uloc_getDefault) -#define uloc_getDisplayCountry U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayCountry) -#define uloc_getDisplayKeyword U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeyword) -#define uloc_getDisplayKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeywordValue) -#define uloc_getDisplayLanguage 
U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayLanguage) -#define uloc_getDisplayName U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayName) -#define uloc_getDisplayScript U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScript) -#define uloc_getDisplayScriptInContext U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScriptInContext) -#define uloc_getDisplayVariant U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayVariant) -#define uloc_getISO3Country U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Country) -#define uloc_getISO3Language U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Language) -#define uloc_getISOCountries U_ICU_ENTRY_POINT_RENAME(uloc_getISOCountries) -#define uloc_getISOLanguages U_ICU_ENTRY_POINT_RENAME(uloc_getISOLanguages) -#define uloc_getKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getKeywordValue) -#define uloc_getLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLCID) -#define uloc_getLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getLanguage) -#define uloc_getLineOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getLineOrientation) -#define uloc_getLocaleForLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLocaleForLCID) -#define uloc_getName U_ICU_ENTRY_POINT_RENAME(uloc_getName) -#define uloc_getParent U_ICU_ENTRY_POINT_RENAME(uloc_getParent) -#define uloc_getScript U_ICU_ENTRY_POINT_RENAME(uloc_getScript) -#define uloc_getTableStringWithFallback U_ICU_ENTRY_POINT_RENAME(uloc_getTableStringWithFallback) -#define uloc_getVariant U_ICU_ENTRY_POINT_RENAME(uloc_getVariant) -#define uloc_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uloc_isRightToLeft) -#define uloc_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(uloc_minimizeSubtags) -#define uloc_openKeywordList U_ICU_ENTRY_POINT_RENAME(uloc_openKeywordList) -#define uloc_openKeywords U_ICU_ENTRY_POINT_RENAME(uloc_openKeywords) -#define uloc_setDefault U_ICU_ENTRY_POINT_RENAME(uloc_setDefault) -#define uloc_setKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_setKeywordValue) -#define uloc_toLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_toLanguageTag) -#define uloc_toLegacyKey U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyKey) 
-#define uloc_toLegacyType U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyType) -#define uloc_toUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleKey) -#define uloc_toUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleType) -#define ulocdata_close U_ICU_ENTRY_POINT_RENAME(ulocdata_close) -#define ulocdata_getCLDRVersion U_ICU_ENTRY_POINT_RENAME(ulocdata_getCLDRVersion) -#define ulocdata_getDelimiter U_ICU_ENTRY_POINT_RENAME(ulocdata_getDelimiter) -#define ulocdata_getExemplarSet U_ICU_ENTRY_POINT_RENAME(ulocdata_getExemplarSet) -#define ulocdata_getLocaleDisplayPattern U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleDisplayPattern) -#define ulocdata_getLocaleSeparator U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleSeparator) -#define ulocdata_getMeasurementSystem U_ICU_ENTRY_POINT_RENAME(ulocdata_getMeasurementSystem) -#define ulocdata_getNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_getNoSubstitute) -#define ulocdata_getPaperSize U_ICU_ENTRY_POINT_RENAME(ulocdata_getPaperSize) -#define ulocdata_open U_ICU_ENTRY_POINT_RENAME(ulocdata_open) -#define ulocdata_setNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_setNoSubstitute) -#define ulocimp_forLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_forLanguageTag) -#define ulocimp_getCountry U_ICU_ENTRY_POINT_RENAME(ulocimp_getCountry) -#define ulocimp_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocimp_getLanguage) -#define ulocimp_getRegionForSupplementalData U_ICU_ENTRY_POINT_RENAME(ulocimp_getRegionForSupplementalData) -#define ulocimp_getScript U_ICU_ENTRY_POINT_RENAME(ulocimp_getScript) -#define ulocimp_toBcpKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpKey) -#define ulocimp_toBcpType U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpType) -#define ulocimp_toLegacyKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKey) -#define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType) -#define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey) -#define ultag_isUnicodeLocaleType 
U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType) -#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern) -#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe) -#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone) -#define umsg_close U_ICU_ENTRY_POINT_RENAME(umsg_close) -#define umsg_format U_ICU_ENTRY_POINT_RENAME(umsg_format) -#define umsg_getLocale U_ICU_ENTRY_POINT_RENAME(umsg_getLocale) -#define umsg_open U_ICU_ENTRY_POINT_RENAME(umsg_open) -#define umsg_parse U_ICU_ENTRY_POINT_RENAME(umsg_parse) -#define umsg_setLocale U_ICU_ENTRY_POINT_RENAME(umsg_setLocale) -#define umsg_toPattern U_ICU_ENTRY_POINT_RENAME(umsg_toPattern) -#define umsg_vformat U_ICU_ENTRY_POINT_RENAME(umsg_vformat) -#define umsg_vparse U_ICU_ENTRY_POINT_RENAME(umsg_vparse) -#define umtx_condBroadcast U_ICU_ENTRY_POINT_RENAME(umtx_condBroadcast) -#define umtx_condSignal U_ICU_ENTRY_POINT_RENAME(umtx_condSignal) -#define umtx_condWait U_ICU_ENTRY_POINT_RENAME(umtx_condWait) -#define umtx_lock U_ICU_ENTRY_POINT_RENAME(umtx_lock) -#define umtx_unlock U_ICU_ENTRY_POINT_RENAME(umtx_unlock) -#define umutablecptrie_buildImmutable U_ICU_ENTRY_POINT_RENAME(umutablecptrie_buildImmutable) -#define umutablecptrie_clone U_ICU_ENTRY_POINT_RENAME(umutablecptrie_clone) -#define umutablecptrie_close U_ICU_ENTRY_POINT_RENAME(umutablecptrie_close) -#define umutablecptrie_fromUCPMap U_ICU_ENTRY_POINT_RENAME(umutablecptrie_fromUCPMap) -#define umutablecptrie_fromUCPTrie U_ICU_ENTRY_POINT_RENAME(umutablecptrie_fromUCPTrie) -#define umutablecptrie_get U_ICU_ENTRY_POINT_RENAME(umutablecptrie_get) -#define umutablecptrie_getRange U_ICU_ENTRY_POINT_RENAME(umutablecptrie_getRange) -#define umutablecptrie_open U_ICU_ENTRY_POINT_RENAME(umutablecptrie_open) -#define umutablecptrie_set U_ICU_ENTRY_POINT_RENAME(umutablecptrie_set) -#define umutablecptrie_setRange U_ICU_ENTRY_POINT_RENAME(umutablecptrie_setRange) -#define uniset_getUnicode32Instance 
U_ICU_ENTRY_POINT_RENAME(uniset_getUnicode32Instance) -#define unorm2_append U_ICU_ENTRY_POINT_RENAME(unorm2_append) -#define unorm2_close U_ICU_ENTRY_POINT_RENAME(unorm2_close) -#define unorm2_composePair U_ICU_ENTRY_POINT_RENAME(unorm2_composePair) -#define unorm2_getCombiningClass U_ICU_ENTRY_POINT_RENAME(unorm2_getCombiningClass) -#define unorm2_getDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getDecomposition) -#define unorm2_getInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getInstance) -#define unorm2_getNFCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFCInstance) -#define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance) -#define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance) -#define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance) -#define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance) -#define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition) -#define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter) -#define unorm2_hasBoundaryBefore U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryBefore) -#define unorm2_isInert U_ICU_ENTRY_POINT_RENAME(unorm2_isInert) -#define unorm2_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm2_isNormalized) -#define unorm2_normalize U_ICU_ENTRY_POINT_RENAME(unorm2_normalize) -#define unorm2_normalizeSecondAndAppend U_ICU_ENTRY_POINT_RENAME(unorm2_normalizeSecondAndAppend) -#define unorm2_openFiltered U_ICU_ENTRY_POINT_RENAME(unorm2_openFiltered) -#define unorm2_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm2_quickCheck) -#define unorm2_spanQuickCheckYes U_ICU_ENTRY_POINT_RENAME(unorm2_spanQuickCheckYes) -#define unorm2_swap U_ICU_ENTRY_POINT_RENAME(unorm2_swap) -#define unorm_compare U_ICU_ENTRY_POINT_RENAME(unorm_compare) -#define unorm_concatenate U_ICU_ENTRY_POINT_RENAME(unorm_concatenate) -#define unorm_getFCD16 U_ICU_ENTRY_POINT_RENAME(unorm_getFCD16) -#define unorm_getQuickCheck 
U_ICU_ENTRY_POINT_RENAME(unorm_getQuickCheck) -#define unorm_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm_isNormalized) -#define unorm_isNormalizedWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_isNormalizedWithOptions) -#define unorm_next U_ICU_ENTRY_POINT_RENAME(unorm_next) -#define unorm_normalize U_ICU_ENTRY_POINT_RENAME(unorm_normalize) -#define unorm_previous U_ICU_ENTRY_POINT_RENAME(unorm_previous) -#define unorm_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm_quickCheck) -#define unorm_quickCheckWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_quickCheckWithOptions) -#define unum_applyPattern U_ICU_ENTRY_POINT_RENAME(unum_applyPattern) -#define unum_clone U_ICU_ENTRY_POINT_RENAME(unum_clone) -#define unum_close U_ICU_ENTRY_POINT_RENAME(unum_close) -#define unum_countAvailable U_ICU_ENTRY_POINT_RENAME(unum_countAvailable) -#define unum_format U_ICU_ENTRY_POINT_RENAME(unum_format) -#define unum_formatDecimal U_ICU_ENTRY_POINT_RENAME(unum_formatDecimal) -#define unum_formatDouble U_ICU_ENTRY_POINT_RENAME(unum_formatDouble) -#define unum_formatDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleCurrency) -#define unum_formatDoubleForFields U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleForFields) -#define unum_formatInt64 U_ICU_ENTRY_POINT_RENAME(unum_formatInt64) -#define unum_formatUFormattable U_ICU_ENTRY_POINT_RENAME(unum_formatUFormattable) -#define unum_getAttribute U_ICU_ENTRY_POINT_RENAME(unum_getAttribute) -#define unum_getAvailable U_ICU_ENTRY_POINT_RENAME(unum_getAvailable) -#define unum_getContext U_ICU_ENTRY_POINT_RENAME(unum_getContext) -#define unum_getDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_getDoubleAttribute) -#define unum_getLocaleByType U_ICU_ENTRY_POINT_RENAME(unum_getLocaleByType) -#define unum_getSymbol U_ICU_ENTRY_POINT_RENAME(unum_getSymbol) -#define unum_getTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_getTextAttribute) -#define unum_open U_ICU_ENTRY_POINT_RENAME(unum_open) -#define unum_parse U_ICU_ENTRY_POINT_RENAME(unum_parse) -#define 
unum_parseDecimal U_ICU_ENTRY_POINT_RENAME(unum_parseDecimal) -#define unum_parseDouble U_ICU_ENTRY_POINT_RENAME(unum_parseDouble) -#define unum_parseDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_parseDoubleCurrency) -#define unum_parseInt64 U_ICU_ENTRY_POINT_RENAME(unum_parseInt64) -#define unum_parseToUFormattable U_ICU_ENTRY_POINT_RENAME(unum_parseToUFormattable) -#define unum_setAttribute U_ICU_ENTRY_POINT_RENAME(unum_setAttribute) -#define unum_setContext U_ICU_ENTRY_POINT_RENAME(unum_setContext) -#define unum_setDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_setDoubleAttribute) -#define unum_setSymbol U_ICU_ENTRY_POINT_RENAME(unum_setSymbol) -#define unum_setTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_setTextAttribute) -#define unum_toPattern U_ICU_ENTRY_POINT_RENAME(unum_toPattern) -#define unumf_close U_ICU_ENTRY_POINT_RENAME(unumf_close) -#define unumf_closeResult U_ICU_ENTRY_POINT_RENAME(unumf_closeResult) -#define unumf_formatDecimal U_ICU_ENTRY_POINT_RENAME(unumf_formatDecimal) -#define unumf_formatDouble U_ICU_ENTRY_POINT_RENAME(unumf_formatDouble) -#define unumf_formatInt U_ICU_ENTRY_POINT_RENAME(unumf_formatInt) -#define unumf_openForSkeletonAndLocale U_ICU_ENTRY_POINT_RENAME(unumf_openForSkeletonAndLocale) -#define unumf_openResult U_ICU_ENTRY_POINT_RENAME(unumf_openResult) -#define unumf_resultGetAllFieldPositions U_ICU_ENTRY_POINT_RENAME(unumf_resultGetAllFieldPositions) -#define unumf_resultNextFieldPosition U_ICU_ENTRY_POINT_RENAME(unumf_resultNextFieldPosition) -#define unumf_resultToString U_ICU_ENTRY_POINT_RENAME(unumf_resultToString) -#define unumsys_close U_ICU_ENTRY_POINT_RENAME(unumsys_close) -#define unumsys_getDescription U_ICU_ENTRY_POINT_RENAME(unumsys_getDescription) -#define unumsys_getName U_ICU_ENTRY_POINT_RENAME(unumsys_getName) -#define unumsys_getRadix U_ICU_ENTRY_POINT_RENAME(unumsys_getRadix) -#define unumsys_isAlgorithmic U_ICU_ENTRY_POINT_RENAME(unumsys_isAlgorithmic) -#define unumsys_open 
U_ICU_ENTRY_POINT_RENAME(unumsys_open) -#define unumsys_openAvailableNames U_ICU_ENTRY_POINT_RENAME(unumsys_openAvailableNames) -#define unumsys_openByName U_ICU_ENTRY_POINT_RENAME(unumsys_openByName) -#define uplrules_close U_ICU_ENTRY_POINT_RENAME(uplrules_close) -#define uplrules_getKeywords U_ICU_ENTRY_POINT_RENAME(uplrules_getKeywords) -#define uplrules_open U_ICU_ENTRY_POINT_RENAME(uplrules_open) -#define uplrules_openForType U_ICU_ENTRY_POINT_RENAME(uplrules_openForType) -#define uplrules_select U_ICU_ENTRY_POINT_RENAME(uplrules_select) -#define uplrules_selectWithFormat U_ICU_ENTRY_POINT_RENAME(uplrules_selectWithFormat) -#define uplug_closeLibrary U_ICU_ENTRY_POINT_RENAME(uplug_closeLibrary) -#define uplug_findLibrary U_ICU_ENTRY_POINT_RENAME(uplug_findLibrary) -#define uplug_getConfiguration U_ICU_ENTRY_POINT_RENAME(uplug_getConfiguration) -#define uplug_getContext U_ICU_ENTRY_POINT_RENAME(uplug_getContext) -#define uplug_getCurrentLevel U_ICU_ENTRY_POINT_RENAME(uplug_getCurrentLevel) -#define uplug_getLibrary U_ICU_ENTRY_POINT_RENAME(uplug_getLibrary) -#define uplug_getLibraryName U_ICU_ENTRY_POINT_RENAME(uplug_getLibraryName) -#define uplug_getPlugInternal U_ICU_ENTRY_POINT_RENAME(uplug_getPlugInternal) -#define uplug_getPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLevel) -#define uplug_getPlugLoadStatus U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLoadStatus) -#define uplug_getPlugName U_ICU_ENTRY_POINT_RENAME(uplug_getPlugName) -#define uplug_getPluginFile U_ICU_ENTRY_POINT_RENAME(uplug_getPluginFile) -#define uplug_getSymbolName U_ICU_ENTRY_POINT_RENAME(uplug_getSymbolName) -#define uplug_init U_ICU_ENTRY_POINT_RENAME(uplug_init) -#define uplug_loadPlugFromEntrypoint U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromEntrypoint) -#define uplug_loadPlugFromLibrary U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromLibrary) -#define uplug_nextPlug U_ICU_ENTRY_POINT_RENAME(uplug_nextPlug) -#define uplug_openLibrary U_ICU_ENTRY_POINT_RENAME(uplug_openLibrary) -#define 
uplug_removePlug U_ICU_ENTRY_POINT_RENAME(uplug_removePlug) -#define uplug_setContext U_ICU_ENTRY_POINT_RENAME(uplug_setContext) -#define uplug_setPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_setPlugLevel) -#define uplug_setPlugName U_ICU_ENTRY_POINT_RENAME(uplug_setPlugName) -#define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload) -#define uprops_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uprops_addPropertyStarts) -#define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource) -#define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts) -#define uprv_add32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_add32_overflow) -#define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy) -#define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic) -#define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower) -#define uprv_calloc U_ICU_ENTRY_POINT_RENAME(uprv_calloc) -#define uprv_ceil U_ICU_ENTRY_POINT_RENAME(uprv_ceil) -#define uprv_compareASCIIPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareASCIIPropertyNames) -#define uprv_compareEBCDICPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareEBCDICPropertyNames) -#define uprv_compareInvAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvAscii) -#define uprv_compareInvEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdic) -#define uprv_compareInvEbcdicAsAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdicAsAscii) -#define uprv_convertToLCID U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCID) -#define uprv_convertToLCIDPlatform U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCIDPlatform) -#define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix) -#define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii) -#define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic) -#define uprv_currencyLeads U_ICU_ENTRY_POINT_RENAME(uprv_currencyLeads) -#define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus) -#define 
uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault) -#define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding) -#define uprv_decContextGetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetStatus) -#define uprv_decContextRestoreStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextRestoreStatus) -#define uprv_decContextSaveStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSaveStatus) -#define uprv_decContextSetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetRounding) -#define uprv_decContextSetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatus) -#define uprv_decContextSetStatusFromString U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromString) -#define uprv_decContextSetStatusFromStringQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromStringQuiet) -#define uprv_decContextSetStatusQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusQuiet) -#define uprv_decContextStatusToString U_ICU_ENTRY_POINT_RENAME(uprv_decContextStatusToString) -#define uprv_decContextTestSavedStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestSavedStatus) -#define uprv_decContextTestStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestStatus) -#define uprv_decContextZeroStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextZeroStatus) -#define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs) -#define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd) -#define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd) -#define uprv_decNumberClass U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClass) -#define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString) -#define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare) -#define uprv_decNumberCompareSignal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal) -#define uprv_decNumberCompareTotal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotal) -#define uprv_decNumberCompareTotalMag 
U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotalMag) -#define uprv_decNumberCopy U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopy) -#define uprv_decNumberCopyAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyAbs) -#define uprv_decNumberCopyNegate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyNegate) -#define uprv_decNumberCopySign U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopySign) -#define uprv_decNumberDivide U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivide) -#define uprv_decNumberDivideInteger U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivideInteger) -#define uprv_decNumberExp U_ICU_ENTRY_POINT_RENAME(uprv_decNumberExp) -#define uprv_decNumberFMA U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFMA) -#define uprv_decNumberFromInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromInt32) -#define uprv_decNumberFromString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromString) -#define uprv_decNumberFromUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromUInt32) -#define uprv_decNumberGetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberGetBCD) -#define uprv_decNumberInvert U_ICU_ENTRY_POINT_RENAME(uprv_decNumberInvert) -#define uprv_decNumberIsNormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsNormal) -#define uprv_decNumberIsSubnormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsSubnormal) -#define uprv_decNumberLn U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLn) -#define uprv_decNumberLog10 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLog10) -#define uprv_decNumberLogB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLogB) -#define uprv_decNumberMax U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMax) -#define uprv_decNumberMaxMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMaxMag) -#define uprv_decNumberMin U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMin) -#define uprv_decNumberMinMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinMag) -#define uprv_decNumberMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinus) -#define uprv_decNumberMultiply U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMultiply) -#define uprv_decNumberNextMinus 
U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextMinus) -#define uprv_decNumberNextPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextPlus) -#define uprv_decNumberNextToward U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextToward) -#define uprv_decNumberNormalize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNormalize) -#define uprv_decNumberOr U_ICU_ENTRY_POINT_RENAME(uprv_decNumberOr) -#define uprv_decNumberPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPlus) -#define uprv_decNumberPower U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPower) -#define uprv_decNumberQuantize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberQuantize) -#define uprv_decNumberReduce U_ICU_ENTRY_POINT_RENAME(uprv_decNumberReduce) -#define uprv_decNumberRemainder U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainder) -#define uprv_decNumberRemainderNear U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainderNear) -#define uprv_decNumberRescale U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRescale) -#define uprv_decNumberRotate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRotate) -#define uprv_decNumberSameQuantum U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSameQuantum) -#define uprv_decNumberScaleB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberScaleB) -#define uprv_decNumberSetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSetBCD) -#define uprv_decNumberShift U_ICU_ENTRY_POINT_RENAME(uprv_decNumberShift) -#define uprv_decNumberSquareRoot U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSquareRoot) -#define uprv_decNumberSubtract U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSubtract) -#define uprv_decNumberToEngString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToEngString) -#define uprv_decNumberToInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToInt32) -#define uprv_decNumberToIntegralExact U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralExact) -#define uprv_decNumberToIntegralValue U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralValue) -#define uprv_decNumberToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToString) -#define uprv_decNumberToUInt32 
U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToUInt32) -#define uprv_decNumberTrim U_ICU_ENTRY_POINT_RENAME(uprv_decNumberTrim) -#define uprv_decNumberVersion U_ICU_ENTRY_POINT_RENAME(uprv_decNumberVersion) -#define uprv_decNumberXor U_ICU_ENTRY_POINT_RENAME(uprv_decNumberXor) -#define uprv_decNumberZero U_ICU_ENTRY_POINT_RENAME(uprv_decNumberZero) -#define uprv_deleteConditionalCE32 U_ICU_ENTRY_POINT_RENAME(uprv_deleteConditionalCE32) -#define uprv_deleteUObject U_ICU_ENTRY_POINT_RENAME(uprv_deleteUObject) -#define uprv_dl_close U_ICU_ENTRY_POINT_RENAME(uprv_dl_close) -#define uprv_dl_open U_ICU_ENTRY_POINT_RENAME(uprv_dl_open) -#define uprv_dlsym_func U_ICU_ENTRY_POINT_RENAME(uprv_dlsym_func) -#define uprv_eastrncpy U_ICU_ENTRY_POINT_RENAME(uprv_eastrncpy) -#define uprv_ebcdicFromAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicFromAscii) -#define uprv_ebcdicToLowercaseAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToLowercaseAscii) -#define uprv_ebcdictolower U_ICU_ENTRY_POINT_RENAME(uprv_ebcdictolower) -#define uprv_fabs U_ICU_ENTRY_POINT_RENAME(uprv_fabs) -#define uprv_floor U_ICU_ENTRY_POINT_RENAME(uprv_floor) -#define uprv_fmax U_ICU_ENTRY_POINT_RENAME(uprv_fmax) -#define uprv_fmin U_ICU_ENTRY_POINT_RENAME(uprv_fmin) -#define uprv_fmod U_ICU_ENTRY_POINT_RENAME(uprv_fmod) -#define uprv_free U_ICU_ENTRY_POINT_RENAME(uprv_free) -#define uprv_getCharNameCharacters U_ICU_ENTRY_POINT_RENAME(uprv_getCharNameCharacters) -#define uprv_getDefaultLocaleID U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultLocaleID) -#define uprv_getInfinity U_ICU_ENTRY_POINT_RENAME(uprv_getInfinity) -#define uprv_getMaxCharNameLength U_ICU_ENTRY_POINT_RENAME(uprv_getMaxCharNameLength) -#define uprv_getMaxValues U_ICU_ENTRY_POINT_RENAME(uprv_getMaxValues) -#define uprv_getNaN U_ICU_ENTRY_POINT_RENAME(uprv_getNaN) -#define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime) -#define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName) -#define uprv_getUTCtime 
U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime) -#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator) -#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter) -#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite) -#define uprv_isInvariantString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantString) -#define uprv_isInvariantUString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantUString) -#define uprv_isNaN U_ICU_ENTRY_POINT_RENAME(uprv_isNaN) -#define uprv_isNegativeInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isNegativeInfinity) -#define uprv_isPositiveInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isPositiveInfinity) -#define uprv_itou U_ICU_ENTRY_POINT_RENAME(uprv_itou) -#define uprv_log U_ICU_ENTRY_POINT_RENAME(uprv_log) -#define uprv_malloc U_ICU_ENTRY_POINT_RENAME(uprv_malloc) -#define uprv_mapFile U_ICU_ENTRY_POINT_RENAME(uprv_mapFile) -#define uprv_max U_ICU_ENTRY_POINT_RENAME(uprv_max) -#define uprv_maxMantissa U_ICU_ENTRY_POINT_RENAME(uprv_maxMantissa) -#define uprv_maximumPtr U_ICU_ENTRY_POINT_RENAME(uprv_maximumPtr) -#define uprv_min U_ICU_ENTRY_POINT_RENAME(uprv_min) -#define uprv_modf U_ICU_ENTRY_POINT_RENAME(uprv_modf) -#define uprv_mul32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_mul32_overflow) -#define uprv_parseCurrency U_ICU_ENTRY_POINT_RENAME(uprv_parseCurrency) -#define uprv_pathIsAbsolute U_ICU_ENTRY_POINT_RENAME(uprv_pathIsAbsolute) -#define uprv_pow U_ICU_ENTRY_POINT_RENAME(uprv_pow) -#define uprv_pow10 U_ICU_ENTRY_POINT_RENAME(uprv_pow10) -#define uprv_realloc U_ICU_ENTRY_POINT_RENAME(uprv_realloc) -#define uprv_round U_ICU_ENTRY_POINT_RENAME(uprv_round) -#define uprv_sortArray U_ICU_ENTRY_POINT_RENAME(uprv_sortArray) -#define uprv_stableBinarySearch U_ICU_ENTRY_POINT_RENAME(uprv_stableBinarySearch) -#define uprv_strCompare U_ICU_ENTRY_POINT_RENAME(uprv_strCompare) -#define uprv_strdup U_ICU_ENTRY_POINT_RENAME(uprv_strdup) -#define uprv_stricmp U_ICU_ENTRY_POINT_RENAME(uprv_stricmp) -#define uprv_strndup 
U_ICU_ENTRY_POINT_RENAME(uprv_strndup) -#define uprv_strnicmp U_ICU_ENTRY_POINT_RENAME(uprv_strnicmp) -#define uprv_syntaxError U_ICU_ENTRY_POINT_RENAME(uprv_syntaxError) -#define uprv_timezone U_ICU_ENTRY_POINT_RENAME(uprv_timezone) -#define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper) -#define uprv_trunc U_ICU_ENTRY_POINT_RENAME(uprv_trunc) -#define uprv_tzname U_ICU_ENTRY_POINT_RENAME(uprv_tzname) -#define uprv_tzname_clear_cache U_ICU_ENTRY_POINT_RENAME(uprv_tzname_clear_cache) -#define uprv_tzset U_ICU_ENTRY_POINT_RENAME(uprv_tzset) -#define uprv_uint16Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint16Comparator) -#define uprv_uint32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint32Comparator) -#define uprv_unmapFile U_ICU_ENTRY_POINT_RENAME(uprv_unmapFile) -#define upvec_cloneArray U_ICU_ENTRY_POINT_RENAME(upvec_cloneArray) -#define upvec_close U_ICU_ENTRY_POINT_RENAME(upvec_close) -#define upvec_compact U_ICU_ENTRY_POINT_RENAME(upvec_compact) -#define upvec_compactToUTrie2Handler U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2Handler) -#define upvec_compactToUTrie2WithRowIndexes U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2WithRowIndexes) -#define upvec_getArray U_ICU_ENTRY_POINT_RENAME(upvec_getArray) -#define upvec_getRow U_ICU_ENTRY_POINT_RENAME(upvec_getRow) -#define upvec_getValue U_ICU_ENTRY_POINT_RENAME(upvec_getValue) -#define upvec_open U_ICU_ENTRY_POINT_RENAME(upvec_open) -#define upvec_setValue U_ICU_ENTRY_POINT_RENAME(upvec_setValue) -#define uregex_appendReplacement U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacement) -#define uregex_appendReplacementUText U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacementUText) -#define uregex_appendTail U_ICU_ENTRY_POINT_RENAME(uregex_appendTail) -#define uregex_appendTailUText U_ICU_ENTRY_POINT_RENAME(uregex_appendTailUText) -#define uregex_clone U_ICU_ENTRY_POINT_RENAME(uregex_clone) -#define uregex_close U_ICU_ENTRY_POINT_RENAME(uregex_close) -#define uregex_end U_ICU_ENTRY_POINT_RENAME(uregex_end) -#define 
uregex_end64 U_ICU_ENTRY_POINT_RENAME(uregex_end64) -#define uregex_find U_ICU_ENTRY_POINT_RENAME(uregex_find) -#define uregex_find64 U_ICU_ENTRY_POINT_RENAME(uregex_find64) -#define uregex_findNext U_ICU_ENTRY_POINT_RENAME(uregex_findNext) -#define uregex_flags U_ICU_ENTRY_POINT_RENAME(uregex_flags) -#define uregex_getFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_getFindProgressCallback) -#define uregex_getMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_getMatchCallback) -#define uregex_getStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_getStackLimit) -#define uregex_getText U_ICU_ENTRY_POINT_RENAME(uregex_getText) -#define uregex_getTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_getTimeLimit) -#define uregex_getUText U_ICU_ENTRY_POINT_RENAME(uregex_getUText) -#define uregex_group U_ICU_ENTRY_POINT_RENAME(uregex_group) -#define uregex_groupCount U_ICU_ENTRY_POINT_RENAME(uregex_groupCount) -#define uregex_groupNumberFromCName U_ICU_ENTRY_POINT_RENAME(uregex_groupNumberFromCName) -#define uregex_groupNumberFromName U_ICU_ENTRY_POINT_RENAME(uregex_groupNumberFromName) -#define uregex_groupUText U_ICU_ENTRY_POINT_RENAME(uregex_groupUText) -#define uregex_hasAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasAnchoringBounds) -#define uregex_hasTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasTransparentBounds) -#define uregex_hitEnd U_ICU_ENTRY_POINT_RENAME(uregex_hitEnd) -#define uregex_lookingAt U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt) -#define uregex_lookingAt64 U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt64) -#define uregex_matches U_ICU_ENTRY_POINT_RENAME(uregex_matches) -#define uregex_matches64 U_ICU_ENTRY_POINT_RENAME(uregex_matches64) -#define uregex_open U_ICU_ENTRY_POINT_RENAME(uregex_open) -#define uregex_openC U_ICU_ENTRY_POINT_RENAME(uregex_openC) -#define uregex_openUText U_ICU_ENTRY_POINT_RENAME(uregex_openUText) -#define uregex_pattern U_ICU_ENTRY_POINT_RENAME(uregex_pattern) -#define uregex_patternUText U_ICU_ENTRY_POINT_RENAME(uregex_patternUText) 
-#define uregex_refreshUText U_ICU_ENTRY_POINT_RENAME(uregex_refreshUText) -#define uregex_regionEnd U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd) -#define uregex_regionEnd64 U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd64) -#define uregex_regionStart U_ICU_ENTRY_POINT_RENAME(uregex_regionStart) -#define uregex_regionStart64 U_ICU_ENTRY_POINT_RENAME(uregex_regionStart64) -#define uregex_replaceAll U_ICU_ENTRY_POINT_RENAME(uregex_replaceAll) -#define uregex_replaceAllUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceAllUText) -#define uregex_replaceFirst U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirst) -#define uregex_replaceFirstUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirstUText) -#define uregex_requireEnd U_ICU_ENTRY_POINT_RENAME(uregex_requireEnd) -#define uregex_reset U_ICU_ENTRY_POINT_RENAME(uregex_reset) -#define uregex_reset64 U_ICU_ENTRY_POINT_RENAME(uregex_reset64) -#define uregex_setFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_setFindProgressCallback) -#define uregex_setMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_setMatchCallback) -#define uregex_setRegion U_ICU_ENTRY_POINT_RENAME(uregex_setRegion) -#define uregex_setRegion64 U_ICU_ENTRY_POINT_RENAME(uregex_setRegion64) -#define uregex_setRegionAndStart U_ICU_ENTRY_POINT_RENAME(uregex_setRegionAndStart) -#define uregex_setStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_setStackLimit) -#define uregex_setText U_ICU_ENTRY_POINT_RENAME(uregex_setText) -#define uregex_setTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_setTimeLimit) -#define uregex_setUText U_ICU_ENTRY_POINT_RENAME(uregex_setUText) -#define uregex_split U_ICU_ENTRY_POINT_RENAME(uregex_split) -#define uregex_splitUText U_ICU_ENTRY_POINT_RENAME(uregex_splitUText) -#define uregex_start U_ICU_ENTRY_POINT_RENAME(uregex_start) -#define uregex_start64 U_ICU_ENTRY_POINT_RENAME(uregex_start64) -#define uregex_ucstr_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_ucstr_unescape_charAt) -#define uregex_useAnchoringBounds 
U_ICU_ENTRY_POINT_RENAME(uregex_useAnchoringBounds) -#define uregex_useTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_useTransparentBounds) -#define uregex_utext_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_utext_unescape_charAt) -#define uregion_areEqual U_ICU_ENTRY_POINT_RENAME(uregion_areEqual) -#define uregion_contains U_ICU_ENTRY_POINT_RENAME(uregion_contains) -#define uregion_getAvailable U_ICU_ENTRY_POINT_RENAME(uregion_getAvailable) -#define uregion_getContainedRegions U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegions) -#define uregion_getContainedRegionsOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegionsOfType) -#define uregion_getContainingRegion U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegion) -#define uregion_getContainingRegionOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegionOfType) -#define uregion_getNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getNumericCode) -#define uregion_getPreferredValues U_ICU_ENTRY_POINT_RENAME(uregion_getPreferredValues) -#define uregion_getRegionCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionCode) -#define uregion_getRegionFromCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromCode) -#define uregion_getRegionFromNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromNumericCode) -#define uregion_getType U_ICU_ENTRY_POINT_RENAME(uregion_getType) -#define ureldatefmt_close U_ICU_ENTRY_POINT_RENAME(ureldatefmt_close) -#define ureldatefmt_combineDateAndTime U_ICU_ENTRY_POINT_RENAME(ureldatefmt_combineDateAndTime) -#define ureldatefmt_format U_ICU_ENTRY_POINT_RENAME(ureldatefmt_format) -#define ureldatefmt_formatNumeric U_ICU_ENTRY_POINT_RENAME(ureldatefmt_formatNumeric) -#define ureldatefmt_open U_ICU_ENTRY_POINT_RENAME(ureldatefmt_open) -#define ures_close U_ICU_ENTRY_POINT_RENAME(ures_close) -#define ures_copyResb U_ICU_ENTRY_POINT_RENAME(ures_copyResb) -#define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems) -#define ures_findResource 
U_ICU_ENTRY_POINT_RENAME(ures_findResource) -#define ures_findSubResource U_ICU_ENTRY_POINT_RENAME(ures_findSubResource) -#define ures_getAllItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllItemsWithFallback) -#define ures_getBinary U_ICU_ENTRY_POINT_RENAME(ures_getBinary) -#define ures_getByIndex U_ICU_ENTRY_POINT_RENAME(ures_getByIndex) -#define ures_getByKey U_ICU_ENTRY_POINT_RENAME(ures_getByKey) -#define ures_getByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getByKeyWithFallback) -#define ures_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ures_getFunctionalEquivalent) -#define ures_getInt U_ICU_ENTRY_POINT_RENAME(ures_getInt) -#define ures_getIntVector U_ICU_ENTRY_POINT_RENAME(ures_getIntVector) -#define ures_getKey U_ICU_ENTRY_POINT_RENAME(ures_getKey) -#define ures_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ures_getKeywordValues) -#define ures_getLocale U_ICU_ENTRY_POINT_RENAME(ures_getLocale) -#define ures_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ures_getLocaleByType) -#define ures_getLocaleInternal U_ICU_ENTRY_POINT_RENAME(ures_getLocaleInternal) -#define ures_getName U_ICU_ENTRY_POINT_RENAME(ures_getName) -#define ures_getNextResource U_ICU_ENTRY_POINT_RENAME(ures_getNextResource) -#define ures_getNextString U_ICU_ENTRY_POINT_RENAME(ures_getNextString) -#define ures_getSize U_ICU_ENTRY_POINT_RENAME(ures_getSize) -#define ures_getString U_ICU_ENTRY_POINT_RENAME(ures_getString) -#define ures_getStringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getStringByIndex) -#define ures_getStringByKey U_ICU_ENTRY_POINT_RENAME(ures_getStringByKey) -#define ures_getStringByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getStringByKeyWithFallback) -#define ures_getType U_ICU_ENTRY_POINT_RENAME(ures_getType) -#define ures_getUInt U_ICU_ENTRY_POINT_RENAME(ures_getUInt) -#define ures_getUTF8String U_ICU_ENTRY_POINT_RENAME(ures_getUTF8String) -#define ures_getUTF8StringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByIndex) -#define ures_getUTF8StringByKey 
U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByKey) -#define ures_getVersion U_ICU_ENTRY_POINT_RENAME(ures_getVersion) -#define ures_getVersionByKey U_ICU_ENTRY_POINT_RENAME(ures_getVersionByKey) -#define ures_getVersionNumber U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumber) -#define ures_getVersionNumberInternal U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumberInternal) -#define ures_hasNext U_ICU_ENTRY_POINT_RENAME(ures_hasNext) -#define ures_initStackObject U_ICU_ENTRY_POINT_RENAME(ures_initStackObject) -#define ures_open U_ICU_ENTRY_POINT_RENAME(ures_open) -#define ures_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ures_openAvailableLocales) -#define ures_openDirect U_ICU_ENTRY_POINT_RENAME(ures_openDirect) -#define ures_openFillIn U_ICU_ENTRY_POINT_RENAME(ures_openFillIn) -#define ures_openNoDefault U_ICU_ENTRY_POINT_RENAME(ures_openNoDefault) -#define ures_openU U_ICU_ENTRY_POINT_RENAME(ures_openU) -#define ures_resetIterator U_ICU_ENTRY_POINT_RENAME(ures_resetIterator) -#define ures_swap U_ICU_ENTRY_POINT_RENAME(ures_swap) -#define uscript_breaksBetweenLetters U_ICU_ENTRY_POINT_RENAME(uscript_breaksBetweenLetters) -#define uscript_closeRun U_ICU_ENTRY_POINT_RENAME(uscript_closeRun) -#define uscript_getCode U_ICU_ENTRY_POINT_RENAME(uscript_getCode) -#define uscript_getName U_ICU_ENTRY_POINT_RENAME(uscript_getName) -#define uscript_getSampleString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleString) -#define uscript_getSampleUnicodeString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleUnicodeString) -#define uscript_getScript U_ICU_ENTRY_POINT_RENAME(uscript_getScript) -#define uscript_getScriptExtensions U_ICU_ENTRY_POINT_RENAME(uscript_getScriptExtensions) -#define uscript_getShortName U_ICU_ENTRY_POINT_RENAME(uscript_getShortName) -#define uscript_getUsage U_ICU_ENTRY_POINT_RENAME(uscript_getUsage) -#define uscript_hasScript U_ICU_ENTRY_POINT_RENAME(uscript_hasScript) -#define uscript_isCased U_ICU_ENTRY_POINT_RENAME(uscript_isCased) -#define uscript_isRightToLeft 
U_ICU_ENTRY_POINT_RENAME(uscript_isRightToLeft) -#define uscript_nextRun U_ICU_ENTRY_POINT_RENAME(uscript_nextRun) -#define uscript_openRun U_ICU_ENTRY_POINT_RENAME(uscript_openRun) -#define uscript_resetRun U_ICU_ENTRY_POINT_RENAME(uscript_resetRun) -#define uscript_setRunText U_ICU_ENTRY_POINT_RENAME(uscript_setRunText) -#define usearch_close U_ICU_ENTRY_POINT_RENAME(usearch_close) -#define usearch_first U_ICU_ENTRY_POINT_RENAME(usearch_first) -#define usearch_following U_ICU_ENTRY_POINT_RENAME(usearch_following) -#define usearch_getAttribute U_ICU_ENTRY_POINT_RENAME(usearch_getAttribute) -#define usearch_getBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_getBreakIterator) -#define usearch_getCollator U_ICU_ENTRY_POINT_RENAME(usearch_getCollator) -#define usearch_getMatchedLength U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedLength) -#define usearch_getMatchedStart U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedStart) -#define usearch_getMatchedText U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedText) -#define usearch_getOffset U_ICU_ENTRY_POINT_RENAME(usearch_getOffset) -#define usearch_getPattern U_ICU_ENTRY_POINT_RENAME(usearch_getPattern) -#define usearch_getText U_ICU_ENTRY_POINT_RENAME(usearch_getText) -#define usearch_handleNextCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handleNextCanonical) -#define usearch_handleNextExact U_ICU_ENTRY_POINT_RENAME(usearch_handleNextExact) -#define usearch_handlePreviousCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousCanonical) -#define usearch_handlePreviousExact U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousExact) -#define usearch_last U_ICU_ENTRY_POINT_RENAME(usearch_last) -#define usearch_next U_ICU_ENTRY_POINT_RENAME(usearch_next) -#define usearch_open U_ICU_ENTRY_POINT_RENAME(usearch_open) -#define usearch_openFromCollator U_ICU_ENTRY_POINT_RENAME(usearch_openFromCollator) -#define usearch_preceding U_ICU_ENTRY_POINT_RENAME(usearch_preceding) -#define usearch_previous U_ICU_ENTRY_POINT_RENAME(usearch_previous) -#define 
usearch_reset U_ICU_ENTRY_POINT_RENAME(usearch_reset) -#define usearch_search U_ICU_ENTRY_POINT_RENAME(usearch_search) -#define usearch_searchBackwards U_ICU_ENTRY_POINT_RENAME(usearch_searchBackwards) -#define usearch_setAttribute U_ICU_ENTRY_POINT_RENAME(usearch_setAttribute) -#define usearch_setBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_setBreakIterator) -#define usearch_setCollator U_ICU_ENTRY_POINT_RENAME(usearch_setCollator) -#define usearch_setOffset U_ICU_ENTRY_POINT_RENAME(usearch_setOffset) -#define usearch_setPattern U_ICU_ENTRY_POINT_RENAME(usearch_setPattern) -#define usearch_setText U_ICU_ENTRY_POINT_RENAME(usearch_setText) -#define uset_add U_ICU_ENTRY_POINT_RENAME(uset_add) -#define uset_addAll U_ICU_ENTRY_POINT_RENAME(uset_addAll) -#define uset_addAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_addAllCodePoints) -#define uset_addRange U_ICU_ENTRY_POINT_RENAME(uset_addRange) -#define uset_addString U_ICU_ENTRY_POINT_RENAME(uset_addString) -#define uset_applyIntPropertyValue U_ICU_ENTRY_POINT_RENAME(uset_applyIntPropertyValue) -#define uset_applyPattern U_ICU_ENTRY_POINT_RENAME(uset_applyPattern) -#define uset_applyPropertyAlias U_ICU_ENTRY_POINT_RENAME(uset_applyPropertyAlias) -#define uset_charAt U_ICU_ENTRY_POINT_RENAME(uset_charAt) -#define uset_clear U_ICU_ENTRY_POINT_RENAME(uset_clear) -#define uset_clone U_ICU_ENTRY_POINT_RENAME(uset_clone) -#define uset_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(uset_cloneAsThawed) -#define uset_close U_ICU_ENTRY_POINT_RENAME(uset_close) -#define uset_closeOver U_ICU_ENTRY_POINT_RENAME(uset_closeOver) -#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact) -#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement) -#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll) -#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains) -#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll) -#define uset_containsAllCodePoints 
U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints) -#define uset_containsNone U_ICU_ENTRY_POINT_RENAME(uset_containsNone) -#define uset_containsRange U_ICU_ENTRY_POINT_RENAME(uset_containsRange) -#define uset_containsSome U_ICU_ENTRY_POINT_RENAME(uset_containsSome) -#define uset_containsString U_ICU_ENTRY_POINT_RENAME(uset_containsString) -#define uset_equals U_ICU_ENTRY_POINT_RENAME(uset_equals) -#define uset_freeze U_ICU_ENTRY_POINT_RENAME(uset_freeze) -#define uset_getItem U_ICU_ENTRY_POINT_RENAME(uset_getItem) -#define uset_getItemCount U_ICU_ENTRY_POINT_RENAME(uset_getItemCount) -#define uset_getSerializedRange U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRange) -#define uset_getSerializedRangeCount U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRangeCount) -#define uset_getSerializedSet U_ICU_ENTRY_POINT_RENAME(uset_getSerializedSet) -#define uset_indexOf U_ICU_ENTRY_POINT_RENAME(uset_indexOf) -#define uset_isEmpty U_ICU_ENTRY_POINT_RENAME(uset_isEmpty) -#define uset_isFrozen U_ICU_ENTRY_POINT_RENAME(uset_isFrozen) -#define uset_open U_ICU_ENTRY_POINT_RENAME(uset_open) -#define uset_openEmpty U_ICU_ENTRY_POINT_RENAME(uset_openEmpty) -#define uset_openPattern U_ICU_ENTRY_POINT_RENAME(uset_openPattern) -#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions) -#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove) -#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll) -#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings) -#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange) -#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString) -#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern) -#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain) -#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll) -#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize) -#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains) 
-#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set) -#define uset_setSerializedToOne U_ICU_ENTRY_POINT_RENAME(uset_setSerializedToOne) -#define uset_size U_ICU_ENTRY_POINT_RENAME(uset_size) -#define uset_span U_ICU_ENTRY_POINT_RENAME(uset_span) -#define uset_spanBack U_ICU_ENTRY_POINT_RENAME(uset_spanBack) -#define uset_spanBackUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanBackUTF8) -#define uset_spanUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanUTF8) -#define uset_toPattern U_ICU_ENTRY_POINT_RENAME(uset_toPattern) -#define uspoof_areConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusable) -#define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8) -#define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString) -#define uspoof_check U_ICU_ENTRY_POINT_RENAME(uspoof_check) -#define uspoof_check2 U_ICU_ENTRY_POINT_RENAME(uspoof_check2) -#define uspoof_check2UTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_check2UTF8) -#define uspoof_check2UnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_check2UnicodeString) -#define uspoof_checkUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_checkUTF8) -#define uspoof_checkUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_checkUnicodeString) -#define uspoof_clone U_ICU_ENTRY_POINT_RENAME(uspoof_clone) -#define uspoof_close U_ICU_ENTRY_POINT_RENAME(uspoof_close) -#define uspoof_closeCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_closeCheckResult) -#define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars) -#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales) -#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet) -#define uspoof_getCheckResultChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultChecks) -#define uspoof_getCheckResultNumerics U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultNumerics) -#define uspoof_getCheckResultRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultRestrictionLevel) -#define uspoof_getChecks 
U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks) -#define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet) -#define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet) -#define uspoof_getRecommendedSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedSet) -#define uspoof_getRecommendedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedUnicodeSet) -#define uspoof_getRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getRestrictionLevel) -#define uspoof_getSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeleton) -#define uspoof_getSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUTF8) -#define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString) -#define uspoof_internalInitStatics U_ICU_ENTRY_POINT_RENAME(uspoof_internalInitStatics) -#define uspoof_open U_ICU_ENTRY_POINT_RENAME(uspoof_open) -#define uspoof_openCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_openCheckResult) -#define uspoof_openFromSerialized U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSerialized) -#define uspoof_openFromSource U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSource) -#define uspoof_serialize U_ICU_ENTRY_POINT_RENAME(uspoof_serialize) -#define uspoof_setAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedChars) -#define uspoof_setAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedLocales) -#define uspoof_setAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedUnicodeSet) -#define uspoof_setChecks U_ICU_ENTRY_POINT_RENAME(uspoof_setChecks) -#define uspoof_setRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_setRestrictionLevel) -#define uspoof_swap U_ICU_ENTRY_POINT_RENAME(uspoof_swap) -#define usprep_close U_ICU_ENTRY_POINT_RENAME(usprep_close) -#define usprep_open U_ICU_ENTRY_POINT_RENAME(usprep_open) -#define usprep_openByType U_ICU_ENTRY_POINT_RENAME(usprep_openByType) -#define usprep_prepare U_ICU_ENTRY_POINT_RENAME(usprep_prepare) -#define usprep_swap U_ICU_ENTRY_POINT_RENAME(usprep_swap) 
-#define ustr_hashCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashCharsN) -#define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN) -#define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN) -#define ustrcase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_getCaseLocale) -#define ustrcase_getTitleBreakIterator U_ICU_ENTRY_POINT_RENAME(ustrcase_getTitleBreakIterator) -#define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold) -#define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower) -#define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle) -#define ustrcase_internalToUpper U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToUpper) -#define ustrcase_map U_ICU_ENTRY_POINT_RENAME(ustrcase_map) -#define ustrcase_mapWithOverlap U_ICU_ENTRY_POINT_RENAME(ustrcase_mapWithOverlap) -#define utext_char32At U_ICU_ENTRY_POINT_RENAME(utext_char32At) -#define utext_clone U_ICU_ENTRY_POINT_RENAME(utext_clone) -#define utext_close U_ICU_ENTRY_POINT_RENAME(utext_close) -#define utext_copy U_ICU_ENTRY_POINT_RENAME(utext_copy) -#define utext_current32 U_ICU_ENTRY_POINT_RENAME(utext_current32) -#define utext_equals U_ICU_ENTRY_POINT_RENAME(utext_equals) -#define utext_extract U_ICU_ENTRY_POINT_RENAME(utext_extract) -#define utext_freeze U_ICU_ENTRY_POINT_RENAME(utext_freeze) -#define utext_getNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getNativeIndex) -#define utext_getPreviousNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getPreviousNativeIndex) -#define utext_hasMetaData U_ICU_ENTRY_POINT_RENAME(utext_hasMetaData) -#define utext_isLengthExpensive U_ICU_ENTRY_POINT_RENAME(utext_isLengthExpensive) -#define utext_isWritable U_ICU_ENTRY_POINT_RENAME(utext_isWritable) -#define utext_moveIndex32 U_ICU_ENTRY_POINT_RENAME(utext_moveIndex32) -#define utext_nativeLength U_ICU_ENTRY_POINT_RENAME(utext_nativeLength) -#define utext_next32 U_ICU_ENTRY_POINT_RENAME(utext_next32) -#define utext_next32From 
U_ICU_ENTRY_POINT_RENAME(utext_next32From) -#define utext_openCharacterIterator U_ICU_ENTRY_POINT_RENAME(utext_openCharacterIterator) -#define utext_openConstUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openConstUnicodeString) -#define utext_openReplaceable U_ICU_ENTRY_POINT_RENAME(utext_openReplaceable) -#define utext_openUChars U_ICU_ENTRY_POINT_RENAME(utext_openUChars) -#define utext_openUTF8 U_ICU_ENTRY_POINT_RENAME(utext_openUTF8) -#define utext_openUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openUnicodeString) -#define utext_previous32 U_ICU_ENTRY_POINT_RENAME(utext_previous32) -#define utext_previous32From U_ICU_ENTRY_POINT_RENAME(utext_previous32From) -#define utext_replace U_ICU_ENTRY_POINT_RENAME(utext_replace) -#define utext_setNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_setNativeIndex) -#define utext_setup U_ICU_ENTRY_POINT_RENAME(utext_setup) -#define utf8_appendCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_appendCharSafeBody) -#define utf8_back1SafeBody U_ICU_ENTRY_POINT_RENAME(utf8_back1SafeBody) -#define utf8_countTrailBytes U_ICU_ENTRY_POINT_RENAME(utf8_countTrailBytes) -#define utf8_nextCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_nextCharSafeBody) -#define utf8_prevCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_prevCharSafeBody) -#define utmscale_fromInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_fromInt64) -#define utmscale_getTimeScaleValue U_ICU_ENTRY_POINT_RENAME(utmscale_getTimeScaleValue) -#define utmscale_toInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_toInt64) -#define utrace_cleanup U_ICU_ENTRY_POINT_RENAME(utrace_cleanup) -#define utrace_data U_ICU_ENTRY_POINT_RENAME(utrace_data) -#define utrace_entry U_ICU_ENTRY_POINT_RENAME(utrace_entry) -#define utrace_exit U_ICU_ENTRY_POINT_RENAME(utrace_exit) -#define utrace_format U_ICU_ENTRY_POINT_RENAME(utrace_format) -#define utrace_functionName U_ICU_ENTRY_POINT_RENAME(utrace_functionName) -#define utrace_getFunctions U_ICU_ENTRY_POINT_RENAME(utrace_getFunctions) -#define utrace_getLevel 
U_ICU_ENTRY_POINT_RENAME(utrace_getLevel) -#define utrace_setFunctions U_ICU_ENTRY_POINT_RENAME(utrace_setFunctions) -#define utrace_setLevel U_ICU_ENTRY_POINT_RENAME(utrace_setLevel) -#define utrace_vformat U_ICU_ENTRY_POINT_RENAME(utrace_vformat) -#define utrans_clone U_ICU_ENTRY_POINT_RENAME(utrans_clone) -#define utrans_close U_ICU_ENTRY_POINT_RENAME(utrans_close) -#define utrans_countAvailableIDs U_ICU_ENTRY_POINT_RENAME(utrans_countAvailableIDs) -#define utrans_getAvailableID U_ICU_ENTRY_POINT_RENAME(utrans_getAvailableID) -#define utrans_getID U_ICU_ENTRY_POINT_RENAME(utrans_getID) -#define utrans_getSourceSet U_ICU_ENTRY_POINT_RENAME(utrans_getSourceSet) -#define utrans_getUnicodeID U_ICU_ENTRY_POINT_RENAME(utrans_getUnicodeID) -#define utrans_open U_ICU_ENTRY_POINT_RENAME(utrans_open) -#define utrans_openIDs U_ICU_ENTRY_POINT_RENAME(utrans_openIDs) -#define utrans_openInverse U_ICU_ENTRY_POINT_RENAME(utrans_openInverse) -#define utrans_openU U_ICU_ENTRY_POINT_RENAME(utrans_openU) -#define utrans_register U_ICU_ENTRY_POINT_RENAME(utrans_register) -#define utrans_rep_caseContextIterator U_ICU_ENTRY_POINT_RENAME(utrans_rep_caseContextIterator) -#define utrans_setFilter U_ICU_ENTRY_POINT_RENAME(utrans_setFilter) -#define utrans_stripRules U_ICU_ENTRY_POINT_RENAME(utrans_stripRules) -#define utrans_toRules U_ICU_ENTRY_POINT_RENAME(utrans_toRules) -#define utrans_trans U_ICU_ENTRY_POINT_RENAME(utrans_trans) -#define utrans_transIncremental U_ICU_ENTRY_POINT_RENAME(utrans_transIncremental) -#define utrans_transIncrementalUChars U_ICU_ENTRY_POINT_RENAME(utrans_transIncrementalUChars) -#define utrans_transUChars U_ICU_ENTRY_POINT_RENAME(utrans_transUChars) -#define utrans_transliterator_cleanup U_ICU_ENTRY_POINT_RENAME(utrans_transliterator_cleanup) -#define utrans_unregister U_ICU_ENTRY_POINT_RENAME(utrans_unregister) -#define utrans_unregisterID U_ICU_ENTRY_POINT_RENAME(utrans_unregisterID) -#define utrie2_clone U_ICU_ENTRY_POINT_RENAME(utrie2_clone) -#define 
utrie2_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(utrie2_cloneAsThawed) -#define utrie2_close U_ICU_ENTRY_POINT_RENAME(utrie2_close) -#define utrie2_enum U_ICU_ENTRY_POINT_RENAME(utrie2_enum) -#define utrie2_enumForLeadSurrogate U_ICU_ENTRY_POINT_RENAME(utrie2_enumForLeadSurrogate) -#define utrie2_freeze U_ICU_ENTRY_POINT_RENAME(utrie2_freeze) -#define utrie2_fromUTrie U_ICU_ENTRY_POINT_RENAME(utrie2_fromUTrie) -#define utrie2_get32 U_ICU_ENTRY_POINT_RENAME(utrie2_get32) -#define utrie2_get32FromLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_get32FromLeadSurrogateCodeUnit) -#define utrie2_internalU8NextIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8NextIndex) -#define utrie2_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8PrevIndex) -#define utrie2_isFrozen U_ICU_ENTRY_POINT_RENAME(utrie2_isFrozen) -#define utrie2_open U_ICU_ENTRY_POINT_RENAME(utrie2_open) -#define utrie2_openDummy U_ICU_ENTRY_POINT_RENAME(utrie2_openDummy) -#define utrie2_openFromSerialized U_ICU_ENTRY_POINT_RENAME(utrie2_openFromSerialized) -#define utrie2_serialize U_ICU_ENTRY_POINT_RENAME(utrie2_serialize) -#define utrie2_set32 U_ICU_ENTRY_POINT_RENAME(utrie2_set32) -#define utrie2_set32ForLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_set32ForLeadSurrogateCodeUnit) -#define utrie2_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie2_setRange32) -#define utrie2_swap U_ICU_ENTRY_POINT_RENAME(utrie2_swap) -#define utrie_clone U_ICU_ENTRY_POINT_RENAME(utrie_clone) -#define utrie_close U_ICU_ENTRY_POINT_RENAME(utrie_close) -#define utrie_defaultGetFoldingOffset U_ICU_ENTRY_POINT_RENAME(utrie_defaultGetFoldingOffset) -#define utrie_enum U_ICU_ENTRY_POINT_RENAME(utrie_enum) -#define utrie_get32 U_ICU_ENTRY_POINT_RENAME(utrie_get32) -#define utrie_getData U_ICU_ENTRY_POINT_RENAME(utrie_getData) -#define utrie_open U_ICU_ENTRY_POINT_RENAME(utrie_open) -#define utrie_serialize U_ICU_ENTRY_POINT_RENAME(utrie_serialize) -#define utrie_set32 U_ICU_ENTRY_POINT_RENAME(utrie_set32) -#define 
utrie_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie_setRange32) -#define utrie_swap U_ICU_ENTRY_POINT_RENAME(utrie_swap) -#define utrie_swapAnyVersion U_ICU_ENTRY_POINT_RENAME(utrie_swapAnyVersion) -#define utrie_unserialize U_ICU_ENTRY_POINT_RENAME(utrie_unserialize) -#define utrie_unserializeDummy U_ICU_ENTRY_POINT_RENAME(utrie_unserializeDummy) -#define vzone_clone U_ICU_ENTRY_POINT_RENAME(vzone_clone) -#define vzone_close U_ICU_ENTRY_POINT_RENAME(vzone_close) -#define vzone_countTransitionRules U_ICU_ENTRY_POINT_RENAME(vzone_countTransitionRules) -#define vzone_equals U_ICU_ENTRY_POINT_RENAME(vzone_equals) -#define vzone_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(vzone_getDynamicClassID) -#define vzone_getLastModified U_ICU_ENTRY_POINT_RENAME(vzone_getLastModified) -#define vzone_getNextTransition U_ICU_ENTRY_POINT_RENAME(vzone_getNextTransition) -#define vzone_getOffset U_ICU_ENTRY_POINT_RENAME(vzone_getOffset) -#define vzone_getOffset2 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset2) -#define vzone_getOffset3 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset3) -#define vzone_getPreviousTransition U_ICU_ENTRY_POINT_RENAME(vzone_getPreviousTransition) -#define vzone_getRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_getRawOffset) -#define vzone_getStaticClassID U_ICU_ENTRY_POINT_RENAME(vzone_getStaticClassID) -#define vzone_getTZURL U_ICU_ENTRY_POINT_RENAME(vzone_getTZURL) -#define vzone_hasSameRules U_ICU_ENTRY_POINT_RENAME(vzone_hasSameRules) -#define vzone_inDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_inDaylightTime) -#define vzone_openData U_ICU_ENTRY_POINT_RENAME(vzone_openData) -#define vzone_openID U_ICU_ENTRY_POINT_RENAME(vzone_openID) -#define vzone_setLastModified U_ICU_ENTRY_POINT_RENAME(vzone_setLastModified) -#define vzone_setRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_setRawOffset) -#define vzone_setTZURL U_ICU_ENTRY_POINT_RENAME(vzone_setTZURL) -#define vzone_useDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_useDaylightTime) -#define vzone_write 
U_ICU_ENTRY_POINT_RENAME(vzone_write) -#define vzone_writeFromStart U_ICU_ENTRY_POINT_RENAME(vzone_writeFromStart) -#define vzone_writeSimple U_ICU_ENTRY_POINT_RENAME(vzone_writeSimple) -#define zrule_close U_ICU_ENTRY_POINT_RENAME(zrule_close) -#define zrule_equals U_ICU_ENTRY_POINT_RENAME(zrule_equals) -#define zrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(zrule_getDSTSavings) -#define zrule_getName U_ICU_ENTRY_POINT_RENAME(zrule_getName) -#define zrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(zrule_getRawOffset) -#define zrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(zrule_isEquivalentTo) -#define ztrans_adoptFrom U_ICU_ENTRY_POINT_RENAME(ztrans_adoptFrom) -#define ztrans_adoptTo U_ICU_ENTRY_POINT_RENAME(ztrans_adoptTo) -#define ztrans_clone U_ICU_ENTRY_POINT_RENAME(ztrans_clone) -#define ztrans_close U_ICU_ENTRY_POINT_RENAME(ztrans_close) -#define ztrans_equals U_ICU_ENTRY_POINT_RENAME(ztrans_equals) -#define ztrans_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getDynamicClassID) -#define ztrans_getFrom U_ICU_ENTRY_POINT_RENAME(ztrans_getFrom) -#define ztrans_getStaticClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getStaticClassID) -#define ztrans_getTime U_ICU_ENTRY_POINT_RENAME(ztrans_getTime) -#define ztrans_getTo U_ICU_ENTRY_POINT_RENAME(ztrans_getTo) -#define ztrans_open U_ICU_ENTRY_POINT_RENAME(ztrans_open) -#define ztrans_openEmpty U_ICU_ENTRY_POINT_RENAME(ztrans_openEmpty) -#define ztrans_setFrom U_ICU_ENTRY_POINT_RENAME(ztrans_setFrom) -#define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime) -#define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo) - -#endif /* !(defined(_MSC_VER) && defined(__INTELLISENSE__)) */ -#endif /* U_DISABLE_RENAMING */ -#endif /* URENAME_H */ - diff --git a/deps/node/deps/icu-small/source/common/unicode/urep.h b/deps/node/deps/icu-small/source/common/unicode/urep.h deleted file mode 100644 index c54ba7c4..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/urep.h +++ /dev/null @@ -1,157 +0,0 @@ -// © 2016 and 
later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1997-2010, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* Date Name Description -* 06/23/00 aliu Creation. -****************************************************************************** -*/ - -#ifndef __UREP_H -#define __UREP_H - -#include "unicode/utypes.h" - -U_CDECL_BEGIN - -/******************************************************************** - * General Notes - ******************************************************************** - * TODO - * Add usage scenario - * Add test code - * Talk about pinning - * Talk about "can truncate result if out of memory" - */ - -/******************************************************************** - * Data Structures - ********************************************************************/ -/** - * \file - * \brief C API: Callbacks for UReplaceable - */ -/** - * An opaque replaceable text object. This will be manipulated only - * through the caller-supplied UReplaceableFunctor struct. Related - * to the C++ class Replaceable. - * This is currently only used in the Transliterator C API, see utrans.h . - * @stable ICU 2.0 - */ -typedef void* UReplaceable; - -/** - * A set of function pointers that transliterators use to manipulate a - * UReplaceable. The caller should supply the required functions to - * manipulate their text appropriately. Related to the C++ class - * Replaceable. - * @stable ICU 2.0 - */ -typedef struct UReplaceableCallbacks { - - /** - * Function pointer that returns the number of UChar code units in - * this text. - * - * @param rep A pointer to "this" UReplaceable object. - * @return The length of the text. 
- * @stable ICU 2.0 - */ - int32_t (*length)(const UReplaceable* rep); - - /** - * Function pointer that returns a UChar code units at the given - * offset into this text; 0 <= offset < n, where n is the value - * returned by (*length)(rep). See unistr.h for a description of - * charAt() vs. char32At(). - * - * @param rep A pointer to "this" UReplaceable object. - * @param offset The index at which to fetch the UChar (code unit). - * @return The UChar (code unit) at offset, or U+FFFF if the offset is out of bounds. - * @stable ICU 2.0 - */ - UChar (*charAt)(const UReplaceable* rep, - int32_t offset); - - /** - * Function pointer that returns a UChar32 code point at the given - * offset into this text. See unistr.h for a description of - * charAt() vs. char32At(). - * - * @param rep A pointer to "this" UReplaceable object. - * @param offset The index at which to fetch the UChar32 (code point). - * @return The UChar32 (code point) at offset, or U+FFFF if the offset is out of bounds. - * @stable ICU 2.0 - */ - UChar32 (*char32At)(const UReplaceable* rep, - int32_t offset); - - /** - * Function pointer that replaces text between start and limit in - * this text with the given text. Attributes (out of band info) - * should be retained. - * - * @param rep A pointer to "this" UReplaceable object. - * @param start the starting index of the text to be replaced, - * inclusive. - * @param limit the ending index of the text to be replaced, - * exclusive. - * @param text the new text to replace the UChars from - * start..limit-1. - * @param textLength the number of UChars at text, or -1 if text - * is null-terminated. - * @stable ICU 2.0 - */ - void (*replace)(UReplaceable* rep, - int32_t start, - int32_t limit, - const UChar* text, - int32_t textLength); - - /** - * Function pointer that copies the characters in the range - * [start, limit) into the array dst. - * - * @param rep A pointer to "this" UReplaceable object. 
- * @param start offset of first character which will be copied - * into the array - * @param limit offset immediately following the last character to - * be copied - * @param dst array in which to copy characters. The length of - * dst must be at least (limit - start). - * @stable ICU 2.1 - */ - void (*extract)(UReplaceable* rep, - int32_t start, - int32_t limit, - UChar* dst); - - /** - * Function pointer that copies text between start and limit in - * this text to another index in the text. Attributes (out of - * band info) should be retained. After this call, there will be - * (at least) two copies of the characters originally located at - * start..limit-1. - * - * @param rep A pointer to "this" UReplaceable object. - * @param start the starting index of the text to be copied, - * inclusive. - * @param limit the ending index of the text to be copied, - * exclusive. - * @param dest the index at which the copy of the UChars should be - * inserted. - * @stable ICU 2.0 - */ - void (*copy)(UReplaceable* rep, - int32_t start, - int32_t limit, - int32_t dest); - -} UReplaceableCallbacks; - -U_CDECL_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ures.h b/deps/node/deps/icu-small/source/common/unicode/ures.h deleted file mode 100644 index af0ce76f..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ures.h +++ /dev/null @@ -1,908 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1997-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File URES.H (formerly CRESBUND.H) -* -* Modification History: -* -* Date Name Description -* 04/01/97 aliu Creation. -* 02/22/99 damiba overhaul. -* 04/04/99 helena Fixed internal header inclusion. 
-* 04/15/99 Madhu Updated Javadoc -* 06/14/99 stephen Removed functions taking a filename suffix. -* 07/20/99 stephen Language-independent typedef to void* -* 11/09/99 weiv Added ures_getLocale() -* 06/24/02 weiv Added support for resource sharing -****************************************************************************** -*/ - -#ifndef URES_H -#define URES_H - -#include "unicode/utypes.h" -#include "unicode/uloc.h" -#include "unicode/localpointer.h" - -/** - * \file - * \brief C API: Resource Bundle - * - *

C API: Resource Bundle

- * - * C API representing a collection of resource information pertaining to a given - * locale. A resource bundle provides a way of accessing locale- specific information in - * a data file. You create a resource bundle that manages the resources for a given - * locale and then ask it for individual resources. - *

- * Resource bundles in ICU4C are currently defined using text files which conform to the following - * BNF definition. - * More on resource bundle concepts and syntax can be found in the - * Users Guide. - *

- */ - -/** - * UResourceBundle is an opaque type for handles for resource bundles in C APIs. - * @stable ICU 2.0 - */ -struct UResourceBundle; - -/** - * @stable ICU 2.0 - */ -typedef struct UResourceBundle UResourceBundle; - -/** - * Numeric constants for types of resource items. - * @see ures_getType - * @stable ICU 2.0 - */ -typedef enum { - /** Resource type constant for "no resource". @stable ICU 2.6 */ - URES_NONE=-1, - - /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */ - URES_STRING=0, - - /** Resource type constant for binary data. @stable ICU 2.6 */ - URES_BINARY=1, - - /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */ - URES_TABLE=2, - - /** - * Resource type constant for aliases; - * internally stores a string which identifies the actual resource - * storing the data (can be in a different resource bundle). - * Resolved internally before delivering the actual resource through the API. - * @stable ICU 2.6 - */ - URES_ALIAS=3, - - /** - * Resource type constant for a single 28-bit integer, interpreted as - * signed or unsigned by the ures_getInt() or ures_getUInt() function. - * @see ures_getInt - * @see ures_getUInt - * @stable ICU 2.6 - */ - URES_INT=7, - - /** Resource type constant for arrays of resources. @stable ICU 2.6 */ - URES_ARRAY=8, - - /** - * Resource type constant for vectors of 32-bit integers. - * @see ures_getIntVector - * @stable ICU 2.6 - */ - URES_INT_VECTOR = 14, -#ifndef U_HIDE_DEPRECATED_API - /** @deprecated ICU 2.6 Use the URES_ constant instead. */ - RES_NONE=URES_NONE, - /** @deprecated ICU 2.6 Use the URES_ constant instead. */ - RES_STRING=URES_STRING, - /** @deprecated ICU 2.6 Use the URES_ constant instead. */ - RES_BINARY=URES_BINARY, - /** @deprecated ICU 2.6 Use the URES_ constant instead. */ - RES_TABLE=URES_TABLE, - /** @deprecated ICU 2.6 Use the URES_ constant instead. */ - RES_ALIAS=URES_ALIAS, - /** @deprecated ICU 2.6 Use the URES_ constant instead. 
*/ - RES_INT=URES_INT, - /** @deprecated ICU 2.6 Use the URES_ constant instead. */ - RES_ARRAY=URES_ARRAY, - /** @deprecated ICU 2.6 Use the URES_ constant instead. */ - RES_INT_VECTOR=URES_INT_VECTOR, - /** @deprecated ICU 2.6 Not used. */ - RES_RESERVED=15, - - /** - * One more than the highest normal UResType value. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - URES_LIMIT = 16 -#endif // U_HIDE_DEPRECATED_API -} UResType; - -/* - * Functions to create and destroy resource bundles. - */ - -/** - * Opens a UResourceBundle, from which users can extract strings by using - * their corresponding keys. - * Note that the caller is responsible of calling ures_close on each successfully - * opened resource bundle. - * @param packageName The packageName and locale together point to an ICU udata object, - * as defined by udata_open( packageName, "res", locale, err) - * or equivalent. Typically, packageName will refer to a (.dat) file, or to - * a package registered with udata_setAppData(). Using a full file or directory - * pathname for packageName is deprecated. If NULL, ICU data will be used. - * @param locale specifies the locale for which we want to open the resource - * if NULL, the default locale will be used. If strlen(locale) == 0 - * root locale will be used. - * - * @param status fills in the outgoing error code. - * The UErrorCode err parameter is used to return status information to the user. To - * check whether the construction succeeded or not, you should check the value of - * U_SUCCESS(err). If you wish more detailed information, you can check for - * informational status results which still indicate success. U_USING_FALLBACK_WARNING - * indicates that a fall back locale was used. For example, 'de_CH' was requested, - * but nothing was found there, so 'de' was used. 
U_USING_DEFAULT_WARNING indicates that - * the default locale data or root locale data was used; neither the requested locale - * nor any of its fall back locales could be found. Please see the users guide for more - * information on this topic. - * @return a newly allocated resource bundle. - * @see ures_close - * @stable ICU 2.0 - */ -U_STABLE UResourceBundle* U_EXPORT2 -ures_open(const char* packageName, - const char* locale, - UErrorCode* status); - - -/** This function does not care what kind of localeID is passed in. It simply opens a bundle with - * that name. Fallback mechanism is disabled for the new bundle. If the requested bundle contains - * an %%ALIAS directive, the results are undefined. - * @param packageName The packageName and locale together point to an ICU udata object, - * as defined by udata_open( packageName, "res", locale, err) - * or equivalent. Typically, packageName will refer to a (.dat) file, or to - * a package registered with udata_setAppData(). Using a full file or directory - * pathname for packageName is deprecated. If NULL, ICU data will be used. - * @param locale specifies the locale for which we want to open the resource - * if NULL, the default locale will be used. If strlen(locale) == 0 - * root locale will be used. - * - * @param status fills in the outgoing error code. Either U_ZERO_ERROR or U_MISSING_RESOURCE_ERROR - * @return a newly allocated resource bundle or NULL if it doesn't exist. - * @see ures_close - * @stable ICU 2.0 - */ -U_STABLE UResourceBundle* U_EXPORT2 -ures_openDirect(const char* packageName, - const char* locale, - UErrorCode* status); - -/** - * Same as ures_open() but takes a const UChar *path. - * This path will be converted to char * using the default converter, - * then ures_open() is called. - * - * @param packageName The packageName and locale together point to an ICU udata object, - * as defined by udata_open( packageName, "res", locale, err) - * or equivalent. 
Typically, packageName will refer to a (.dat) file, or to - * a package registered with udata_setAppData(). Using a full file or directory - * pathname for packageName is deprecated. If NULL, ICU data will be used. - * @param locale specifies the locale for which we want to open the resource - * if NULL, the default locale will be used. If strlen(locale) == 0 - * root locale will be used. - * @param status fills in the outgoing error code. - * @return a newly allocated resource bundle. - * @see ures_open - * @stable ICU 2.0 - */ -U_STABLE UResourceBundle* U_EXPORT2 -ures_openU(const UChar* packageName, - const char* locale, - UErrorCode* status); - -#ifndef U_HIDE_DEPRECATED_API -/** - * Returns the number of strings/arrays in resource bundles. - * Better to use ures_getSize, as this function will be deprecated. - * - *@param resourceBundle resource bundle containing the desired strings - *@param resourceKey key tagging the resource - *@param err fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * could be a non-failing error - * e.g.: U_USING_FALLBACK_WARNING,U_USING_FALLBACK_WARNING - *@return: for Arrays: returns the number of resources in the array - * Tables: returns the number of resources in the table - * single string: returns 1 - *@see ures_getSize - * @deprecated ICU 2.8 User ures_getSize instead - */ -U_DEPRECATED int32_t U_EXPORT2 -ures_countArrayItems(const UResourceBundle* resourceBundle, - const char* resourceKey, - UErrorCode* err); -#endif /* U_HIDE_DEPRECATED_API */ - -/** - * Close a resource bundle, all pointers returned from the various ures_getXXX calls - * on this particular bundle should be considered invalid henceforth. - * - * @param resourceBundle a pointer to a resourceBundle struct. Can be NULL. 
- * @see ures_open - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ures_close(UResourceBundle* resourceBundle); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUResourceBundlePointer - * "Smart pointer" class, closes a UResourceBundle via ures_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUResourceBundlePointer, UResourceBundle, ures_close); - -U_NAMESPACE_END - -#endif - -#ifndef U_HIDE_DEPRECATED_API -/** - * Return the version number associated with this ResourceBundle as a string. Please - * use ures_getVersion as this function is going to be deprecated. - * - * @param resourceBundle The resource bundle for which the version is checked. - * @return A version number string as specified in the resource bundle or its parent. - * The caller does not own this string. - * @see ures_getVersion - * @deprecated ICU 2.8 Use ures_getVersion instead. - */ -U_DEPRECATED const char* U_EXPORT2 -ures_getVersionNumber(const UResourceBundle* resourceBundle); -#endif /* U_HIDE_DEPRECATED_API */ - -/** - * Return the version number associated with this ResourceBundle as an - * UVersionInfo array. - * - * @param resB The resource bundle for which the version is checked. - * @param versionInfo A UVersionInfo array that is filled with the version number - * as specified in the resource bundle or its parent. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ures_getVersion(const UResourceBundle* resB, - UVersionInfo versionInfo); - -#ifndef U_HIDE_DEPRECATED_API -/** - * Return the name of the Locale associated with this ResourceBundle. This API allows - * you to query for the real locale of the resource. For example, if you requested - * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned. - * For subresources, the locale where this resource comes from will be returned. 
- * If fallback has occurred, getLocale will reflect this. - * - * @param resourceBundle resource bundle in question - * @param status just for catching illegal arguments - * @return A Locale name - * @deprecated ICU 2.8 Use ures_getLocaleByType instead. - */ -U_DEPRECATED const char* U_EXPORT2 -ures_getLocale(const UResourceBundle* resourceBundle, - UErrorCode* status); -#endif /* U_HIDE_DEPRECATED_API */ - -/** - * Return the name of the Locale associated with this ResourceBundle. - * You can choose between requested, valid and real locale. - * - * @param resourceBundle resource bundle in question - * @param type You can choose between requested, valid and actual - * locale. For description see the definition of - * ULocDataLocaleType in uloc.h - * @param status just for catching illegal arguments - * @return A Locale name - * @stable ICU 2.8 - */ -U_STABLE const char* U_EXPORT2 -ures_getLocaleByType(const UResourceBundle* resourceBundle, - ULocDataLocaleType type, - UErrorCode* status); - - -#ifndef U_HIDE_INTERNAL_API -/** - * Same as ures_open() but uses the fill-in parameter instead of allocating - * a bundle, if r!=NULL. - * TODO need to revisit usefulness of this function - * and usage model for fillIn parameters without knowing sizeof(UResourceBundle) - * @param r The resourcebundle to open - * @param packageName The packageName and locale together point to an ICU udata object, - * as defined by udata_open( packageName, "res", locale, err) - * or equivalent. Typically, packageName will refer to a (.dat) file, or to - * a package registered with udata_setAppData(). Using a full file or directory - * pathname for packageName is deprecated. If NULL, ICU data will be used. - * @param localeID specifies the locale for which we want to open the resource - * @param status The error code - * @return a newly allocated resource bundle or NULL if it doesn't exist. 
- * @internal - */ -U_INTERNAL void U_EXPORT2 -ures_openFillIn(UResourceBundle *r, - const char* packageName, - const char* localeID, - UErrorCode* status); -#endif /* U_HIDE_INTERNAL_API */ - -/** - * Returns a string from a string resource type - * - * @param resourceBundle a string resource - * @param len fills in the length of resulting string - * @param status fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * Always check the value of status. Don't count on returning NULL. - * could be a non-failing error - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. - * @see ures_getBinary - * @see ures_getIntVector - * @see ures_getInt - * @see ures_getUInt - * @stable ICU 2.0 - */ -U_STABLE const UChar* U_EXPORT2 -ures_getString(const UResourceBundle* resourceBundle, - int32_t* len, - UErrorCode* status); - -/** - * Returns a UTF-8 string from a string resource. - * The UTF-8 string may be returnable directly as a pointer, or - * it may need to be copied, or transformed from UTF-16 using u_strToUTF8() - * or equivalent. - * - * If forceCopy==TRUE, then the string is always written to the dest buffer - * and dest is returned. - * - * If forceCopy==FALSE, then the string is returned as a pointer if possible, - * without needing a dest buffer (it can be NULL). If the string needs to be - * copied or transformed, then it may be placed into dest at an arbitrary offset. - * - * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and - * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual. - * - * If the string is transformed from UTF-16, then a conversion error may occur - * if an unpaired surrogate is encountered. If the function is successful, then - * the output UTF-8 string is always well-formed. - * - * @param resB Resource bundle. - * @param dest Destination buffer. 
Can be NULL only if capacity=*length==0. - * @param length Input: Capacity of destination buffer. - * Output: Actual length of the UTF-8 string, not counting the - * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR. - * Can be NULL, meaning capacity=0 and the string length is not - * returned to the caller. - * @param forceCopy If TRUE, then the output string will always be written to - * dest, with U_BUFFER_OVERFLOW_ERROR and - * U_STRING_NOT_TERMINATED_WARNING set if appropriate. - * If FALSE, then the dest buffer may or may not contain a - * copy of the string. dest may or may not be modified. - * If a copy needs to be written, then the UErrorCode parameter - * indicates overflow etc. as usual. - * @param status Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to the UTF-8 string. It may be dest, or at some offset - * from dest (only if !forceCopy), or in unrelated memory. - * Always NUL-terminated unless the string was written to dest and - * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set). - * - * @see ures_getString - * @see u_strToUTF8 - * @stable ICU 3.6 - */ -U_STABLE const char * U_EXPORT2 -ures_getUTF8String(const UResourceBundle *resB, - char *dest, int32_t *length, - UBool forceCopy, - UErrorCode *status); - -/** - * Returns a binary data from a binary resource. - * - * @param resourceBundle a string resource - * @param len fills in the length of resulting byte chunk - * @param status fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * Always check the value of status. Don't count on returning NULL. 
- * could be a non-failing error - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file. - * @see ures_getString - * @see ures_getIntVector - * @see ures_getInt - * @see ures_getUInt - * @stable ICU 2.0 - */ -U_STABLE const uint8_t* U_EXPORT2 -ures_getBinary(const UResourceBundle* resourceBundle, - int32_t* len, - UErrorCode* status); - -/** - * Returns a 32 bit integer array from a resource. - * - * @param resourceBundle an int vector resource - * @param len fills in the length of resulting byte chunk - * @param status fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * Always check the value of status. Don't count on returning NULL. - * could be a non-failing error - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return a pointer to a chunk of integers which live in a memory mapped/DLL file. - * @see ures_getBinary - * @see ures_getString - * @see ures_getInt - * @see ures_getUInt - * @stable ICU 2.0 - */ -U_STABLE const int32_t* U_EXPORT2 -ures_getIntVector(const UResourceBundle* resourceBundle, - int32_t* len, - UErrorCode* status); - -/** - * Returns an unsigned integer from a resource. - * This integer is originally 28 bits. - * - * @param resourceBundle a string resource - * @param status fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * could be a non-failing error - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return an integer value - * @see ures_getInt - * @see ures_getIntVector - * @see ures_getBinary - * @see ures_getString - * @stable ICU 2.0 - */ -U_STABLE uint32_t U_EXPORT2 -ures_getUInt(const UResourceBundle* resourceBundle, - UErrorCode *status); - -/** - * Returns a signed integer from a resource. - * This integer is originally 28 bit and the sign gets propagated. 
- * - * @param resourceBundle a string resource - * @param status fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * could be a non-failing error - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return an integer value - * @see ures_getUInt - * @see ures_getIntVector - * @see ures_getBinary - * @see ures_getString - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ures_getInt(const UResourceBundle* resourceBundle, - UErrorCode *status); - -/** - * Returns the size of a resource. Size for scalar types is always 1, - * and for vector/table types is the number of child resources. - * @warning Integer array is treated as a scalar type. There are no - * APIs to access individual members of an integer array. It - * is always returned as a whole. - * @param resourceBundle a resource - * @return number of resources in a given resource. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ures_getSize(const UResourceBundle *resourceBundle); - -/** - * Returns the type of a resource. Available types are defined in enum UResType - * - * @param resourceBundle a resource - * @return type of the given resource. - * @see UResType - * @stable ICU 2.0 - */ -U_STABLE UResType U_EXPORT2 -ures_getType(const UResourceBundle *resourceBundle); - -/** - * Returns the key associated with a given resource. Not all the resources have a key - only - * those that are members of a table. - * - * @param resourceBundle a resource - * @return a key associated to this resource, or NULL if it doesn't have a key - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -ures_getKey(const UResourceBundle *resourceBundle); - -/* ITERATION API - This API provides means for iterating through a resource -*/ - -/** - * Resets the internal context of a resource so that iteration starts from the first element. 
- * - * @param resourceBundle a resource - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ures_resetIterator(UResourceBundle *resourceBundle); - -/** - * Checks whether the given resource has another element to iterate over. - * - * @param resourceBundle a resource - * @return TRUE if there are more elements, FALSE if there is no more elements - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -ures_hasNext(const UResourceBundle *resourceBundle); - -/** - * Returns the next resource in a given resource or NULL if there are no more resources - * to iterate over. Features a fill-in parameter. - * - * @param resourceBundle a resource - * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller. - * Alternatively, you can supply a struct to be filled by this function. - * @param status fills in the outgoing error code. You may still get a non NULL result even if an - * error occurred. Check status instead. - * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it - * @stable ICU 2.0 - */ -U_STABLE UResourceBundle* U_EXPORT2 -ures_getNextResource(UResourceBundle *resourceBundle, - UResourceBundle *fillIn, - UErrorCode *status); - -/** - * Returns the next string in a given resource or NULL if there are no more resources - * to iterate over. - * - * @param resourceBundle a resource - * @param len fill in length of the string - * @param key fill in for key associated with this string. NULL if no key - * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't - * count on it. Check status instead! - * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. - * @stable ICU 2.0 - */ -U_STABLE const UChar* U_EXPORT2 -ures_getNextString(UResourceBundle *resourceBundle, - int32_t* len, - const char ** key, - UErrorCode *status); - -/** - * Returns the resource in a given resource at the specified index. 
Features a fill-in parameter. - * - * @param resourceBundle the resource bundle from which to get a sub-resource - * @param indexR an index to the wanted resource. - * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller. - * Alternatively, you can supply a struct to be filled by this function. - * @param status fills in the outgoing error code. Don't count on NULL being returned if an error has - * occurred. Check status instead. - * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it - * @stable ICU 2.0 - */ -U_STABLE UResourceBundle* U_EXPORT2 -ures_getByIndex(const UResourceBundle *resourceBundle, - int32_t indexR, - UResourceBundle *fillIn, - UErrorCode *status); - -/** - * Returns the string in a given resource at the specified index. - * - * @param resourceBundle a resource - * @param indexS an index to the wanted string. - * @param len fill in length of the string - * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't - * count on it. Check status instead! - * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. - * @stable ICU 2.0 - */ -U_STABLE const UChar* U_EXPORT2 -ures_getStringByIndex(const UResourceBundle *resourceBundle, - int32_t indexS, - int32_t* len, - UErrorCode *status); - -/** - * Returns a UTF-8 string from a resource at the specified index. - * The UTF-8 string may be returnable directly as a pointer, or - * it may need to be copied, or transformed from UTF-16 using u_strToUTF8() - * or equivalent. - * - * If forceCopy==TRUE, then the string is always written to the dest buffer - * and dest is returned. - * - * If forceCopy==FALSE, then the string is returned as a pointer if possible, - * without needing a dest buffer (it can be NULL). If the string needs to be - * copied or transformed, then it may be placed into dest at an arbitrary offset. 
- * - * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and - * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual. - * - * If the string is transformed from UTF-16, then a conversion error may occur - * if an unpaired surrogate is encountered. If the function is successful, then - * the output UTF-8 string is always well-formed. - * - * @param resB Resource bundle. - * @param stringIndex An index to the wanted string. - * @param dest Destination buffer. Can be NULL only if capacity=*length==0. - * @param pLength Input: Capacity of destination buffer. - * Output: Actual length of the UTF-8 string, not counting the - * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR. - * Can be NULL, meaning capacity=0 and the string length is not - * returned to the caller. - * @param forceCopy If TRUE, then the output string will always be written to - * dest, with U_BUFFER_OVERFLOW_ERROR and - * U_STRING_NOT_TERMINATED_WARNING set if appropriate. - * If FALSE, then the dest buffer may or may not contain a - * copy of the string. dest may or may not be modified. - * If a copy needs to be written, then the UErrorCode parameter - * indicates overflow etc. as usual. - * @param status Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to the UTF-8 string. It may be dest, or at some offset - * from dest (only if !forceCopy), or in unrelated memory. - * Always NUL-terminated unless the string was written to dest and - * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set). 
- * - * @see ures_getStringByIndex - * @see u_strToUTF8 - * @stable ICU 3.6 - */ -U_STABLE const char * U_EXPORT2 -ures_getUTF8StringByIndex(const UResourceBundle *resB, - int32_t stringIndex, - char *dest, int32_t *pLength, - UBool forceCopy, - UErrorCode *status); - -/** - * Returns a resource in a given resource that has a given key. This procedure works only with table - * resources. Features a fill-in parameter. - * - * @param resourceBundle a resource - * @param key a key associated with the wanted resource - * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller. - * Alternatively, you can supply a struct to be filled by this function. - * @param status fills in the outgoing error code. - * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it - * @stable ICU 2.0 - */ -U_STABLE UResourceBundle* U_EXPORT2 -ures_getByKey(const UResourceBundle *resourceBundle, - const char* key, - UResourceBundle *fillIn, - UErrorCode *status); - -/** - * Returns a string in a given resource that has a given key. This procedure works only with table - * resources. - * - * @param resB a resource - * @param key a key associated with the wanted string - * @param len fill in length of the string - * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't - * count on it. Check status instead! - * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. - * @stable ICU 2.0 - */ -U_STABLE const UChar* U_EXPORT2 -ures_getStringByKey(const UResourceBundle *resB, - const char* key, - int32_t* len, - UErrorCode *status); - -/** - * Returns a UTF-8 string from a resource and a key. - * This function works only with table resources. - * - * The UTF-8 string may be returnable directly as a pointer, or - * it may need to be copied, or transformed from UTF-16 using u_strToUTF8() - * or equivalent. 
- * - * If forceCopy==TRUE, then the string is always written to the dest buffer - * and dest is returned. - * - * If forceCopy==FALSE, then the string is returned as a pointer if possible, - * without needing a dest buffer (it can be NULL). If the string needs to be - * copied or transformed, then it may be placed into dest at an arbitrary offset. - * - * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and - * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual. - * - * If the string is transformed from UTF-16, then a conversion error may occur - * if an unpaired surrogate is encountered. If the function is successful, then - * the output UTF-8 string is always well-formed. - * - * @param resB Resource bundle. - * @param key A key associated with the wanted resource - * @param dest Destination buffer. Can be NULL only if capacity=*length==0. - * @param pLength Input: Capacity of destination buffer. - * Output: Actual length of the UTF-8 string, not counting the - * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR. - * Can be NULL, meaning capacity=0 and the string length is not - * returned to the caller. - * @param forceCopy If TRUE, then the output string will always be written to - * dest, with U_BUFFER_OVERFLOW_ERROR and - * U_STRING_NOT_TERMINATED_WARNING set if appropriate. - * If FALSE, then the dest buffer may or may not contain a - * copy of the string. dest may or may not be modified. - * If a copy needs to be written, then the UErrorCode parameter - * indicates overflow etc. as usual. - * @param status Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to the UTF-8 string. It may be dest, or at some offset - * from dest (only if !forceCopy), or in unrelated memory. 
- * Always NUL-terminated unless the string was written to dest and - * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set). - * - * @see ures_getStringByKey - * @see u_strToUTF8 - * @stable ICU 3.6 - */ -U_STABLE const char * U_EXPORT2 -ures_getUTF8StringByKey(const UResourceBundle *resB, - const char *key, - char *dest, int32_t *pLength, - UBool forceCopy, - UErrorCode *status); - -#if U_SHOW_CPLUSPLUS_API -#include "unicode/unistr.h" - -U_NAMESPACE_BEGIN -/** - * Returns the string value from a string resource bundle. - * - * @param resB a resource, should have type URES_STRING - * @param status: fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * could be a non-failing error - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return The string value, or a bogus string if there is a failure UErrorCode. - * @stable ICU 2.0 - */ -inline UnicodeString -ures_getUnicodeString(const UResourceBundle *resB, UErrorCode* status) { - UnicodeString result; - int32_t len = 0; - const UChar *r = ures_getString(resB, &len, status); - if(U_SUCCESS(*status)) { - result.setTo(TRUE, r, len); - } else { - result.setToBogus(); - } - return result; -} - -/** - * Returns the next string in a resource, or an empty string if there are no more resources - * to iterate over. - * Use ures_getNextString() instead to distinguish between - * the end of the iteration and a real empty string value. - * - * @param resB a resource - * @param key fill in for key associated with this string - * @param status fills in the outgoing error code - * @return The string value, or a bogus string if there is a failure UErrorCode. 
- * @stable ICU 2.0 - */ -inline UnicodeString -ures_getNextUnicodeString(UResourceBundle *resB, const char ** key, UErrorCode* status) { - UnicodeString result; - int32_t len = 0; - const UChar* r = ures_getNextString(resB, &len, key, status); - if(U_SUCCESS(*status)) { - result.setTo(TRUE, r, len); - } else { - result.setToBogus(); - } - return result; -} - -/** - * Returns the string in a given resource array or table at the specified index. - * - * @param resB a resource - * @param indexS an index to the wanted string. - * @param status fills in the outgoing error code - * @return The string value, or a bogus string if there is a failure UErrorCode. - * @stable ICU 2.0 - */ -inline UnicodeString -ures_getUnicodeStringByIndex(const UResourceBundle *resB, int32_t indexS, UErrorCode* status) { - UnicodeString result; - int32_t len = 0; - const UChar* r = ures_getStringByIndex(resB, indexS, &len, status); - if(U_SUCCESS(*status)) { - result.setTo(TRUE, r, len); - } else { - result.setToBogus(); - } - return result; -} - -/** - * Returns a string in a resource that has a given key. - * This procedure works only with table resources. - * - * @param resB a resource - * @param key a key associated with the wanted string - * @param status fills in the outgoing error code - * @return The string value, or a bogus string if there is a failure UErrorCode. - * @stable ICU 2.0 - */ -inline UnicodeString -ures_getUnicodeStringByKey(const UResourceBundle *resB, const char* key, UErrorCode* status) { - UnicodeString result; - int32_t len = 0; - const UChar* r = ures_getStringByKey(resB, key, &len, status); - if(U_SUCCESS(*status)) { - result.setTo(TRUE, r, len); - } else { - result.setToBogus(); - } - return result; -} - -U_NAMESPACE_END - -#endif - -/** - * Create a string enumerator, owned by the caller, of all locales located within - * the specified resource tree. 
- * @param packageName name of the tree, such as (NULL) or U_ICUDATA_ALIAS or or "ICUDATA-coll" - * This call is similar to uloc_getAvailable(). - * @param status error code - * @stable ICU 3.2 - */ -U_STABLE UEnumeration* U_EXPORT2 -ures_openAvailableLocales(const char *packageName, UErrorCode *status); - - -#endif /*_URES*/ -/*eof*/ diff --git a/deps/node/deps/icu-small/source/common/unicode/uscript.h b/deps/node/deps/icu-small/source/common/unicode/uscript.h deleted file mode 100644 index faf9edf8..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uscript.h +++ /dev/null @@ -1,690 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ********************************************************************** - * Copyright (C) 1997-2016, International Business Machines - * Corporation and others. All Rights Reserved. - ********************************************************************** - * - * File USCRIPT.H - * - * Modification History: - * - * Date Name Description - * 07/06/2001 Ram Creation. - ****************************************************************************** - */ - -#ifndef USCRIPT_H -#define USCRIPT_H -#include "unicode/utypes.h" - -/** - * \file - * \brief C API: Unicode Script Information - */ - -/** - * Constants for ISO 15924 script codes. - * - * The current set of script code constants supports at least all scripts - * that are encoded in the version of Unicode which ICU currently supports. - * The names of the constants are usually derived from the - * Unicode script property value aliases. - * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) - * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt . - * - * In addition, constants for many ISO 15924 script codes - * are included, for use with language tags, CLDR data, and similar. - * Some of those codes are not used in the Unicode Character Database (UCD). 
- * For example, there are no characters that have a UCD script property value of - * Hans or Hant. All Han ideographs have the Hani script property value in Unicode. - * - * Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR. - * - * Starting with ICU 55, script codes are only added when their scripts - * have been or will certainly be encoded in Unicode, - * and have been assigned Unicode script property value aliases, - * to ensure that their script names are stable and match the names of the constants. - * Script codes like Latf and Aran that are not subject to separate encoding - * may be added at any time. - * - * @stable ICU 2.2 - */ -typedef enum UScriptCode { - /* - * Note: UScriptCode constants and their ISO script code comments - * are parsed by preparseucd.py. - * It matches lines like - * USCRIPT_ = , / * * / - */ - - /** @stable ICU 2.2 */ - USCRIPT_INVALID_CODE = -1, - /** @stable ICU 2.2 */ - USCRIPT_COMMON = 0, /* Zyyy */ - /** @stable ICU 2.2 */ - USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ - /** @stable ICU 2.2 */ - USCRIPT_ARABIC = 2, /* Arab */ - /** @stable ICU 2.2 */ - USCRIPT_ARMENIAN = 3, /* Armn */ - /** @stable ICU 2.2 */ - USCRIPT_BENGALI = 4, /* Beng */ - /** @stable ICU 2.2 */ - USCRIPT_BOPOMOFO = 5, /* Bopo */ - /** @stable ICU 2.2 */ - USCRIPT_CHEROKEE = 6, /* Cher */ - /** @stable ICU 2.2 */ - USCRIPT_COPTIC = 7, /* Copt */ - /** @stable ICU 2.2 */ - USCRIPT_CYRILLIC = 8, /* Cyrl */ - /** @stable ICU 2.2 */ - USCRIPT_DESERET = 9, /* Dsrt */ - /** @stable ICU 2.2 */ - USCRIPT_DEVANAGARI = 10, /* Deva */ - /** @stable ICU 2.2 */ - USCRIPT_ETHIOPIC = 11, /* Ethi */ - /** @stable ICU 2.2 */ - USCRIPT_GEORGIAN = 12, /* Geor */ - /** @stable ICU 2.2 */ - USCRIPT_GOTHIC = 13, /* Goth */ - /** @stable ICU 2.2 */ - USCRIPT_GREEK = 14, /* Grek */ - /** @stable ICU 2.2 */ - USCRIPT_GUJARATI = 15, /* Gujr */ - /** @stable ICU 2.2 */ - 
USCRIPT_GURMUKHI = 16, /* Guru */ - /** @stable ICU 2.2 */ - USCRIPT_HAN = 17, /* Hani */ - /** @stable ICU 2.2 */ - USCRIPT_HANGUL = 18, /* Hang */ - /** @stable ICU 2.2 */ - USCRIPT_HEBREW = 19, /* Hebr */ - /** @stable ICU 2.2 */ - USCRIPT_HIRAGANA = 20, /* Hira */ - /** @stable ICU 2.2 */ - USCRIPT_KANNADA = 21, /* Knda */ - /** @stable ICU 2.2 */ - USCRIPT_KATAKANA = 22, /* Kana */ - /** @stable ICU 2.2 */ - USCRIPT_KHMER = 23, /* Khmr */ - /** @stable ICU 2.2 */ - USCRIPT_LAO = 24, /* Laoo */ - /** @stable ICU 2.2 */ - USCRIPT_LATIN = 25, /* Latn */ - /** @stable ICU 2.2 */ - USCRIPT_MALAYALAM = 26, /* Mlym */ - /** @stable ICU 2.2 */ - USCRIPT_MONGOLIAN = 27, /* Mong */ - /** @stable ICU 2.2 */ - USCRIPT_MYANMAR = 28, /* Mymr */ - /** @stable ICU 2.2 */ - USCRIPT_OGHAM = 29, /* Ogam */ - /** @stable ICU 2.2 */ - USCRIPT_OLD_ITALIC = 30, /* Ital */ - /** @stable ICU 2.2 */ - USCRIPT_ORIYA = 31, /* Orya */ - /** @stable ICU 2.2 */ - USCRIPT_RUNIC = 32, /* Runr */ - /** @stable ICU 2.2 */ - USCRIPT_SINHALA = 33, /* Sinh */ - /** @stable ICU 2.2 */ - USCRIPT_SYRIAC = 34, /* Syrc */ - /** @stable ICU 2.2 */ - USCRIPT_TAMIL = 35, /* Taml */ - /** @stable ICU 2.2 */ - USCRIPT_TELUGU = 36, /* Telu */ - /** @stable ICU 2.2 */ - USCRIPT_THAANA = 37, /* Thaa */ - /** @stable ICU 2.2 */ - USCRIPT_THAI = 38, /* Thai */ - /** @stable ICU 2.2 */ - USCRIPT_TIBETAN = 39, /* Tibt */ - /** Canadian_Aboriginal script. @stable ICU 2.6 */ - USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */ - /** Canadian_Aboriginal script (alias). 
@stable ICU 2.2 */ - USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, - /** @stable ICU 2.2 */ - USCRIPT_YI = 41, /* Yiii */ - /* New scripts in Unicode 3.2 */ - /** @stable ICU 2.2 */ - USCRIPT_TAGALOG = 42, /* Tglg */ - /** @stable ICU 2.2 */ - USCRIPT_HANUNOO = 43, /* Hano */ - /** @stable ICU 2.2 */ - USCRIPT_BUHID = 44, /* Buhd */ - /** @stable ICU 2.2 */ - USCRIPT_TAGBANWA = 45, /* Tagb */ - - /* New scripts in Unicode 4 */ - /** @stable ICU 2.6 */ - USCRIPT_BRAILLE = 46, /* Brai */ - /** @stable ICU 2.6 */ - USCRIPT_CYPRIOT = 47, /* Cprt */ - /** @stable ICU 2.6 */ - USCRIPT_LIMBU = 48, /* Limb */ - /** @stable ICU 2.6 */ - USCRIPT_LINEAR_B = 49, /* Linb */ - /** @stable ICU 2.6 */ - USCRIPT_OSMANYA = 50, /* Osma */ - /** @stable ICU 2.6 */ - USCRIPT_SHAVIAN = 51, /* Shaw */ - /** @stable ICU 2.6 */ - USCRIPT_TAI_LE = 52, /* Tale */ - /** @stable ICU 2.6 */ - USCRIPT_UGARITIC = 53, /* Ugar */ - - /** New script code in Unicode 4.0.1 @stable ICU 3.0 */ - USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */ - - /* New scripts in Unicode 4.1 */ - /** @stable ICU 3.4 */ - USCRIPT_BUGINESE = 55, /* Bugi */ - /** @stable ICU 3.4 */ - USCRIPT_GLAGOLITIC = 56, /* Glag */ - /** @stable ICU 3.4 */ - USCRIPT_KHAROSHTHI = 57, /* Khar */ - /** @stable ICU 3.4 */ - USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */ - /** @stable ICU 3.4 */ - USCRIPT_NEW_TAI_LUE = 59, /* Talu */ - /** @stable ICU 3.4 */ - USCRIPT_TIFINAGH = 60, /* Tfng */ - /** @stable ICU 3.4 */ - USCRIPT_OLD_PERSIAN = 61, /* Xpeo */ - - /* New script codes from Unicode and ISO 15924 */ - /** @stable ICU 3.6 */ - USCRIPT_BALINESE = 62, /* Bali */ - /** @stable ICU 3.6 */ - USCRIPT_BATAK = 63, /* Batk */ - /** @stable ICU 3.6 */ - USCRIPT_BLISSYMBOLS = 64, /* Blis */ - /** @stable ICU 3.6 */ - USCRIPT_BRAHMI = 65, /* Brah */ - /** @stable ICU 3.6 */ - USCRIPT_CHAM = 66, /* Cham */ - /** @stable ICU 3.6 */ - USCRIPT_CIRTH = 67, /* Cirt */ - /** @stable ICU 3.6 */ - USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */ - /** @stable 
ICU 3.6 */ - USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */ - /** @stable ICU 3.6 */ - USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */ - /** @stable ICU 3.6 */ - USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */ - /** @stable ICU 3.6 */ - USCRIPT_KHUTSURI = 72, /* Geok */ - /** @stable ICU 3.6 */ - USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */ - /** @stable ICU 3.6 */ - USCRIPT_TRADITIONAL_HAN = 74, /* Hant */ - /** @stable ICU 3.6 */ - USCRIPT_PAHAWH_HMONG = 75, /* Hmng */ - /** @stable ICU 3.6 */ - USCRIPT_OLD_HUNGARIAN = 76, /* Hung */ - /** @stable ICU 3.6 */ - USCRIPT_HARAPPAN_INDUS = 77, /* Inds */ - /** @stable ICU 3.6 */ - USCRIPT_JAVANESE = 78, /* Java */ - /** @stable ICU 3.6 */ - USCRIPT_KAYAH_LI = 79, /* Kali */ - /** @stable ICU 3.6 */ - USCRIPT_LATIN_FRAKTUR = 80, /* Latf */ - /** @stable ICU 3.6 */ - USCRIPT_LATIN_GAELIC = 81, /* Latg */ - /** @stable ICU 3.6 */ - USCRIPT_LEPCHA = 82, /* Lepc */ - /** @stable ICU 3.6 */ - USCRIPT_LINEAR_A = 83, /* Lina */ - /** @stable ICU 4.6 */ - USCRIPT_MANDAIC = 84, /* Mand */ - /** @stable ICU 3.6 */ - USCRIPT_MANDAEAN = USCRIPT_MANDAIC, - /** @stable ICU 3.6 */ - USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */ - /** @stable ICU 4.6 */ - USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */ - /** @stable ICU 3.6 */ - USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS, - /** @stable ICU 3.6 */ - USCRIPT_NKO = 87, /* Nkoo */ - /** @stable ICU 3.6 */ - USCRIPT_ORKHON = 88, /* Orkh */ - /** @stable ICU 3.6 */ - USCRIPT_OLD_PERMIC = 89, /* Perm */ - /** @stable ICU 3.6 */ - USCRIPT_PHAGS_PA = 90, /* Phag */ - /** @stable ICU 3.6 */ - USCRIPT_PHOENICIAN = 91, /* Phnx */ - /** @stable ICU 52 */ - USCRIPT_MIAO = 92, /* Plrd */ - /** @stable ICU 3.6 */ - USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO, - /** @stable ICU 3.6 */ - USCRIPT_RONGORONGO = 93, /* Roro */ - /** @stable ICU 3.6 */ - USCRIPT_SARATI = 94, /* Sara */ - /** @stable ICU 3.6 */ - USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */ - /** @stable ICU 3.6 */ - USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */ - /** 
@stable ICU 3.6 */ - USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */ - /** @stable ICU 3.6 */ - USCRIPT_TENGWAR = 98, /* Teng */ - /** @stable ICU 3.6 */ - USCRIPT_VAI = 99, /* Vaii */ - /** @stable ICU 3.6 */ - USCRIPT_VISIBLE_SPEECH = 100,/* Visp */ - /** @stable ICU 3.6 */ - USCRIPT_CUNEIFORM = 101,/* Xsux */ - /** @stable ICU 3.6 */ - USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */ - /** @stable ICU 3.6 */ - USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ - - /** @stable ICU 3.8 */ - USCRIPT_CARIAN = 104,/* Cari */ - /** @stable ICU 3.8 */ - USCRIPT_JAPANESE = 105,/* Jpan */ - /** @stable ICU 3.8 */ - USCRIPT_LANNA = 106,/* Lana */ - /** @stable ICU 3.8 */ - USCRIPT_LYCIAN = 107,/* Lyci */ - /** @stable ICU 3.8 */ - USCRIPT_LYDIAN = 108,/* Lydi */ - /** @stable ICU 3.8 */ - USCRIPT_OL_CHIKI = 109,/* Olck */ - /** @stable ICU 3.8 */ - USCRIPT_REJANG = 110,/* Rjng */ - /** @stable ICU 3.8 */ - USCRIPT_SAURASHTRA = 111,/* Saur */ - /** Sutton SignWriting @stable ICU 3.8 */ - USCRIPT_SIGN_WRITING = 112,/* Sgnw */ - /** @stable ICU 3.8 */ - USCRIPT_SUNDANESE = 113,/* Sund */ - /** @stable ICU 3.8 */ - USCRIPT_MOON = 114,/* Moon */ - /** @stable ICU 3.8 */ - USCRIPT_MEITEI_MAYEK = 115,/* Mtei */ - - /** @stable ICU 4.0 */ - USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */ - /** @stable ICU 4.0 */ - USCRIPT_AVESTAN = 117,/* Avst */ - /** @stable ICU 4.0 */ - USCRIPT_CHAKMA = 118,/* Cakm */ - /** @stable ICU 4.0 */ - USCRIPT_KOREAN = 119,/* Kore */ - /** @stable ICU 4.0 */ - USCRIPT_KAITHI = 120,/* Kthi */ - /** @stable ICU 4.0 */ - USCRIPT_MANICHAEAN = 121,/* Mani */ - /** @stable ICU 4.0 */ - USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */ - /** @stable ICU 4.0 */ - USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */ - /** @stable ICU 4.0 */ - USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */ - /** @stable ICU 4.0 */ - USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */ - /** @stable ICU 4.0 */ - USCRIPT_SAMARITAN = 126,/* Samr */ - /** @stable ICU 4.0 */ - 
USCRIPT_TAI_VIET = 127,/* Tavt */ - /** @stable ICU 4.0 */ - USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */ - /** @stable ICU 4.0 */ - USCRIPT_SYMBOLS = 129,/* Zsym */ - - /** @stable ICU 4.4 */ - USCRIPT_BAMUM = 130,/* Bamu */ - /** @stable ICU 4.4 */ - USCRIPT_LISU = 131,/* Lisu */ - /** @stable ICU 4.4 */ - USCRIPT_NAKHI_GEBA = 132,/* Nkgb */ - /** @stable ICU 4.4 */ - USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */ - - /** @stable ICU 4.6 */ - USCRIPT_BASSA_VAH = 134,/* Bass */ - /** @stable ICU 54 */ - USCRIPT_DUPLOYAN = 135,/* Dupl */ -#ifndef U_HIDE_DEPRECATED_API - /** @deprecated ICU 54 Typo, use USCRIPT_DUPLOYAN */ - USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN, -#endif /* U_HIDE_DEPRECATED_API */ - /** @stable ICU 4.6 */ - USCRIPT_ELBASAN = 136,/* Elba */ - /** @stable ICU 4.6 */ - USCRIPT_GRANTHA = 137,/* Gran */ - /** @stable ICU 4.6 */ - USCRIPT_KPELLE = 138,/* Kpel */ - /** @stable ICU 4.6 */ - USCRIPT_LOMA = 139,/* Loma */ - /** Mende Kikakui @stable ICU 4.6 */ - USCRIPT_MENDE = 140,/* Mend */ - /** @stable ICU 4.6 */ - USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */ - /** @stable ICU 4.6 */ - USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */ - /** @stable ICU 4.6 */ - USCRIPT_NABATAEAN = 143,/* Nbat */ - /** @stable ICU 4.6 */ - USCRIPT_PALMYRENE = 144,/* Palm */ - /** @stable ICU 54 */ - USCRIPT_KHUDAWADI = 145,/* Sind */ - /** @stable ICU 4.6 */ - USCRIPT_SINDHI = USCRIPT_KHUDAWADI, - /** @stable ICU 4.6 */ - USCRIPT_WARANG_CITI = 146,/* Wara */ - - /** @stable ICU 4.8 */ - USCRIPT_AFAKA = 147,/* Afak */ - /** @stable ICU 4.8 */ - USCRIPT_JURCHEN = 148,/* Jurc */ - /** @stable ICU 4.8 */ - USCRIPT_MRO = 149,/* Mroo */ - /** @stable ICU 4.8 */ - USCRIPT_NUSHU = 150,/* Nshu */ - /** @stable ICU 4.8 */ - USCRIPT_SHARADA = 151,/* Shrd */ - /** @stable ICU 4.8 */ - USCRIPT_SORA_SOMPENG = 152,/* Sora */ - /** @stable ICU 4.8 */ - USCRIPT_TAKRI = 153,/* Takr */ - /** @stable ICU 4.8 */ - USCRIPT_TANGUT = 154,/* Tang */ - /** @stable ICU 4.8 */ - USCRIPT_WOLEAI = 155,/* 
Wole */ - - /** @stable ICU 49 */ - USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */ - /** @stable ICU 49 */ - USCRIPT_KHOJKI = 157,/* Khoj */ - /** @stable ICU 49 */ - USCRIPT_TIRHUTA = 158,/* Tirh */ - - /** @stable ICU 52 */ - USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */ - /** @stable ICU 52 */ - USCRIPT_MAHAJANI = 160,/* Mahj */ - - /** @stable ICU 54 */ - USCRIPT_AHOM = 161,/* Ahom */ - /** @stable ICU 54 */ - USCRIPT_HATRAN = 162,/* Hatr */ - /** @stable ICU 54 */ - USCRIPT_MODI = 163,/* Modi */ - /** @stable ICU 54 */ - USCRIPT_MULTANI = 164,/* Mult */ - /** @stable ICU 54 */ - USCRIPT_PAU_CIN_HAU = 165,/* Pauc */ - /** @stable ICU 54 */ - USCRIPT_SIDDHAM = 166,/* Sidd */ - - /** @stable ICU 58 */ - USCRIPT_ADLAM = 167,/* Adlm */ - /** @stable ICU 58 */ - USCRIPT_BHAIKSUKI = 168,/* Bhks */ - /** @stable ICU 58 */ - USCRIPT_MARCHEN = 169,/* Marc */ - /** @stable ICU 58 */ - USCRIPT_NEWA = 170,/* Newa */ - /** @stable ICU 58 */ - USCRIPT_OSAGE = 171,/* Osge */ - - /** @stable ICU 58 */ - USCRIPT_HAN_WITH_BOPOMOFO = 172,/* Hanb */ - /** @stable ICU 58 */ - USCRIPT_JAMO = 173,/* Jamo */ - /** @stable ICU 58 */ - USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */ - - /** @stable ICU 60 */ - USCRIPT_MASARAM_GONDI = 175,/* Gonm */ - /** @stable ICU 60 */ - USCRIPT_SOYOMBO = 176,/* Soyo */ - /** @stable ICU 60 */ - USCRIPT_ZANABAZAR_SQUARE = 177,/* Zanb */ - - /** @stable ICU 62 */ - USCRIPT_DOGRA = 178,/* Dogr */ - /** @stable ICU 62 */ - USCRIPT_GUNJALA_GONDI = 179,/* Gong */ - /** @stable ICU 62 */ - USCRIPT_MAKASAR = 180,/* Maka */ - /** @stable ICU 62 */ - USCRIPT_MEDEFAIDRIN = 181,/* Medf */ - /** @stable ICU 62 */ - USCRIPT_HANIFI_ROHINGYA = 182,/* Rohg */ - /** @stable ICU 62 */ - USCRIPT_SOGDIAN = 183,/* Sogd */ - /** @stable ICU 62 */ - USCRIPT_OLD_SOGDIAN = 184,/* Sogo */ - -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UScriptCode value. - * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT). 
- * - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - USCRIPT_CODE_LIMIT = 185 -#endif // U_HIDE_DEPRECATED_API -} UScriptCode; - -/** - * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name. - * Fills in USCRIPT_MALAYALAM given "Malayam" OR "Mlym". - * Fills in USCRIPT_LATIN given "en" OR "en_US" - * If the required capacity is greater than the capacity of the destination buffer, - * then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned. - * - *

Note: To search by short or long script alias only, use - * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does - * a fast lookup with no access of the locale data. - * - * @param nameOrAbbrOrLocale name of the script, as given in - * PropertyValueAliases.txt, or ISO 15924 code or locale - * @param fillIn the UScriptCode buffer to fill in the script code - * @param capacity the capacity (size) of UScriptCode buffer passed in. - * @param err the error status code. - * @return The number of script codes filled in the buffer passed in - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err); - -/** - * Returns the long Unicode script name, if there is one. - * Otherwise returns the 4-letter ISO 15924 script code. - * Returns "Malayam" given USCRIPT_MALAYALAM. - * - * @param scriptCode UScriptCode enum - * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code, - * or NULL if scriptCode is invalid - * @stable ICU 2.4 - */ -U_STABLE const char* U_EXPORT2 -uscript_getName(UScriptCode scriptCode); - -/** - * Returns the 4-letter ISO 15924 script code, - * which is the same as the short Unicode script name if Unicode has names for the script. - * Returns "Mlym" given USCRIPT_MALAYALAM. - * - * @param scriptCode UScriptCode enum - * @return short script name (4-letter code), or NULL if scriptCode is invalid - * @stable ICU 2.4 - */ -U_STABLE const char* U_EXPORT2 -uscript_getShortName(UScriptCode scriptCode); - -/** - * Gets the script code associated with the given codepoint. - * Returns USCRIPT_MALAYALAM given 0x0D02 - * @param codepoint UChar32 codepoint - * @param err the error status code. - * @return The UScriptCode, or 0 if codepoint is invalid - * @stable ICU 2.4 - */ -U_STABLE UScriptCode U_EXPORT2 -uscript_getScript(UChar32 codepoint, UErrorCode *err); - -/** - * Do the Script_Extensions of code point c contain script sc? 
- * If c does not have explicit Script_Extensions, then this tests whether - * c has the Script property value sc. - * - * Some characters are commonly used in multiple scripts. - * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. - * @param c code point - * @param sc script code - * @return TRUE if sc is in Script_Extensions(c) - * @stable ICU 49 - */ -U_STABLE UBool U_EXPORT2 -uscript_hasScript(UChar32 c, UScriptCode sc); - -/** - * Writes code point c's Script_Extensions as a list of UScriptCode values - * to the output scripts array and returns the number of script codes. - * - If c does have Script_Extensions, then the Script property value - * (normally Common or Inherited) is not included. - * - If c does not have Script_Extensions, then the one Script code is written to the output array. - * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written. - * In other words, if the return value is 1, - * then the output array contains exactly c's single Script code. - * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes. - * - * Some characters are commonly used in multiple scripts. - * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. - * - * If there are more than capacity script codes to be written, then - * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. - * (Usual ICU buffer handling behavior.) - * - * @param c code point - * @param scripts output script code array - * @param capacity capacity of the scripts array - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return number of script codes in c's Script_Extensions, or 1 for the single Script value, - * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity - * @stable ICU 49 - */ -U_STABLE int32_t U_EXPORT2 -uscript_getScriptExtensions(UChar32 c, - UScriptCode *scripts, int32_t capacity, - UErrorCode *errorCode); - -/** - * Script usage constants. - * See UAX #31 Unicode Identifier and Pattern Syntax. - * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers - * - * @stable ICU 51 - */ -typedef enum UScriptUsage { - /** Not encoded in Unicode. @stable ICU 51 */ - USCRIPT_USAGE_NOT_ENCODED, - /** Unknown script usage. @stable ICU 51 */ - USCRIPT_USAGE_UNKNOWN, - /** Candidate for Exclusion from Identifiers. @stable ICU 51 */ - USCRIPT_USAGE_EXCLUDED, - /** Limited Use script. @stable ICU 51 */ - USCRIPT_USAGE_LIMITED_USE, - /** Aspirational Use script. @stable ICU 51 */ - USCRIPT_USAGE_ASPIRATIONAL, - /** Recommended script. @stable ICU 51 */ - USCRIPT_USAGE_RECOMMENDED -} UScriptUsage; - -/** - * Writes the script sample character string. - * This string normally consists of one code point but might be longer. - * The string is empty if the script is not encoded. - * - * @param script script code - * @param dest output string array - * @param capacity number of UChars in the dest array - * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input - * @return the string length, even if U_BUFFER_OVERFLOW_ERROR - * @stable ICU 51 - */ -U_STABLE int32_t U_EXPORT2 -uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN -class UnicodeString; -U_NAMESPACE_END - -/** - * Returns the script sample character string. - * This string normally consists of one code point but might be longer. - * The string is empty if the script is not encoded. 
- * - * @param script script code - * @return the sample character string - * @stable ICU 51 - */ -U_COMMON_API icu::UnicodeString U_EXPORT2 -uscript_getSampleUnicodeString(UScriptCode script); - -#endif - -/** - * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax. - * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode. - * - * @param script script code - * @return script usage - * @see UScriptUsage - * @stable ICU 51 - */ -U_STABLE UScriptUsage U_EXPORT2 -uscript_getUsage(UScriptCode script); - -/** - * Returns TRUE if the script is written right-to-left. - * For example, Arab and Hebr. - * - * @param script script code - * @return TRUE if the script is right-to-left - * @stable ICU 51 - */ -U_STABLE UBool U_EXPORT2 -uscript_isRightToLeft(UScriptCode script); - -/** - * Returns TRUE if the script allows line breaks between letters (excluding hyphenation). - * Such a script typically requires dictionary-based line breaking. - * For example, Hani and Thai. - * - * @param script script code - * @return TRUE if the script allows line breaks between letters - * @stable ICU 51 - */ -U_STABLE UBool U_EXPORT2 -uscript_breaksBetweenLetters(UScriptCode script); - -/** - * Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary. - * For example, Latn and Cyrl. - * - * @param script script code - * @return TRUE if the script is cased - * @stable ICU 51 - */ -U_STABLE UBool U_EXPORT2 -uscript_isCased(UScriptCode script); - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/uset.h b/deps/node/deps/icu-small/source/common/unicode/uset.h deleted file mode 100644 index ef6bbb5c..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uset.h +++ /dev/null @@ -1,1134 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uset.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002mar07 -* created by: Markus W. Scherer -* -* C version of UnicodeSet. -*/ - - -/** - * \file - * \brief C API: Unicode Set - * - *

This is a C wrapper around the C++ UnicodeSet class.

- */ - -#ifndef __USET_H__ -#define __USET_H__ - -#include "unicode/utypes.h" -#include "unicode/uchar.h" -#include "unicode/localpointer.h" - -#ifndef USET_DEFINED - -#ifndef U_IN_DOXYGEN -#define USET_DEFINED -#endif -/** - * USet is the C API type corresponding to C++ class UnicodeSet. - * Use the uset_* API to manipulate. Create with - * uset_open*, and destroy with uset_close. - * @stable ICU 2.4 - */ -typedef struct USet USet; -#endif - -/** - * Bitmask values to be passed to uset_openPatternOptions() or - * uset_applyPattern() taking an option parameter. - * @stable ICU 2.4 - */ -enum { - /** - * Ignore white space within patterns unless quoted or escaped. - * @stable ICU 2.4 - */ - USET_IGNORE_SPACE = 1, - - /** - * Enable case insensitive matching. E.g., "[ab]" with this flag - * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will - * match all except 'a', 'A', 'b', and 'B'. This performs a full - * closure over case mappings, e.g. U+017F for s. - * - * The resulting set is a superset of the input for the code points but - * not for the strings. - * It performs a case mapping closure of the code points and adds - * full case folding strings for the code points, and reduces strings of - * the original set to their full case folding equivalents. - * - * This is designed for case-insensitive matches, for example - * in regular expressions. The full code point case closure allows checking of - * an input character directly against the closure set. - * Strings are matched by comparing the case-folded form from the closure - * set with an incremental case folding of the string in question. - * - * The closure set will also contain single code points if the original - * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). - * This is not necessary (that is, redundant) for the above matching method - * but results in the same closure sets regardless of whether the original - * set contained the code point or a string. 
- * - * @stable ICU 2.4 - */ - USET_CASE_INSENSITIVE = 2, - - /** - * Enable case insensitive matching. E.g., "[ab]" with this flag - * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will - * match all except 'a', 'A', 'b', and 'B'. This adds the lower-, - * title-, and uppercase mappings as well as the case folding - * of each existing element in the set. - * @stable ICU 3.2 - */ - USET_ADD_CASE_MAPPINGS = 4 -}; - -/** - * Argument values for whether span() and similar functions continue while - * the current character is contained vs. not contained in the set. - * - * The functionality is straightforward for sets with only single code points, - * without strings (which is the common case): - * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same. - * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED. - * - span() and spanBack() partition any string the same way when - * alternating between span(USET_SPAN_NOT_CONTAINED) and - * span(either "contained" condition). - * - Using a complemented (inverted) set and the opposite span conditions - * yields the same results. - * - * When a set contains multi-code point strings, then these statements may not - * be true, depending on the strings in the set (for example, whether they - * overlap with each other) and the string that is processed. - * For a set with strings: - * - The complement of the set contains the opposite set of code points, - * but the same set of strings. - * Therefore, complementing both the set and the span conditions - * may yield different results. - * - When starting spans at different positions in a string - * (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different - * because a set string may start before the later position. - * - span(USET_SPAN_SIMPLE) may be shorter than - * span(USET_SPAN_CONTAINED) because it will not recursively try - * all possible paths. 
- * For example, with a set which contains the three strings "xy", "xya" and "ax", - * span("xyax", USET_SPAN_CONTAINED) will return 4 but - * span("xyax", USET_SPAN_SIMPLE) will return 3. - * span(USET_SPAN_SIMPLE) will never be longer than - * span(USET_SPAN_CONTAINED). - * - With either "contained" condition, span() and spanBack() may partition - * a string in different ways. - * For example, with a set which contains the two strings "ab" and "ba", - * and when processing the string "aba", - * span() will yield contained/not-contained boundaries of { 0, 2, 3 } - * while spanBack() will yield boundaries of { 0, 1, 3 }. - * - * Note: If it is important to get the same boundaries whether iterating forward - * or backward through a string, then either only span() should be used and - * the boundaries cached for backward operation, or an ICU BreakIterator - * could be used. - * - * Note: Unpaired surrogates are treated like surrogate code points. - * Similarly, set strings match only on code point boundaries, - * never in the middle of a surrogate pair. - * Illegal UTF-8 sequences are treated like U+FFFD. - * When processing UTF-8 strings, malformed set strings - * (strings with unpaired surrogates which cannot be converted to UTF-8) - * are ignored. - * - * @stable ICU 3.8 - */ -typedef enum USetSpanCondition { - /** - * Continues a span() while there is no set element at the current position. - * Increments by one code point at a time. - * Stops before the first set element (character or string). - * (For code points only, this is like while contains(current)==FALSE). - * - * When span() returns, the substring between where it started and the position - * it returned consists only of characters that are not in the set, - * and none of its strings overlap with the span. - * - * @stable ICU 3.8 - */ - USET_SPAN_NOT_CONTAINED = 0, - /** - * Spans the longest substring that is a concatenation of set elements (characters or strings). 
- * (For characters only, this is like while contains(current)==TRUE). - * - * When span() returns, the substring between where it started and the position - * it returned consists only of set elements (characters or strings) that are in the set. - * - * If a set contains strings, then the span will be the longest substring for which there - * exists at least one non-overlapping concatenation of set elements (characters or strings). - * This is equivalent to a POSIX regular expression for (OR of each set element)*. - * (Java/ICU/Perl regex stops at the first match of an OR.) - * - * @stable ICU 3.8 - */ - USET_SPAN_CONTAINED = 1, - /** - * Continues a span() while there is a set element at the current position. - * Increments by the longest matching element at each position. - * (For characters only, this is like while contains(current)==TRUE). - * - * When span() returns, the substring between where it started and the position - * it returned consists only of set elements (characters or strings) that are in the set. - * - * If a set only contains single characters, then this is the same - * as USET_SPAN_CONTAINED. - * - * If a set contains strings, then the span will be the longest substring - * with a match at each position with the longest single set element (character or string). - * - * Use this span condition together with other longest-match algorithms, - * such as ICU converters (ucnv_getUnicodeSet()). - * - * @stable ICU 3.8 - */ - USET_SPAN_SIMPLE = 2, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the last span condition. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - USET_SPAN_CONDITION_COUNT -#endif // U_HIDE_DEPRECATED_API -} USetSpanCondition; - -enum { - /** - * Capacity of USerializedSet::staticArray. - * Enough for any single-code point set. - * Also provides padding for nice sizeof(USerializedSet). 
- * @stable ICU 2.4 - */ - USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 -}; - -/** - * A serialized form of a Unicode set. Limited manipulations are - * possible directly on a serialized set. See below. - * @stable ICU 2.4 - */ -typedef struct USerializedSet { - /** - * The serialized Unicode Set. - * @stable ICU 2.4 - */ - const uint16_t *array; - /** - * The length of the array that contains BMP characters. - * @stable ICU 2.4 - */ - int32_t bmpLength; - /** - * The total length of the array. - * @stable ICU 2.4 - */ - int32_t length; - /** - * A small buffer for the array to reduce memory allocations. - * @stable ICU 2.4 - */ - uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]; -} USerializedSet; - -/********************************************************************* - * USet API - *********************************************************************/ - -/** - * Create an empty USet object. - * Equivalent to uset_open(1, 0). - * @return a newly created USet. The caller must call uset_close() on - * it when done. - * @stable ICU 4.2 - */ -U_STABLE USet* U_EXPORT2 -uset_openEmpty(void); - -/** - * Creates a USet object that contains the range of characters - * start..end, inclusive. If start > end - * then an empty set is created (same as using uset_openEmpty()). - * @param start first character of the range, inclusive - * @param end last character of the range, inclusive - * @return a newly created USet. The caller must call uset_close() on - * it when done. - * @stable ICU 2.4 - */ -U_STABLE USet* U_EXPORT2 -uset_open(UChar32 start, UChar32 end); - -/** - * Creates a set from the given pattern. See the UnicodeSet class - * description for the syntax of the pattern language. 
- * @param pattern a string specifying what characters are in the set - * @param patternLength the length of the pattern, or -1 if null - * terminated - * @param ec the error code - * @stable ICU 2.4 - */ -U_STABLE USet* U_EXPORT2 -uset_openPattern(const UChar* pattern, int32_t patternLength, - UErrorCode* ec); - -/** - * Creates a set from the given pattern. See the UnicodeSet class - * description for the syntax of the pattern language. - * @param pattern a string specifying what characters are in the set - * @param patternLength the length of the pattern, or -1 if null - * terminated - * @param options bitmask for options to apply to the pattern. - * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. - * @param ec the error code - * @stable ICU 2.4 - */ -U_STABLE USet* U_EXPORT2 -uset_openPatternOptions(const UChar* pattern, int32_t patternLength, - uint32_t options, - UErrorCode* ec); - -/** - * Disposes of the storage used by a USet object. This function should - * be called exactly once for objects returned by uset_open(). - * @param set the object to dispose of - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_close(USet* set); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUSetPointer - * "Smart pointer" class, closes a USet via uset_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close); - -U_NAMESPACE_END - -#endif - -/** - * Returns a copy of this object. - * If this set is frozen, then the clone will be frozen as well. - * Use uset_cloneAsThawed() for a mutable clone of a frozen set. - * @param set the original set - * @return the newly allocated copy of the set - * @see uset_cloneAsThawed - * @stable ICU 3.8 - */ -U_STABLE USet * U_EXPORT2 -uset_clone(const USet *set); - -/** - * Determines whether the set has been frozen (made immutable) or not. 
- * See the ICU4J Freezable interface for details. - * @param set the set - * @return TRUE/FALSE for whether the set has been frozen - * @see uset_freeze - * @see uset_cloneAsThawed - * @stable ICU 3.8 - */ -U_STABLE UBool U_EXPORT2 -uset_isFrozen(const USet *set); - -/** - * Freeze the set (make it immutable). - * Once frozen, it cannot be unfrozen and is therefore thread-safe - * until it is deleted. - * See the ICU4J Freezable interface for details. - * Freezing the set may also make some operations faster, for example - * uset_contains() and uset_span(). - * A frozen set will not be modified. (It remains frozen.) - * @param set the set - * @return the same set, now frozen - * @see uset_isFrozen - * @see uset_cloneAsThawed - * @stable ICU 3.8 - */ -U_STABLE void U_EXPORT2 -uset_freeze(USet *set); - -/** - * Clone the set and make the clone mutable. - * See the ICU4J Freezable interface for details. - * @param set the set - * @return the mutable clone - * @see uset_freeze - * @see uset_isFrozen - * @see uset_clone - * @stable ICU 3.8 - */ -U_STABLE USet * U_EXPORT2 -uset_cloneAsThawed(const USet *set); - -/** - * Causes the USet object to represent the range start - end. - * If start > end then this USet is set to an empty range. - * A frozen set will not be modified. - * @param set the object to set to the given range - * @param start first character in the set, inclusive - * @param end last character in the set, inclusive - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_set(USet* set, - UChar32 start, UChar32 end); - -/** - * Modifies the set to represent the set specified by the given - * pattern. See the UnicodeSet class description for the syntax of - * the pattern language. See also the User Guide chapter about UnicodeSet. - * Empties the set passed before applying the pattern. - * A frozen set will not be modified. - * @param set The set to which the pattern is to be applied. 
- * @param pattern A pointer to UChar string specifying what characters are in the set. - * The character at pattern[0] must be a '['. - * @param patternLength The length of the UChar string. -1 if NUL terminated. - * @param options A bitmask for options to apply to the pattern. - * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. - * @param status Returns an error if the pattern cannot be parsed. - * @return Upon successful parse, the value is either - * the index of the character after the closing ']' - * of the parsed pattern. - * If the status code indicates failure, then the return value - * is the index of the error in the source. - * - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uset_applyPattern(USet *set, - const UChar *pattern, int32_t patternLength, - uint32_t options, - UErrorCode *status); - -/** - * Modifies the set to contain those code points which have the given value - * for the given binary or enumerated property, as returned by - * u_getIntPropertyValue. Prior contents of this set are lost. - * A frozen set will not be modified. - * - * @param set the object to contain the code points defined by the property - * - * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 - * or UCHAR_INT_START..UCHAR_INT_LIMIT-1 - * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1. - * - * @param value a value in the range u_getIntPropertyMinValue(prop).. - * u_getIntPropertyMaxValue(prop), with one exception. If prop is - * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but - * rather a mask value produced by U_GET_GC_MASK(). This allows grouped - * categories such as [:L:] to be represented. - * - * @param ec error code input/output parameter - * - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_applyIntPropertyValue(USet* set, - UProperty prop, int32_t value, UErrorCode* ec); - -/** - * Modifies the set to contain those code points which have the - * given value for the given property. 
Prior contents of this - * set are lost. - * A frozen set will not be modified. - * - * @param set the object to contain the code points defined by the given - * property and value alias - * - * @param prop a string specifying a property alias, either short or long. - * The name is matched loosely. See PropertyAliases.txt for names and a - * description of loose matching. If the value string is empty, then this - * string is interpreted as either a General_Category value alias, a Script - * value alias, a binary property alias, or a special ID. Special IDs are - * matched loosely and correspond to the following sets: - * - * "ANY" = [\\u0000-\\U0010FFFF], - * "ASCII" = [\\u0000-\\u007F], - * "Assigned" = [:^Cn:]. - * - * @param propLength the length of the prop, or -1 if NULL - * - * @param value a string specifying a value alias, either short or long. - * The name is matched loosely. See PropertyValueAliases.txt for names - * and a description of loose matching. In addition to aliases listed, - * numeric values and canonical combining classes may be expressed - * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string - * may also be empty. - * - * @param valueLength the length of the value, or -1 if NULL - * - * @param ec error code input/output parameter - * - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_applyPropertyAlias(USet* set, - const UChar *prop, int32_t propLength, - const UChar *value, int32_t valueLength, - UErrorCode* ec); - -/** - * Return true if the given position, in the given pattern, appears - * to be the start of a UnicodeSet pattern. - * - * @param pattern a string specifying the pattern - * @param patternLength the length of the pattern, or -1 if NULL - * @param pos the given position - * @stable ICU 3.2 - */ -U_STABLE UBool U_EXPORT2 -uset_resemblesPattern(const UChar *pattern, int32_t patternLength, - int32_t pos); - -/** - * Returns a string representation of this set. 
If the result of - * calling this function is passed to a uset_openPattern(), it - * will produce another set that is equal to this one. - * @param set the set - * @param result the string to receive the rules, may be NULL - * @param resultCapacity the capacity of result, may be 0 if result is NULL - * @param escapeUnprintable if TRUE then convert unprintable - * character to their hex escape representations, \\uxxxx or - * \\Uxxxxxxxx. Unprintable characters are those other than - * U+000A, U+0020..U+007E. - * @param ec error code. - * @return length of string, possibly larger than resultCapacity - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uset_toPattern(const USet* set, - UChar* result, int32_t resultCapacity, - UBool escapeUnprintable, - UErrorCode* ec); - -/** - * Adds the given character to the given USet. After this call, - * uset_contains(set, c) will return TRUE. - * A frozen set will not be modified. - * @param set the object to which to add the character - * @param c the character to add - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_add(USet* set, UChar32 c); - -/** - * Adds all of the elements in the specified set to this set if - * they're not already present. This operation effectively - * modifies this set so that its value is the union of the two - * sets. The behavior of this operation is unspecified if the specified - * collection is modified while the operation is in progress. - * A frozen set will not be modified. - * - * @param set the object to which to add the set - * @param additionalSet the source set whose elements are to be added to this set. - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -uset_addAll(USet* set, const USet *additionalSet); - -/** - * Adds the given range of characters to the given USet. After this call, - * uset_contains(set, start, end) will return TRUE. - * A frozen set will not be modified. 
- * @param set the object to which to add the character - * @param start the first character of the range to add, inclusive - * @param end the last character of the range to add, inclusive - * @stable ICU 2.2 - */ -U_STABLE void U_EXPORT2 -uset_addRange(USet* set, UChar32 start, UChar32 end); - -/** - * Adds the given string to the given USet. After this call, - * uset_containsString(set, str, strLen) will return TRUE. - * A frozen set will not be modified. - * @param set the object to which to add the character - * @param str the string to add - * @param strLen the length of the string or -1 if null terminated. - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_addString(USet* set, const UChar* str, int32_t strLen); - -/** - * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} - * If this set already any particular character, it has no effect on that character. - * A frozen set will not be modified. - * @param set the object to which to add the character - * @param str the source string - * @param strLen the length of the string or -1 if null terminated. - * @stable ICU 3.4 - */ -U_STABLE void U_EXPORT2 -uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen); - -/** - * Removes the given character from the given USet. After this call, - * uset_contains(set, c) will return FALSE. - * A frozen set will not be modified. - * @param set the object from which to remove the character - * @param c the character to remove - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_remove(USet* set, UChar32 c); - -/** - * Removes the given range of characters from the given USet. After this call, - * uset_contains(set, start, end) will return FALSE. - * A frozen set will not be modified. 
- * @param set the object to which to add the character - * @param start the first character of the range to remove, inclusive - * @param end the last character of the range to remove, inclusive - * @stable ICU 2.2 - */ -U_STABLE void U_EXPORT2 -uset_removeRange(USet* set, UChar32 start, UChar32 end); - -/** - * Removes the given string to the given USet. After this call, - * uset_containsString(set, str, strLen) will return FALSE. - * A frozen set will not be modified. - * @param set the object to which to add the character - * @param str the string to remove - * @param strLen the length of the string or -1 if null terminated. - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_removeString(USet* set, const UChar* str, int32_t strLen); - -/** - * Removes from this set all of its elements that are contained in the - * specified set. This operation effectively modifies this - * set so that its value is the asymmetric set difference of - * the two sets. - * A frozen set will not be modified. - * @param set the object from which the elements are to be removed - * @param removeSet the object that defines which elements will be - * removed from this set - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_removeAll(USet* set, const USet* removeSet); - -/** - * Retain only the elements in this set that are contained in the - * specified range. If start > end then an empty range is - * retained, leaving the set empty. This is equivalent to - * a boolean logic AND, or a set INTERSECTION. - * A frozen set will not be modified. - * - * @param set the object for which to retain only the specified range - * @param start first character, inclusive, of range to be retained - * to this set. - * @param end last character, inclusive, of range to be retained - * to this set. - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_retain(USet* set, UChar32 start, UChar32 end); - -/** - * Retains only the elements in this set that are contained in the - * specified set. 
In other words, removes from this set all of - * its elements that are not contained in the specified set. This - * operation effectively modifies this set so that its value is - * the intersection of the two sets. - * A frozen set will not be modified. - * - * @param set the object on which to perform the retain - * @param retain set that defines which elements this set will retain - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_retainAll(USet* set, const USet* retain); - -/** - * Reallocate this objects internal structures to take up the least - * possible space, without changing this object's value. - * A frozen set will not be modified. - * - * @param set the object on which to perfrom the compact - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_compact(USet* set); - -/** - * Inverts this set. This operation modifies this set so that - * its value is its complement. This operation does not affect - * the multicharacter strings, if any. - * A frozen set will not be modified. - * @param set the set - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_complement(USet* set); - -/** - * Complements in this set all elements contained in the specified - * set. Any character in the other set will be removed if it is - * in this set, or will be added if it is not in this set. - * A frozen set will not be modified. - * - * @param set the set with which to complement - * @param complement set that defines which elements will be xor'ed - * from this set. - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_complementAll(USet* set, const USet* complement); - -/** - * Removes all of the elements from this set. This set will be - * empty after this call returns. - * A frozen set will not be modified. - * @param set the set - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_clear(USet* set); - -/** - * Close this set over the given attribute. For the attribute - * USET_CASE, the result is to modify this set so that: - * - * 1. 
For each character or string 'a' in this set, all strings or - * characters 'b' such that foldCase(a) == foldCase(b) are added - * to this set. - * - * 2. For each string 'e' in the resulting set, if e != - * foldCase(e), 'e' will be removed. - * - * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}] - * - * (Here foldCase(x) refers to the operation u_strFoldCase, and a - * == b denotes that the contents are the same, not pointer - * comparison.) - * - * A frozen set will not be modified. - * - * @param set the set - * - * @param attributes bitmask for attributes to close over. - * Currently only the USET_CASE bit is supported. Any undefined bits - * are ignored. - * @stable ICU 4.2 - */ -U_STABLE void U_EXPORT2 -uset_closeOver(USet* set, int32_t attributes); - -/** - * Remove all strings from this set. - * - * @param set the set - * @stable ICU 4.2 - */ -U_STABLE void U_EXPORT2 -uset_removeAllStrings(USet* set); - -/** - * Returns TRUE if the given USet contains no characters and no - * strings. - * @param set the set - * @return true if set is empty - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -uset_isEmpty(const USet* set); - -/** - * Returns TRUE if the given USet contains the given character. - * This function works faster with a frozen set. - * @param set the set - * @param c The codepoint to check for within the set - * @return true if set contains c - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -uset_contains(const USet* set, UChar32 c); - -/** - * Returns TRUE if the given USet contains all characters c - * where start <= c && c <= end. - * @param set the set - * @param start the first character of the range to test, inclusive - * @param end the last character of the range to test, inclusive - * @return TRUE if set contains the range - * @stable ICU 2.2 - */ -U_STABLE UBool U_EXPORT2 -uset_containsRange(const USet* set, UChar32 start, UChar32 end); - -/** - * Returns TRUE if the given USet contains the given string. 
- * @param set the set - * @param str the string - * @param strLen the length of the string or -1 if null terminated. - * @return true if set contains str - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -uset_containsString(const USet* set, const UChar* str, int32_t strLen); - -/** - * Returns the index of the given character within this set, where - * the set is ordered by ascending code point. If the character - * is not in this set, return -1. The inverse of this method is - * charAt(). - * @param set the set - * @param c the character to obtain the index for - * @return an index from 0..size()-1, or -1 - * @stable ICU 3.2 - */ -U_STABLE int32_t U_EXPORT2 -uset_indexOf(const USet* set, UChar32 c); - -/** - * Returns the character at the given index within this set, where - * the set is ordered by ascending code point. If the index is - * out of range, return (UChar32)-1. The inverse of this method is - * indexOf(). - * @param set the set - * @param charIndex an index from 0..size()-1 to obtain the char for - * @return the character at the given index, or (UChar32)-1. - * @stable ICU 3.2 - */ -U_STABLE UChar32 U_EXPORT2 -uset_charAt(const USet* set, int32_t charIndex); - -/** - * Returns the number of characters and strings contained in the given - * USet. - * @param set the set - * @return a non-negative integer counting the characters and strings - * contained in set - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uset_size(const USet* set); - -/** - * Returns the number of items in this set. An item is either a range - * of characters or a single multicharacter string. - * @param set the set - * @return a non-negative integer counting the character ranges - * and/or strings contained in set - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uset_getItemCount(const USet* set); - -/** - * Returns an item of this set. An item is either a range of - * characters or a single multicharacter string. 
- * @param set the set - * @param itemIndex a non-negative integer in the range 0.. - * uset_getItemCount(set)-1 - * @param start pointer to variable to receive first character - * in range, inclusive - * @param end pointer to variable to receive last character in range, - * inclusive - * @param str buffer to receive the string, may be NULL - * @param strCapacity capacity of str, or 0 if str is NULL - * @param ec error code - * @return the length of the string (>= 2), or 0 if the item is a - * range, in which case it is the range *start..*end, or -1 if - * itemIndex is out of range - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uset_getItem(const USet* set, int32_t itemIndex, - UChar32* start, UChar32* end, - UChar* str, int32_t strCapacity, - UErrorCode* ec); - -/** - * Returns true if set1 contains all the characters and strings - * of set2. It answers the question, 'Is set1 a superset of set2?' - * @param set1 set to be checked for containment - * @param set2 set to be checked for containment - * @return true if the test condition is met - * @stable ICU 3.2 - */ -U_STABLE UBool U_EXPORT2 -uset_containsAll(const USet* set1, const USet* set2); - -/** - * Returns true if this set contains all the characters - * of the given string. This is does not check containment of grapheme - * clusters, like uset_containsString. - * @param set set of characters to be checked for containment - * @param str string containing codepoints to be checked for containment - * @param strLen the length of the string or -1 if null terminated. - * @return true if the test condition is met - * @stable ICU 3.4 - */ -U_STABLE UBool U_EXPORT2 -uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen); - -/** - * Returns true if set1 contains none of the characters and strings - * of set2. It answers the question, 'Is set1 a disjoint set of set2?' 
- * @param set1 set to be checked for containment - * @param set2 set to be checked for containment - * @return true if the test condition is met - * @stable ICU 3.2 - */ -U_STABLE UBool U_EXPORT2 -uset_containsNone(const USet* set1, const USet* set2); - -/** - * Returns true if set1 contains some of the characters and strings - * of set2. It answers the question, 'Does set1 and set2 have an intersection?' - * @param set1 set to be checked for containment - * @param set2 set to be checked for containment - * @return true if the test condition is met - * @stable ICU 3.2 - */ -U_STABLE UBool U_EXPORT2 -uset_containsSome(const USet* set1, const USet* set2); - -/** - * Returns the length of the initial substring of the input string which - * consists only of characters and strings that are contained in this set - * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), - * or only of characters and strings that are not contained - * in this set (USET_SPAN_NOT_CONTAINED). - * See USetSpanCondition for details. - * Similar to the strspn() C library function. - * Unpaired surrogates are treated according to contains() of their surrogate code points. - * This function works faster with a frozen set and with a non-negative string length argument. 
- * @param set the set - * @param s start of the string - * @param length of the string; can be -1 for NUL-terminated - * @param spanCondition specifies the containment condition - * @return the length of the initial substring according to the spanCondition; - * 0 if the start of the string does not fit the spanCondition - * @stable ICU 3.8 - * @see USetSpanCondition - */ -U_STABLE int32_t U_EXPORT2 -uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); - -/** - * Returns the start of the trailing substring of the input string which - * consists only of characters and strings that are contained in this set - * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), - * or only of characters and strings that are not contained - * in this set (USET_SPAN_NOT_CONTAINED). - * See USetSpanCondition for details. - * Unpaired surrogates are treated according to contains() of their surrogate code points. - * This function works faster with a frozen set and with a non-negative string length argument. - * @param set the set - * @param s start of the string - * @param length of the string; can be -1 for NUL-terminated - * @param spanCondition specifies the containment condition - * @return the start of the trailing substring according to the spanCondition; - * the string length if the end of the string does not fit the spanCondition - * @stable ICU 3.8 - * @see USetSpanCondition - */ -U_STABLE int32_t U_EXPORT2 -uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); - -/** - * Returns the length of the initial substring of the input string which - * consists only of characters and strings that are contained in this set - * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), - * or only of characters and strings that are not contained - * in this set (USET_SPAN_NOT_CONTAINED). - * See USetSpanCondition for details. - * Similar to the strspn() C library function. 
- * Malformed byte sequences are treated according to contains(0xfffd). - * This function works faster with a frozen set and with a non-negative string length argument. - * @param set the set - * @param s start of the string (UTF-8) - * @param length of the string; can be -1 for NUL-terminated - * @param spanCondition specifies the containment condition - * @return the length of the initial substring according to the spanCondition; - * 0 if the start of the string does not fit the spanCondition - * @stable ICU 3.8 - * @see USetSpanCondition - */ -U_STABLE int32_t U_EXPORT2 -uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); - -/** - * Returns the start of the trailing substring of the input string which - * consists only of characters and strings that are contained in this set - * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), - * or only of characters and strings that are not contained - * in this set (USET_SPAN_NOT_CONTAINED). - * See USetSpanCondition for details. - * Malformed byte sequences are treated according to contains(0xfffd). - * This function works faster with a frozen set and with a non-negative string length argument. - * @param set the set - * @param s start of the string (UTF-8) - * @param length of the string; can be -1 for NUL-terminated - * @param spanCondition specifies the containment condition - * @return the start of the trailing substring according to the spanCondition; - * the string length if the end of the string does not fit the spanCondition - * @stable ICU 3.8 - * @see USetSpanCondition - */ -U_STABLE int32_t U_EXPORT2 -uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); - -/** - * Returns true if set1 contains all of the characters and strings - * of set2, and vis versa. It answers the question, 'Is set1 equal to set2?' 
- * @param set1 set to be checked for containment - * @param set2 set to be checked for containment - * @return true if the test condition is met - * @stable ICU 3.2 - */ -U_STABLE UBool U_EXPORT2 -uset_equals(const USet* set1, const USet* set2); - -/********************************************************************* - * Serialized set API - *********************************************************************/ - -/** - * Serializes this set into an array of 16-bit integers. Serialization - * (currently) only records the characters in the set; multicharacter - * strings are ignored. - * - * The array - * has following format (each line is one 16-bit integer): - * - * length = (n+2*m) | (m!=0?0x8000:0) - * bmpLength = n; present if m!=0 - * bmp[0] - * bmp[1] - * ... - * bmp[n-1] - * supp-high[0] - * supp-low[0] - * supp-high[1] - * supp-low[1] - * ... - * supp-high[m-1] - * supp-low[m-1] - * - * The array starts with a header. After the header are n bmp - * code points, then m supplementary code points. Either n or m - * or both may be zero. n+2*m is always <= 0x7FFF. - * - * If there are no supplementary characters (if m==0) then the - * header is one 16-bit integer, 'length', with value n. - * - * If there are supplementary characters (if m!=0) then the header - * is two 16-bit integers. The first, 'length', has value - * (n+2*m)|0x8000. The second, 'bmpLength', has value n. - * - * After the header the code points are stored in ascending order. - * Supplementary code points are stored as most significant 16 - * bits followed by least significant 16 bits. - * - * @param set the set - * @param dest pointer to buffer of destCapacity 16-bit integers. - * May be NULL only if destCapacity is zero. - * @param destCapacity size of dest, or zero. Must not be negative. - * @param pErrorCode pointer to the error code. Will be set to - * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to - * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity. 
- * @return the total length of the serialized format, including - * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other - * than U_BUFFER_OVERFLOW_ERROR. - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode); - -/** - * Given a serialized array, fill in the given serialized set object. - * @param fillSet pointer to result - * @param src pointer to start of array - * @param srcLength length of array - * @return true if the given array is valid, otherwise false - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength); - -/** - * Set the USerializedSet to contain the given character (and nothing - * else). - * @param fillSet pointer to result - * @param c The codepoint to set - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c); - -/** - * Returns TRUE if the given USerializedSet contains the given - * character. - * @param set the serialized set - * @param c The codepoint to check for within the set - * @return true if set contains c - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -uset_serializedContains(const USerializedSet* set, UChar32 c); - -/** - * Returns the number of disjoint ranges of characters contained in - * the given serialized set. Ignores any strings contained in the - * set. - * @param set the serialized set - * @return a non-negative integer counting the character ranges - * contained in set - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uset_getSerializedRangeCount(const USerializedSet* set); - -/** - * Returns a range of characters contained in the given serialized - * set. - * @param set the serialized set - * @param rangeIndex a non-negative integer in the range 0.. 
- * uset_getSerializedRangeCount(set)-1 - * @param pStart pointer to variable to receive first character - * in range, inclusive - * @param pEnd pointer to variable to receive last character in range, - * inclusive - * @return true if rangeIndex is valid, otherwise false - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, - UChar32* pStart, UChar32* pEnd); - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/usetiter.h b/deps/node/deps/icu-small/source/common/unicode/usetiter.h deleted file mode 100644 index 057adbc0..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/usetiter.h +++ /dev/null @@ -1,320 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2002-2014, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ -#ifndef USETITER_H -#define USETITER_H - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/unistr.h" - -/** - * \file - * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet. - */ - -U_NAMESPACE_BEGIN - -class UnicodeSet; -class UnicodeString; - -/** - * - * UnicodeSetIterator iterates over the contents of a UnicodeSet. It - * iterates over either code points or code point ranges. After all - * code points or ranges have been returned, it returns the - * multicharacter strings of the UnicodeSet, if any. - * - * This class is not intended to be subclassed. Consider any fields - * or methods declared as "protected" to be private. The use of - * protected in this class is an artifact of history. - * - *

To iterate over code points and strings, use a loop like this: - *

- * UnicodeSetIterator it(set);
- * while (it.next()) {
- *     processItem(it.getString());
- * }
- * 
- *

Each item in the set is accessed as a string. Set elements - * consisting of single code points are returned as strings containing - * just the one code point. - * - *

To iterate over code point ranges, instead of individual code points, - * use a loop like this: - *

- * UnicodeSetIterator it(set);
- * while (it.nextRange()) {
- *   if (it.isString()) {
- *     processString(it.getString());
- *   } else {
- *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
- *   }
- * }
- * 
- * @author M. Davis - * @stable ICU 2.4 - */ -class U_COMMON_API UnicodeSetIterator : public UObject { - - protected: - - /** - * Value of codepoint if the iterator points to a string. - * If codepoint == IS_STRING, then examine - * string for the current iteration result. - * @stable ICU 2.4 - */ - enum { IS_STRING = -1 }; - - /** - * Current code point, or the special value IS_STRING, if - * the iterator points to a string. - * @stable ICU 2.4 - */ - UChar32 codepoint; - - /** - * When iterating over ranges using nextRange(), - * codepointEnd contains the inclusive end of the - * iteration range, if codepoint != IS_STRING. If - * iterating over code points using next(), or if - * codepoint == IS_STRING, then the value of - * codepointEnd is undefined. - * @stable ICU 2.4 - */ - UChar32 codepointEnd; - - /** - * If codepoint == IS_STRING, then string points - * to the current string. If codepoint != IS_STRING, the - * value of string is undefined. - * @stable ICU 2.4 - */ - const UnicodeString* string; - - public: - - /** - * Create an iterator over the given set. The iterator is valid - * only so long as set is valid. - * @param set set to iterate over - * @stable ICU 2.4 - */ - UnicodeSetIterator(const UnicodeSet& set); - - /** - * Create an iterator over nothing. next() and - * nextRange() return false. This is a convenience - * constructor allowing the target to be set later. - * @stable ICU 2.4 - */ - UnicodeSetIterator(); - - /** - * Destructor. - * @stable ICU 2.4 - */ - virtual ~UnicodeSetIterator(); - - /** - * Returns true if the current element is a string. If so, the - * caller can retrieve it with getString(). If this - * method returns false, the current element is a code point or - * code point range, depending on whether next() or - * nextRange() was called. - * Elements of types string and codepoint can both be retrieved - * with the function getString(). - * Elements of type codepoint can also be retrieved with - * getCodepoint(). 
- * For ranges, getCodepoint() returns the starting codepoint - * of the range, and getCodepointEnd() returns the end - * of the range. - * @stable ICU 2.4 - */ - inline UBool isString() const; - - /** - * Returns the current code point, if isString() returned - * false. Otherwise returns an undefined result. - * @stable ICU 2.4 - */ - inline UChar32 getCodepoint() const; - - /** - * Returns the end of the current code point range, if - * isString() returned false and nextRange() was - * called. Otherwise returns an undefined result. - * @stable ICU 2.4 - */ - inline UChar32 getCodepointEnd() const; - - /** - * Returns the current string, if isString() returned - * true. If the current iteration item is a code point, a UnicodeString - * containing that single code point is returned. - * - * Ownership of the returned string remains with the iterator. - * The string is guaranteed to remain valid only until the iterator is - * advanced to the next item, or until the iterator is deleted. - * - * @stable ICU 2.4 - */ - const UnicodeString& getString(); - - /** - * Advances the iteration position to the next element in the set, - * which can be either a single code point or a string. - * If there are no more elements in the set, return false. - * - *

- * If isString() == TRUE, the value is a - * string, otherwise the value is a - * single code point. Elements of either type can be retrieved - * with the function getString(), while elements of - * consisting of a single code point can be retrieved with - * getCodepoint() - * - *

The order of iteration is all code points in sorted order, - * followed by all strings sorted order. Do not mix - * calls to next() and nextRange() without - * calling reset() between them. The results of doing so - * are undefined. - * - * @return true if there was another element in the set. - * @stable ICU 2.4 - */ - UBool next(); - - /** - * Returns the next element in the set, either a code point range - * or a string. If there are no more elements in the set, return - * false. If isString() == TRUE, the value is a - * string and can be accessed with getString(). Otherwise the value is a - * range of one or more code points from getCodepoint() to - * getCodepointeEnd() inclusive. - * - *

The order of iteration is all code points ranges in sorted - * order, followed by all strings sorted order. Ranges are - * disjoint and non-contiguous. The value returned from getString() - * is undefined unless isString() == TRUE. Do not mix calls to - * next() and nextRange() without calling - * reset() between them. The results of doing so are - * undefined. - * - * @return true if there was another element in the set. - * @stable ICU 2.4 - */ - UBool nextRange(); - - /** - * Sets this iterator to visit the elements of the given set and - * resets it to the start of that set. The iterator is valid only - * so long as set is valid. - * @param set the set to iterate over. - * @stable ICU 2.4 - */ - void reset(const UnicodeSet& set); - - /** - * Resets this iterator to the start of the set. - * @stable ICU 2.4 - */ - void reset(); - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - * - * @stable ICU 2.4 - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - * - * @stable ICU 2.4 - */ - virtual UClassID getDynamicClassID() const; - - // ======================= PRIVATES =========================== - - protected: - - // endElement and nextElements are really UChar32's, but we keep - // them as signed int32_t's so we can do comparisons with - // endElement set to -1. Leave them as int32_t's. 
- /** The set - * @stable ICU 2.4 - */ - const UnicodeSet* set; - /** End range - * @stable ICU 2.4 - */ - int32_t endRange; - /** Range - * @stable ICU 2.4 - */ - int32_t range; - /** End element - * @stable ICU 2.4 - */ - int32_t endElement; - /** Next element - * @stable ICU 2.4 - */ - int32_t nextElement; - //UBool abbreviated; - /** Next string - * @stable ICU 2.4 - */ - int32_t nextString; - /** String count - * @stable ICU 2.4 - */ - int32_t stringCount; - - /** - * Points to the string to use when the caller asks for a - * string and the current iteration item is a code point, not a string. - * @internal - */ - UnicodeString *cpString; - - /** Copy constructor. Disallowed. - * @stable ICU 2.4 - */ - UnicodeSetIterator(const UnicodeSetIterator&); // disallow - - /** Assignment operator. Disallowed. - * @stable ICU 2.4 - */ - UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow - - /** Load range - * @stable ICU 2.4 - */ - virtual void loadRange(int32_t range); - -}; - -inline UBool UnicodeSetIterator::isString() const { - return codepoint == (UChar32)IS_STRING; -} - -inline UChar32 UnicodeSetIterator::getCodepoint() const { - return codepoint; -} - -inline UChar32 UnicodeSetIterator::getCodepointEnd() const { - return codepointEnd; -} - - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ushape.h b/deps/node/deps/icu-small/source/common/unicode/ushape.h deleted file mode 100644 index 3064e085..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ushape.h +++ /dev/null @@ -1,476 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2000-2012, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -****************************************************************************** -* file name: ushape.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2000jun29 -* created by: Markus W. Scherer -*/ - -#ifndef __USHAPE_H__ -#define __USHAPE_H__ - -#include "unicode/utypes.h" - -/** - * \file - * \brief C API: Arabic shaping - * - */ - -/** - * Shape Arabic text on a character basis. - * - *

This function performs basic operations for "shaping" Arabic text. It is most - * useful for use with legacy data formats and legacy display technology - * (simple terminals). All operations are performed on Unicode characters.

- * - *

Text-based shaping means that some character code points in the text are - * replaced by others depending on the context. It transforms one kind of text - * into another. In comparison, modern displays for Arabic text select - * appropriate, context-dependent font glyphs for each text element, which means - * that they transform text into a glyph vector.

- * - *

Text transformations are necessary when modern display technology is not - * available or when text needs to be transformed to or from legacy formats that - * use "shaped" characters. Since the Arabic script is cursive, connecting - * adjacent letters to each other, computers select images for each letter based - * on the surrounding letters. This usually results in four images per Arabic - * letter: initial, middle, final, and isolated forms. In Unicode, on the other - * hand, letters are normally stored abstract, and a display system is expected - * to select the necessary glyphs. (This makes searching and other text - * processing easier because the same letter has only one code.) It is possible - * to mimic this with text transformations because there are characters in - * Unicode that are rendered as letters with a specific shape - * (or cursive connectivity). They were included for interoperability with - * legacy systems and codepages, and for unsophisticated display systems.

- * - *

A second kind of text transformations is supported for Arabic digits: - * For compatibility with legacy codepages that only include European digits, - * it is possible to replace one set of digits by another, changing the - * character code points. These operations can be performed for either - * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic - * digits (U+06f0...U+06f9).

- * - *

Some replacements may result in more or fewer characters (code points). - * By default, this means that the destination buffer may receive text with a - * length different from the source length. Some legacy systems rely on the - * length of the text to be constant. They expect extra spaces to be added - * or consumed either next to the affected character or at the end of the - * text.

- * - *

For details about the available operations, see the description of the - * U_SHAPE_... options.

- * - * @param source The input text. - * - * @param sourceLength The number of UChars in source. - * - * @param dest The destination buffer that will receive the results of the - * requested operations. It may be NULL only if - * destSize is 0. The source and destination must not - * overlap. - * - * @param destSize The size (capacity) of the destination buffer in UChars. - * If destSize is 0, then no output is produced, - * but the necessary buffer size is returned ("preflighting"). - * - * @param options This is a 32-bit set of flags that specify the operations - * that are performed on the input text. If no error occurs, - * then the result will always be written to the destination - * buffer. - * - * @param pErrorCode must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * - * @return The number of UChars written to the destination buffer. - * If an error occurred, then no output was written, or it may be - * incomplete. If U_BUFFER_OVERFLOW_ERROR is set, then - * the return value indicates the necessary destination buffer size. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_shapeArabic(const UChar *source, int32_t sourceLength, - UChar *dest, int32_t destSize, - uint32_t options, - UErrorCode *pErrorCode); - -/** - * Memory option: allow the result to have a different length than the source. - * Affects: LamAlef options - * @stable ICU 2.0 - */ -#define U_SHAPE_LENGTH_GROW_SHRINK 0 - -/** - * Memory option: allow the result to have a different length than the source. - * Affects: LamAlef options - * This option is an alias to U_SHAPE_LENGTH_GROW_SHRINK - * @stable ICU 4.2 - */ -#define U_SHAPE_LAMALEF_RESIZE 0 - -/** - * Memory option: the result must have the same length as the source. - * If more room is necessary, then try to consume spaces next to modified characters. 
- * @stable ICU 2.0 - */ -#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR 1 - -/** - * Memory option: the result must have the same length as the source. - * If more room is necessary, then try to consume spaces next to modified characters. - * Affects: LamAlef options - * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_NEAR - * @stable ICU 4.2 - */ -#define U_SHAPE_LAMALEF_NEAR 1 - -/** - * Memory option: the result must have the same length as the source. - * If more room is necessary, then try to consume spaces at the end of the text. - * @stable ICU 2.0 - */ -#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END 2 - -/** - * Memory option: the result must have the same length as the source. - * If more room is necessary, then try to consume spaces at the end of the text. - * Affects: LamAlef options - * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_END - * @stable ICU 4.2 - */ -#define U_SHAPE_LAMALEF_END 2 - -/** - * Memory option: the result must have the same length as the source. - * If more room is necessary, then try to consume spaces at the beginning of the text. - * @stable ICU 2.0 - */ -#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3 - -/** - * Memory option: the result must have the same length as the source. - * If more room is necessary, then try to consume spaces at the beginning of the text. - * Affects: LamAlef options - * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING - * @stable ICU 4.2 - */ -#define U_SHAPE_LAMALEF_BEGIN 3 - - -/** - * Memory option: the result must have the same length as the source. - * Shaping Mode: For each LAMALEF character found, expand LAMALEF using space at end. - * If there is no space at end, use spaces at beginning of the buffer. If there - * is no space at beginning of the buffer, use spaces at the near (i.e. the space - * after the LAMALEF character). 
- * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h) - * will be set in pErrorCode - * - * Deshaping Mode: Perform the same function as the flag equals U_SHAPE_LAMALEF_END. - * Affects: LamAlef options - * @stable ICU 4.2 - */ -#define U_SHAPE_LAMALEF_AUTO 0x10000 - -/** Bit mask for memory options. @stable ICU 2.0 */ -#define U_SHAPE_LENGTH_MASK 0x10003 /* Changed old value 3 */ - - -/** - * Bit mask for LamAlef memory options. - * @stable ICU 4.2 - */ -#define U_SHAPE_LAMALEF_MASK 0x10003 /* updated */ - -/** Direction indicator: the source is in logical (keyboard) order. @stable ICU 2.0 */ -#define U_SHAPE_TEXT_DIRECTION_LOGICAL 0 - -/** - * Direction indicator: - * the source is in visual RTL order, - * the rightmost displayed character stored first. - * This option is an alias to U_SHAPE_TEXT_DIRECTION_LOGICAL - * @stable ICU 4.2 - */ -#define U_SHAPE_TEXT_DIRECTION_VISUAL_RTL 0 - -/** - * Direction indicator: - * the source is in visual LTR order, - * the leftmost displayed character stored first. - * @stable ICU 2.0 - */ -#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR 4 - -/** Bit mask for direction indicators. @stable ICU 2.0 */ -#define U_SHAPE_TEXT_DIRECTION_MASK 4 - - -/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */ -#define U_SHAPE_LETTERS_NOOP 0 - -/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */ -#define U_SHAPE_LETTERS_SHAPE 8 - -/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */ -#define U_SHAPE_LETTERS_UNSHAPE 0x10 - -/** - * Letter shaping option: replace abstract letter characters by "shaped" ones. - * The only difference with U_SHAPE_LETTERS_SHAPE is that Tashkeel letters - * are always "shaped" into the isolated form instead of the medial form - * (selecting code points from the Arabic Presentation Forms-B block). 
- * @stable ICU 2.0 - */ -#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18 - - -/** Bit mask for letter shaping options. @stable ICU 2.0 */ -#define U_SHAPE_LETTERS_MASK 0x18 - - -/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */ -#define U_SHAPE_DIGITS_NOOP 0 - -/** - * Digit shaping option: - * Replace European digits (U+0030...) by Arabic-Indic digits. - * @stable ICU 2.0 - */ -#define U_SHAPE_DIGITS_EN2AN 0x20 - -/** - * Digit shaping option: - * Replace Arabic-Indic digits by European digits (U+0030...). - * @stable ICU 2.0 - */ -#define U_SHAPE_DIGITS_AN2EN 0x40 - -/** - * Digit shaping option: - * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent - * strongly directional character is an Arabic letter - * (u_charDirection() result U_RIGHT_TO_LEFT_ARABIC [AL]).
- * The direction of "preceding" depends on the direction indicator option. - * For the first characters, the preceding strongly directional character - * (initial state) is assumed to be not an Arabic letter - * (it is U_LEFT_TO_RIGHT [L] or U_RIGHT_TO_LEFT [R]). - * @stable ICU 2.0 - */ -#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR 0x60 - -/** - * Digit shaping option: - * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent - * strongly directional character is an Arabic letter - * (u_charDirection() result U_RIGHT_TO_LEFT_ARABIC [AL]).
- * The direction of "preceding" depends on the direction indicator option. - * For the first characters, the preceding strongly directional character - * (initial state) is assumed to be an Arabic letter. - * @stable ICU 2.0 - */ -#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL 0x80 - -/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */ -#define U_SHAPE_DIGITS_RESERVED 0xa0 - -/** Bit mask for digit shaping options. @stable ICU 2.0 */ -#define U_SHAPE_DIGITS_MASK 0xe0 - - -/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */ -#define U_SHAPE_DIGIT_TYPE_AN 0 - -/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */ -#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED 0x100 - -/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */ -#define U_SHAPE_DIGIT_TYPE_RESERVED 0x200 - -/** Bit mask for digit type options. @stable ICU 2.0 */ -#define U_SHAPE_DIGIT_TYPE_MASK 0x300 /* I need to change this from 0x3f00 to 0x300 */ - -/** - * Tashkeel aggregation option: - * Replaces any combination of U+0651 with one of - * U+064C, U+064D, U+064E, U+064F, U+0650 with - * U+FC5E, U+FC5F, U+FC60, U+FC61, U+FC62 consecutively. - * @stable ICU 3.6 - */ -#define U_SHAPE_AGGREGATE_TASHKEEL 0x4000 -/** Tashkeel aggregation option: do not aggregate tashkeels. @stable ICU 3.6 */ -#define U_SHAPE_AGGREGATE_TASHKEEL_NOOP 0 -/** Bit mask for tashkeel aggregation. @stable ICU 3.6 */ -#define U_SHAPE_AGGREGATE_TASHKEEL_MASK 0x4000 - -/** - * Presentation form option: - * Don't replace Arabic Presentation Forms-A and Arabic Presentation Forms-B - * characters with 0+06xx characters, before shaping. - * @stable ICU 3.6 - */ -#define U_SHAPE_PRESERVE_PRESENTATION 0x8000 -/** Presentation form option: - * Replace Arabic Presentation Forms-A and Arabic Presentationo Forms-B with - * their unshaped correspondants in range 0+06xx, before shaping. 
- * @stable ICU 3.6 - */ -#define U_SHAPE_PRESERVE_PRESENTATION_NOOP 0 -/** Bit mask for preserve presentation form. @stable ICU 3.6 */ -#define U_SHAPE_PRESERVE_PRESENTATION_MASK 0x8000 - -/* Seen Tail option */ -/** - * Memory option: the result must have the same length as the source. - * Shaping mode: The SEEN family character will expand into two characters using space near - * the SEEN family character(i.e. the space after the character). - * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h) - * will be set in pErrorCode - * - * De-shaping mode: Any Seen character followed by Tail character will be - * replaced by one cell Seen and a space will replace the Tail. - * Affects: Seen options - * @stable ICU 4.2 - */ -#define U_SHAPE_SEEN_TWOCELL_NEAR 0x200000 - -/** - * Bit mask for Seen memory options. - * @stable ICU 4.2 - */ -#define U_SHAPE_SEEN_MASK 0x700000 - -/* YehHamza option */ -/** - * Memory option: the result must have the same length as the source. - * Shaping mode: The YEHHAMZA character will expand into two characters using space near it - * (i.e. the space after the character - * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h) - * will be set in pErrorCode - * - * De-shaping mode: Any Yeh (final or isolated) character followed by Hamza character will be - * replaced by one cell YehHamza and space will replace the Hamza. - * Affects: YehHamza options - * @stable ICU 4.2 - */ -#define U_SHAPE_YEHHAMZA_TWOCELL_NEAR 0x1000000 - - -/** - * Bit mask for YehHamza memory options. - * @stable ICU 4.2 - */ -#define U_SHAPE_YEHHAMZA_MASK 0x3800000 - -/* New Tashkeel options */ -/** - * Memory option: the result must have the same length as the source. - * Shaping mode: Tashkeel characters will be replaced by spaces. 
- * Spaces will be placed at beginning of the buffer - * - * De-shaping mode: N/A - * Affects: Tashkeel options - * @stable ICU 4.2 - */ -#define U_SHAPE_TASHKEEL_BEGIN 0x40000 - -/** - * Memory option: the result must have the same length as the source. - * Shaping mode: Tashkeel characters will be replaced by spaces. - * Spaces will be placed at end of the buffer - * - * De-shaping mode: N/A - * Affects: Tashkeel options - * @stable ICU 4.2 - */ -#define U_SHAPE_TASHKEEL_END 0x60000 - -/** - * Memory option: allow the result to have a different length than the source. - * Shaping mode: Tashkeel characters will be removed, buffer length will shrink. - * De-shaping mode: N/A - * - * Affect: Tashkeel options - * @stable ICU 4.2 - */ -#define U_SHAPE_TASHKEEL_RESIZE 0x80000 - -/** - * Memory option: the result must have the same length as the source. - * Shaping mode: Tashkeel characters will be replaced by Tatweel if it is connected to adjacent - * characters (i.e. shaped on Tatweel) or replaced by space if it is not connected. - * - * De-shaping mode: N/A - * Affects: YehHamza options - * @stable ICU 4.2 - */ -#define U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL 0xC0000 - -/** - * Bit mask for Tashkeel replacement with Space or Tatweel memory options. - * @stable ICU 4.2 - */ -#define U_SHAPE_TASHKEEL_MASK 0xE0000 - - -/* Space location Control options */ -/** - * This option affect the meaning of BEGIN and END options. if this option is not used the default - * for BEGIN and END will be as following: - * The Default (for both Visual LTR, Visual RTL and Logical Text) - * 1. BEGIN always refers to the start address of physical memory. - * 2. END always refers to the end address of physical memory. - * - * If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text. - * - * The effect on BEGIN and END Memory Options will be as following: - * A. 
BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text( - * corresponding to the physical memory address end for Visual LTR text, Same as END in - * default behavior) - * B. BEGIN For Logical text: Same as BEGIN in default behavior. - * C. END For Visual LTR text: This will be the end (left side) of the visual text (corresponding - * to the physical memory address beginning for Visual LTR text, Same as BEGIN in default behavior. - * D. END For Logical text: Same as END in default behavior). - * Affects: All LamAlef BEGIN, END and AUTO options. - * @stable ICU 4.2 - */ -#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END 0x4000000 - -/** - * Bit mask for swapping BEGIN and END for Visual LTR text - * @stable ICU 4.2 - */ -#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK 0x4000000 - -/** - * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73). - * If this option is not specified (Default), old unofficial Unicode TAIL code point is used (i.e. 0x200B) - * De-shaping will not use this option as it will always search for both the new Unicode code point for the - * TAIL (i.e. 0xFE73) or the old unofficial Unicode TAIL code point (i.e. 0x200B) and de-shape the - * Seen-Family letter accordingly. - * - * Shaping Mode: Only shaping. - * De-shaping Mode: N/A. - * Affects: All Seen options - * @stable ICU 4.8 - */ -#define U_SHAPE_TAIL_NEW_UNICODE 0x8000000 - -/** - * Bit mask for new Unicode Tail option - * @stable ICU 4.8 - */ -#define U_SHAPE_TAIL_TYPE_MASK 0x8000000 - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/usprep.h b/deps/node/deps/icu-small/source/common/unicode/usprep.h deleted file mode 100644 index 7cdc6cdd..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/usprep.h +++ /dev/null @@ -1,271 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* - ******************************************************************************* - * - * Copyright (C) 2003-2014, International Business Machines - * Corporation and others. All Rights Reserved. - * - ******************************************************************************* - * file name: usprep.h - * encoding: UTF-8 - * tab size: 8 (not used) - * indentation:4 - * - * created on: 2003jul2 - * created by: Ram Viswanadha - */ - -#ifndef __USPREP_H__ -#define __USPREP_H__ - -/** - * \file - * \brief C API: Implements the StringPrep algorithm. - */ - -#include "unicode/utypes.h" -#include "unicode/localpointer.h" - -/** - * - * StringPrep API implements the StingPrep framework as described by RFC 3454. - * StringPrep prepares Unicode strings for use in network protocols. - * Profiles of StingPrep are set of rules and data according to with the - * Unicode Strings are prepared. Each profiles contains tables which describe - * how a code point should be treated. The tables are broadly classified into - *
    - *
  • Unassigned Table: Contains code points that are unassigned - * in the Unicode Version supported by StringPrep. Currently - * RFC 3454 supports Unicode 3.2.
  • - *
  • Prohibited Table: Contains code points that are prohibited from - * the output of the StringPrep processing function.
  • - *
  • Mapping Table: Contains code points that are deleted from the output or case mapped.
  • - *
- * - * The procedure for preparing Unicode strings: - *
    - *
  1. Map: For each character in the input, check if it has a mapping - * and, if so, replace it with its mapping.
  2. - *
  3. Normalize: Possibly normalize the result of step 1 using Unicode - * normalization.
  4. - *
  5. Prohibit: Check for any characters that are not allowed in the - * output. If any are found, return an error.
  6. - *
  7. Check bidi: Possibly check for right-to-left characters, and if - * any are found, make sure that the whole string satisfies the - * requirements for bidirectional strings. If the string does not - * satisfy the requirements for bidirectional strings, return an - * error.
  8. - *
- * @author Ram Viswanadha - */ -#if !UCONFIG_NO_IDNA - -#include "unicode/parseerr.h" - -/** - * The StringPrep profile - * @stable ICU 2.8 - */ -typedef struct UStringPrepProfile UStringPrepProfile; - - -/** - * Option to prohibit processing of unassigned code points in the input - * - * @see usprep_prepare - * @stable ICU 2.8 - */ -#define USPREP_DEFAULT 0x0000 - -/** - * Option to allow processing of unassigned code points in the input - * - * @see usprep_prepare - * @stable ICU 2.8 - */ -#define USPREP_ALLOW_UNASSIGNED 0x0001 - -/** - * enums for the standard stringprep profile types - * supported by usprep_openByType. - * @see usprep_openByType - * @stable ICU 4.2 - */ -typedef enum UStringPrepProfileType { - /** - * RFC3491 Nameprep - * @stable ICU 4.2 - */ - USPREP_RFC3491_NAMEPREP, - /** - * RFC3530 nfs4_cs_prep - * @stable ICU 4.2 - */ - USPREP_RFC3530_NFS4_CS_PREP, - /** - * RFC3530 nfs4_cs_prep with case insensitive option - * @stable ICU 4.2 - */ - USPREP_RFC3530_NFS4_CS_PREP_CI, - /** - * RFC3530 nfs4_cis_prep - * @stable ICU 4.2 - */ - USPREP_RFC3530_NFS4_CIS_PREP, - /** - * RFC3530 nfs4_mixed_prep for prefix - * @stable ICU 4.2 - */ - USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX, - /** - * RFC3530 nfs4_mixed_prep for suffix - * @stable ICU 4.2 - */ - USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX, - /** - * RFC3722 iSCSI - * @stable ICU 4.2 - */ - USPREP_RFC3722_ISCSI, - /** - * RFC3920 XMPP Nodeprep - * @stable ICU 4.2 - */ - USPREP_RFC3920_NODEPREP, - /** - * RFC3920 XMPP Resourceprep - * @stable ICU 4.2 - */ - USPREP_RFC3920_RESOURCEPREP, - /** - * RFC4011 Policy MIB Stringprep - * @stable ICU 4.2 - */ - USPREP_RFC4011_MIB, - /** - * RFC4013 SASLprep - * @stable ICU 4.2 - */ - USPREP_RFC4013_SASLPREP, - /** - * RFC4505 trace - * @stable ICU 4.2 - */ - USPREP_RFC4505_TRACE, - /** - * RFC4518 LDAP - * @stable ICU 4.2 - */ - USPREP_RFC4518_LDAP, - /** - * RFC4518 LDAP for case ignore, numeric and stored prefix - * matching rules - * @stable ICU 4.2 - */ - 
USPREP_RFC4518_LDAP_CI -} UStringPrepProfileType; - -/** - * Creates a StringPrep profile from the data file. - * - * @param path string containing the full path pointing to the directory - * where the profile reside followed by the package name - * e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system. - * if NULL, ICU default data files will be used. - * @param fileName name of the profile file to be opened - * @param status ICU error code in/out parameter. Must not be NULL. - * Must fulfill U_SUCCESS before the function call. - * @return Pointer to UStringPrepProfile that is opened. Should be closed by - * calling usprep_close() - * @see usprep_close() - * @stable ICU 2.8 - */ -U_STABLE UStringPrepProfile* U_EXPORT2 -usprep_open(const char* path, - const char* fileName, - UErrorCode* status); - -/** - * Creates a StringPrep profile for the specified profile type. - * - * @param type The profile type - * @param status ICU error code in/out parameter. Must not be NULL. - * Must fulfill U_SUCCESS before the function call. - * @return Pointer to UStringPrepProfile that is opened. Should be closed by - * calling usprep_close() - * @see usprep_close() - * @stable ICU 4.2 - */ -U_STABLE UStringPrepProfile* U_EXPORT2 -usprep_openByType(UStringPrepProfileType type, - UErrorCode* status); - -/** - * Closes the profile - * @param profile The profile to close - * @stable ICU 2.8 - */ -U_STABLE void U_EXPORT2 -usprep_close(UStringPrepProfile* profile); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUStringPrepProfilePointer - * "Smart pointer" class, closes a UStringPrepProfile via usprep_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUStringPrepProfilePointer, UStringPrepProfile, usprep_close); - -U_NAMESPACE_END - -#endif - -/** - * Prepare the input buffer for use in applications with the given profile. 
This operation maps, normalizes(NFKC), - * checks for prohibited and BiDi characters in the order defined by RFC 3454 - * depending on the options specified in the profile. - * - * @param prep The profile to use - * @param src Pointer to UChar buffer containing the string to prepare - * @param srcLength Number of characters in the source string - * @param dest Pointer to the destination buffer to receive the output - * @param destCapacity The capacity of destination array - * @param options A bit set of options: - * - * - USPREP_DEFAULT Prohibit processing of unassigned code points in the input - * - * - USPREP_ALLOW_UNASSIGNED Treat the unassigned code points are in the input - * as normal Unicode code points. - * - * @param parseError Pointer to UParseError struct to receive information on position - * of error if an error is encountered. Can be NULL. - * @param status ICU in/out error code parameter. - * U_INVALID_CHAR_FOUND if src contains - * unmatched single surrogates. - * U_INDEX_OUTOFBOUNDS_ERROR if src contains - * too many code points. - * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough - * @return The number of UChars in the destination buffer - * @stable ICU 2.8 - */ - -U_STABLE int32_t U_EXPORT2 -usprep_prepare( const UStringPrepProfile* prep, - const UChar* src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UParseError* parseError, - UErrorCode* status ); - - -#endif /* #if !UCONFIG_NO_IDNA */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ustring.h b/deps/node/deps/icu-small/source/common/unicode/ustring.h deleted file mode 100644 index 0d2274a0..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ustring.h +++ /dev/null @@ -1,1689 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1998-2014, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File ustring.h -* -* Modification History: -* -* Date Name Description -* 12/07/98 bertrand Creation. -****************************************************************************** -*/ - -#ifndef USTRING_H -#define USTRING_H - -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "unicode/uiter.h" - -/** - * \def UBRK_TYPEDEF_UBREAK_ITERATOR - * @internal - */ - -#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR -# define UBRK_TYPEDEF_UBREAK_ITERATOR -/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/ - typedef struct UBreakIterator UBreakIterator; -#endif - -/** - * \file - * \brief C API: Unicode string handling functions - * - * These C API functions provide general Unicode string handling. - * - * Some functions are equivalent in name, signature, and behavior to the ANSI C - * functions. (For example, they do not check for bad arguments like NULL string pointers.) - * In some cases, only the thread-safe variant of such a function is implemented here - * (see u_strtok_r()). - * - * Other functions provide more Unicode-specific functionality like locale-specific - * upper/lower-casing and string comparison in code point order. - * - * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units. - * UTF-16 encodes each Unicode code point with either one or two UChar code units. - * (This is the default form of Unicode, and a forward-compatible extension of the original, - * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0 - * in 1996.) - * - * Some APIs accept a 32-bit UChar32 value for a single code point. 
- * - * ICU also handles 16-bit Unicode text with unpaired surrogates. - * Such text is not well-formed UTF-16. - * Code-point-related functions treat unpaired surrogates as surrogate code points, - * i.e., as separate units. - * - * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings), - * it is much more efficient even for random access because the code unit values - * for single-unit characters vs. lead units vs. trail units are completely disjoint. - * This means that it is easy to determine character (code point) boundaries from - * random offsets in the string. - * - * Unicode (UTF-16) string processing is optimized for the single-unit case. - * Although it is important to support supplementary characters - * (which use pairs of lead/trail code units called "surrogates"), - * their occurrence is rare. Almost all characters in modern use require only - * a single UChar code unit (i.e., their code point values are <=0xffff). - * - * For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html). - * For a discussion of the handling of unpaired surrogates see also - * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18. - */ - -/** - * \defgroup ustring_ustrlen String Length - * \ingroup ustring_strlen - */ -/*@{*/ -/** - * Determine the length of an array of UChar. - * - * @param s The array of UChars, NULL (U+0000) terminated. - * @return The number of UChars in chars, minus the terminator. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strlen(const UChar *s); -/*@}*/ - -/** - * Count Unicode code points in the length UChar code units of the string. - * A code point may occupy either one or two UChar code units. - * Counting code points involves reading all code units. - * - * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h). - * - * @param s The input string. 
- * @param length The number of UChar code units to be checked, or -1 to count all - * code points before the first NUL (U+0000). - * @return The number of code points in the specified code units. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_countChar32(const UChar *s, int32_t length); - -/** - * Check if the string contains more Unicode code points than a certain number. - * This is more efficient than counting all code points in the entire string - * and comparing that number with a threshold. - * This function may not need to scan the string at all if the length is known - * (not -1 for NUL-termination) and falls within a certain range, and - * never needs to count more than 'number+1' code points. - * Logically equivalent to (u_countChar32(s, length)>number). - * A Unicode code point may occupy either one or two UChar code units. - * - * @param s The input string. - * @param length The length of the string, or -1 if it is NUL-terminated. - * @param number The number of code points in the string is compared against - * the 'number' parameter. - * @return Boolean value for whether the string contains more Unicode code points - * than 'number'. Same as (u_countChar32(s, length)>number). - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number); - -/** - * Concatenate two ustrings. Appends a copy of src, - * including the null terminator, to dst. The initial copied - * character from src overwrites the null terminator in dst. - * - * @param dst The destination string. - * @param src The source string. - * @return A pointer to dst. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_strcat(UChar *dst, - const UChar *src); - -/** - * Concatenate two ustrings. - * Appends at most n characters from src to dst. - * Adds a terminating NUL. - * If src is too long, then only n-1 characters will be copied - * before the terminating NUL. - * If n<=0 then dst is not modified. 
- * - * @param dst The destination string. - * @param src The source string (can be NULL/invalid if n<=0). - * @param n The maximum number of characters to append; no-op if <=0. - * @return A pointer to dst. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_strncat(UChar *dst, - const UChar *src, - int32_t n); - -/** - * Find the first occurrence of a substring in a string. - * The substring is found at code point boundaries. - * That means that if the substring begins with - * a trail surrogate or ends with a lead surrogate, - * then it is found only if these surrogates stand alone in the text. - * Otherwise, the substring edge units would be matched against - * halves of surrogate pairs. - * - * @param s The string to search (NUL-terminated). - * @param substring The substring to find (NUL-terminated). - * @return A pointer to the first occurrence of substring in s, - * or s itself if the substring is empty, - * or NULL if substring is not in s. - * @stable ICU 2.0 - * - * @see u_strrstr - * @see u_strFindFirst - * @see u_strFindLast - */ -U_STABLE UChar * U_EXPORT2 -u_strstr(const UChar *s, const UChar *substring); - -/** - * Find the first occurrence of a substring in a string. - * The substring is found at code point boundaries. - * That means that if the substring begins with - * a trail surrogate or ends with a lead surrogate, - * then it is found only if these surrogates stand alone in the text. - * Otherwise, the substring edge units would be matched against - * halves of surrogate pairs. - * - * @param s The string to search. - * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. - * @param substring The substring to find (NUL-terminated). - * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. - * @return A pointer to the first occurrence of substring in s, - * or s itself if the substring is empty, - * or NULL if substring is not in s. 
- * @stable ICU 2.4 - * - * @see u_strstr - * @see u_strFindLast - */ -U_STABLE UChar * U_EXPORT2 -u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); - -/** - * Find the first occurrence of a BMP code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (NUL-terminated). - * @param c The BMP code point to find. - * @return A pointer to the first occurrence of c in s - * or NULL if c is not in s. - * @stable ICU 2.0 - * - * @see u_strchr32 - * @see u_memchr - * @see u_strstr - * @see u_strFindFirst - */ -U_STABLE UChar * U_EXPORT2 -u_strchr(const UChar *s, UChar c); - -/** - * Find the first occurrence of a code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (NUL-terminated). - * @param c The code point to find. - * @return A pointer to the first occurrence of c in s - * or NULL if c is not in s. - * @stable ICU 2.0 - * - * @see u_strchr - * @see u_memchr32 - * @see u_strstr - * @see u_strFindFirst - */ -U_STABLE UChar * U_EXPORT2 -u_strchr32(const UChar *s, UChar32 c); - -/** - * Find the last occurrence of a substring in a string. - * The substring is found at code point boundaries. - * That means that if the substring begins with - * a trail surrogate or ends with a lead surrogate, - * then it is found only if these surrogates stand alone in the text. - * Otherwise, the substring edge units would be matched against - * halves of surrogate pairs. - * - * @param s The string to search (NUL-terminated). - * @param substring The substring to find (NUL-terminated). 
- * @return A pointer to the last occurrence of substring in s, - * or s itself if the substring is empty, - * or NULL if substring is not in s. - * @stable ICU 2.4 - * - * @see u_strstr - * @see u_strFindFirst - * @see u_strFindLast - */ -U_STABLE UChar * U_EXPORT2 -u_strrstr(const UChar *s, const UChar *substring); - -/** - * Find the last occurrence of a substring in a string. - * The substring is found at code point boundaries. - * That means that if the substring begins with - * a trail surrogate or ends with a lead surrogate, - * then it is found only if these surrogates stand alone in the text. - * Otherwise, the substring edge units would be matched against - * halves of surrogate pairs. - * - * @param s The string to search. - * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. - * @param substring The substring to find (NUL-terminated). - * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. - * @return A pointer to the last occurrence of substring in s, - * or s itself if the substring is empty, - * or NULL if substring is not in s. - * @stable ICU 2.4 - * - * @see u_strstr - * @see u_strFindLast - */ -U_STABLE UChar * U_EXPORT2 -u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); - -/** - * Find the last occurrence of a BMP code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (NUL-terminated). - * @param c The BMP code point to find. - * @return A pointer to the last occurrence of c in s - * or NULL if c is not in s. - * @stable ICU 2.4 - * - * @see u_strrchr32 - * @see u_memrchr - * @see u_strrstr - * @see u_strFindLast - */ -U_STABLE UChar * U_EXPORT2 -u_strrchr(const UChar *s, UChar c); - -/** - * Find the last occurrence of a code point in a string. 
- * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (NUL-terminated). - * @param c The code point to find. - * @return A pointer to the last occurrence of c in s - * or NULL if c is not in s. - * @stable ICU 2.4 - * - * @see u_strrchr - * @see u_memchr32 - * @see u_strrstr - * @see u_strFindLast - */ -U_STABLE UChar * U_EXPORT2 -u_strrchr32(const UChar *s, UChar32 c); - -/** - * Locates the first occurrence in the string string of any of the characters - * in the string matchSet. - * Works just like C's strpbrk but with Unicode. - * - * @param string The string in which to search, NUL-terminated. - * @param matchSet A NUL-terminated string defining a set of code points - * for which to search in the text string. - * @return A pointer to the character in string that matches one of the - * characters in matchSet, or NULL if no such character is found. - * @stable ICU 2.0 - */ -U_STABLE UChar * U_EXPORT2 -u_strpbrk(const UChar *string, const UChar *matchSet); - -/** - * Returns the number of consecutive characters in string, - * beginning with the first, that do not occur somewhere in matchSet. - * Works just like C's strcspn but with Unicode. - * - * @param string The string in which to search, NUL-terminated. - * @param matchSet A NUL-terminated string defining a set of code points - * for which to search in the text string. - * @return The number of initial characters in string that do not - * occur in matchSet. - * @see u_strspn - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strcspn(const UChar *string, const UChar *matchSet); - -/** - * Returns the number of consecutive characters in string, - * beginning with the first, that occur somewhere in matchSet. - * Works just like C's strspn but with Unicode. - * - * @param string The string in which to search, NUL-terminated. 
- * @param matchSet A NUL-terminated string defining a set of code points - * for which to search in the text string. - * @return The number of initial characters in string that do - * occur in matchSet. - * @see u_strcspn - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strspn(const UChar *string, const UChar *matchSet); - -/** - * The string tokenizer API allows an application to break a string into - * tokens. Unlike strtok(), the saveState (the current pointer within the - * original string) is maintained in saveState. In the first call, the - * argument src is a pointer to the string. In subsequent calls to - * return successive tokens of that string, src must be specified as - * NULL. The value saveState is set by this function to maintain the - * function's position within the string, and on each subsequent call - * you must give this argument the same variable. This function does - * handle surrogate pairs. This function is similar to the strtok_r() - * the POSIX Threads Extension (1003.1c-1995) version. - * - * @param src String containing token(s). This string will be modified. - * After the first call to u_strtok_r(), this argument must - * be NULL to get to the next token. - * @param delim Set of delimiter characters (Unicode code points). - * @param saveState The current pointer within the original string, - * which is set by this function. The saveState - * parameter should the address of a local variable of type - * UChar *. (i.e. defined "UChar *myLocalSaveState" and use - * &myLocalSaveState for this parameter). - * @return A pointer to the next token found in src, or NULL - * when there are no more tokens. - * @stable ICU 2.0 - */ -U_STABLE UChar * U_EXPORT2 -u_strtok_r(UChar *src, - const UChar *delim, - UChar **saveState); - -/** - * Compare two Unicode strings for bitwise equality (code unit order). - * - * @param s1 A string to compare. - * @param s2 A string to compare. 
- * @return 0 if s1 and s2 are bitwise equal; a negative - * value if s1 is bitwise less than s2,; a positive - * value if s1 is bitwise greater than s2. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strcmp(const UChar *s1, - const UChar *s2); - -/** - * Compare two Unicode strings in code point order. - * See u_strCompare for details. - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @return a negative/zero/positive integer corresponding to whether - * the first string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strcmpCodePointOrder(const UChar *s1, const UChar *s2); - -/** - * Compare two Unicode strings (binary order). - * - * The comparison can be done in code unit order or in code point order. - * They differ only in UTF-16 when - * comparing supplementary code points (U+10000..U+10ffff) - * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). - * In code unit order, high BMP code points sort after supplementary code points - * because they are stored as pairs of surrogates which are at U+d800..U+dfff. - * - * This functions works with strings of different explicitly specified lengths - * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. - * NUL-terminated strings are possible with length arguments of -1. - * - * @param s1 First source string. - * @param length1 Length of first source string, or -1 if NUL-terminated. - * - * @param s2 Second source string. - * @param length2 Length of second source string, or -1 if NUL-terminated. - * - * @param codePointOrder Choose between code unit order (FALSE) - * and code point order (TRUE). 
- * - * @return <0 or 0 or >0 as usual for string comparisons - * - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -u_strCompare(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - UBool codePointOrder); - -/** - * Compare two Unicode strings (binary order) - * as presented by UCharIterator objects. - * Works otherwise just like u_strCompare(). - * - * Both iterators are reset to their start positions. - * When the function returns, it is undefined where the iterators - * have stopped. - * - * @param iter1 First source string iterator. - * @param iter2 Second source string iterator. - * @param codePointOrder Choose between code unit order (FALSE) - * and code point order (TRUE). - * - * @return <0 or 0 or >0 as usual for string comparisons - * - * @see u_strCompare - * - * @stable ICU 2.6 - */ -U_STABLE int32_t U_EXPORT2 -u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder); - -/** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to - * u_strCompare(u_strFoldCase(s1, options), - * u_strFoldCase(s2, options), - * (options&U_COMPARE_CODE_POINT_ORDER)!=0). - * - * The comparison can be done in UTF-16 code unit order or in code point order. - * They differ only when comparing supplementary code points (U+10000..U+10ffff) - * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). - * In code unit order, high BMP code points sort after supplementary code points - * because they are stored as pairs of surrogates which are at U+d800..U+dfff. - * - * This functions works with strings of different explicitly specified lengths - * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. - * NUL-terminated strings are possible with length arguments of -1. - * - * @param s1 First source string. - * @param length1 Length of first source string, or -1 if NUL-terminated. - * - * @param s2 Second source string. 
- * @param length2 Length of second source string, or -1 if NUL-terminated. - * - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * - * @return <0 or 0 or >0 as usual for string comparisons - * - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -u_strCaseCompare(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode); - -/** - * Compare two ustrings for bitwise equality. - * Compares at most n characters. - * - * @param ucs1 A string to compare (can be NULL/invalid if n<=0). - * @param ucs2 A string to compare (can be NULL/invalid if n<=0). - * @param n The maximum number of characters to compare; always returns 0 if n<=0. - * @return 0 if s1 and s2 are bitwise equal; a negative - * value if s1 is bitwise less than s2; a positive - * value if s1 is bitwise greater than s2. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strncmp(const UChar *ucs1, - const UChar *ucs2, - int32_t n); - -/** - * Compare two Unicode strings in code point order. - * This is different in UTF-16 from u_strncmp() if supplementary characters are present. - * For details, see u_strCompare(). - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @param n The maximum number of characters to compare. 
- * @return a negative/zero/positive integer corresponding to whether - * the first string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n); - -/** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)). - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options); - -/** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options), - * u_strFoldCase(s2, at most n, options)). - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @param n The maximum number of characters each string to case-fold and then compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. 
- * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options); - -/** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options), - * u_strFoldCase(s2, n, options)). - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @param length The number of characters in each string to case-fold and then compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options); - -/** - * Copy a ustring. Adds a null terminator. - * - * @param dst The destination string. - * @param src The source string. - * @return A pointer to dst. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_strcpy(UChar *dst, - const UChar *src); - -/** - * Copy a ustring. - * Copies at most n characters. The result will be null terminated - * if the length of src is less than n. - * - * @param dst The destination string. - * @param src The source string (can be NULL/invalid if n<=0). - * @param n The maximum number of characters to copy; no-op if <=0. - * @return A pointer to dst. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_strncpy(UChar *dst, - const UChar *src, - int32_t n); - -#if !UCONFIG_NO_CONVERSION - -/** - * Copy a byte string encoded in the default codepage to a ustring. - * Adds a null terminator. - * Performs a host byte to UChar conversion - * - * @param dst The destination string. - * @param src The source string. 
- * @return A pointer to dst. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst, - const char *src ); - -/** - * Copy a byte string encoded in the default codepage to a ustring. - * Copies at most n characters. The result will be null terminated - * if the length of src is less than n. - * Performs a host byte to UChar conversion - * - * @param dst The destination string. - * @param src The source string. - * @param n The maximum number of characters to copy. - * @return A pointer to dst. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst, - const char *src, - int32_t n); - -/** - * Copy ustring to a byte string encoded in the default codepage. - * Adds a null terminator. - * Performs a UChar to host byte conversion - * - * @param dst The destination string. - * @param src The source string. - * @return A pointer to dst. - * @stable ICU 2.0 - */ -U_STABLE char* U_EXPORT2 u_austrcpy(char *dst, - const UChar *src ); - -/** - * Copy ustring to a byte string encoded in the default codepage. - * Copies at most n characters. The result will be null terminated - * if the length of src is less than n. - * Performs a UChar to host byte conversion - * - * @param dst The destination string. - * @param src The source string. - * @param n The maximum number of characters to copy. - * @return A pointer to dst. - * @stable ICU 2.0 - */ -U_STABLE char* U_EXPORT2 u_austrncpy(char *dst, - const UChar *src, - int32_t n ); - -#endif - -/** - * Synonym for memcpy(), but with UChars only. - * @param dest The destination string - * @param src The source string (can be NULL/invalid if count<=0) - * @param count The number of characters to copy; no-op if <=0 - * @return A pointer to dest - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_memcpy(UChar *dest, const UChar *src, int32_t count); - -/** - * Synonym for memmove(), but with UChars only. 
- * @param dest The destination string - * @param src The source string (can be NULL/invalid if count<=0) - * @param count The number of characters to move; no-op if <=0 - * @return A pointer to dest - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_memmove(UChar *dest, const UChar *src, int32_t count); - -/** - * Initialize count characters of dest to c. - * - * @param dest The destination string. - * @param c The character to initialize the string. - * @param count The maximum number of characters to set. - * @return A pointer to dest. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_memset(UChar *dest, UChar c, int32_t count); - -/** - * Compare the first count UChars of each buffer. - * - * @param buf1 The first string to compare. - * @param buf2 The second string to compare. - * @param count The maximum number of UChars to compare. - * @return When buf1 < buf2, a negative number is returned. - * When buf1 == buf2, 0 is returned. - * When buf1 > buf2, a positive number is returned. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count); - -/** - * Compare two Unicode strings in code point order. - * This is different in UTF-16 from u_memcmp() if supplementary characters are present. - * For details, see u_strCompare(). - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @param count The maximum number of characters to compare. - * @return a negative/zero/positive integer corresponding to whether - * the first string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count); - -/** - * Find the first occurrence of a BMP code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. 
- * - * @param s The string to search (contains count UChars). - * @param c The BMP code point to find. - * @param count The length of the string. - * @return A pointer to the first occurrence of c in s - * or NULL if c is not in s. - * @stable ICU 2.0 - * - * @see u_strchr - * @see u_memchr32 - * @see u_strFindFirst - */ -U_STABLE UChar* U_EXPORT2 -u_memchr(const UChar *s, UChar c, int32_t count); - -/** - * Find the first occurrence of a code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (contains count UChars). - * @param c The code point to find. - * @param count The length of the string. - * @return A pointer to the first occurrence of c in s - * or NULL if c is not in s. - * @stable ICU 2.0 - * - * @see u_strchr32 - * @see u_memchr - * @see u_strFindFirst - */ -U_STABLE UChar* U_EXPORT2 -u_memchr32(const UChar *s, UChar32 c, int32_t count); - -/** - * Find the last occurrence of a BMP code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (contains count UChars). - * @param c The BMP code point to find. - * @param count The length of the string. - * @return A pointer to the last occurrence of c in s - * or NULL if c is not in s. - * @stable ICU 2.4 - * - * @see u_strrchr - * @see u_memrchr32 - * @see u_strFindLast - */ -U_STABLE UChar* U_EXPORT2 -u_memrchr(const UChar *s, UChar c, int32_t count); - -/** - * Find the last occurrence of a code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (contains count UChars). - * @param c The code point to find. 
- * @param count The length of the string. - * @return A pointer to the last occurrence of c in s - * or NULL if c is not in s. - * @stable ICU 2.4 - * - * @see u_strrchr32 - * @see u_memrchr - * @see u_strFindLast - */ -U_STABLE UChar* U_EXPORT2 -u_memrchr32(const UChar *s, UChar32 c, int32_t count); - -/** - * Unicode String literals in C. - * We need one macro to declare a variable for the string - * and to statically preinitialize it if possible, - * and a second macro to dynamically initialize such a string variable if necessary. - * - * The macros are defined for maximum performance. - * They work only for strings that contain "invariant characters", i.e., - * only latin letters, digits, and some punctuation. - * See utypes.h for details. - * - * A pair of macros for a single string must be used with the same - * parameters. - * The string parameter must be a C string literal. - * The length of the string, not including the terminating - * `NUL`, must be specified as a constant. - * The U_STRING_DECL macro should be invoked exactly once for one - * such string variable before it is used. - * - * Usage: - * - * U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11); - * U_STRING_DECL(ustringVar2, "jumps 5%", 8); - * static UBool didInit=FALSE; - * - * int32_t function() { - * if(!didInit) { - * U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11); - * U_STRING_INIT(ustringVar2, "jumps 5%", 8); - * didInit=TRUE; - * } - * return u_strcmp(ustringVar1, ustringVar2); - * } - * - * Note that the macros will NOT consistently work if their argument is another #`define`. - * The following will not work on all platforms, don't use it. - * - * #define GLUCK "Mr. Gluck" - * U_STRING_DECL(var, GLUCK, 9) - * U_STRING_INIT(var, GLUCK, 9) - * - * Instead, use the string literal "Mr. Gluck" as the argument to both macro - * calls. 
- * - * - * @stable ICU 2.0 - */ -#if defined(U_DECLARE_UTF16) -# define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs) - /**@stable ICU 2.0 */ -# define U_STRING_INIT(var, cs, length) -#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) -# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs - /**@stable ICU 2.0 */ -# define U_STRING_INIT(var, cs, length) -#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY -# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs - /**@stable ICU 2.0 */ -# define U_STRING_INIT(var, cs, length) -#else -# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1] - /**@stable ICU 2.0 */ -# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1) -#endif - -/** - * Unescape a string of characters and write the resulting - * Unicode characters to the destination buffer. The following escape - * sequences are recognized: - * - * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] - * \\Uhhhhhhhh 8 hex digits - * \\xhh 1-2 hex digits - * \\x{h...} 1-8 hex digits - * \\ooo 1-3 octal digits; o in [0-7] - * \\cX control-X; X is masked with 0x1F - * - * as well as the standard ANSI C escapes: - * - * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, - * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, - * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C - * - * Anything else following a backslash is generically escaped. For - * example, "[a\\-z]" returns "[a-z]". - * - * If an escape sequence is ill-formed, this method returns an empty - * string. An example of an ill-formed sequence is "\\u" followed by - * fewer than 4 hex digits. - * - * The above characters are recognized in the compiler's codepage, - * that is, they are coded as 'u', '\\', etc. 
Characters that are - * not parts of escape sequences are converted using u_charsToUChars(). - * - * This function is similar to UnicodeString::unescape() but not - * identical to it. The latter takes a source UnicodeString, so it - * does escape recognition but no conversion. - * - * @param src a zero-terminated string of invariant characters - * @param dest pointer to buffer to receive converted and unescaped - * text and, if there is room, a zero terminator. May be NULL for - * preflighting, in which case no UChars will be written, but the - * return value will still be valid. On error, an empty string is - * stored here (if possible). - * @param destCapacity the number of UChars that may be written at - * dest. Ignored if dest == NULL. - * @return the length of unescaped string. - * @see u_unescapeAt - * @see UnicodeString#unescape() - * @see UnicodeString#unescapeAt() - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_unescape(const char *src, - UChar *dest, int32_t destCapacity); - -U_CDECL_BEGIN -/** - * Callback function for u_unescapeAt() that returns a character of - * the source text given an offset and a context pointer. The context - * pointer will be whatever is passed into u_unescapeAt(). - * - * @param offset pointer to the offset that will be passed to u_unescapeAt(). - * @param context an opaque pointer passed directly into u_unescapeAt() - * @return the character represented by the escape sequence at - * offset - * @see u_unescapeAt - * @stable ICU 2.0 - */ -typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context); -U_CDECL_END - -/** - * Unescape a single sequence. The character at offset-1 is assumed - * (without checking) to be a backslash. This method takes a callback - * pointer to a function that returns the UChar at a given offset. By - * varying this callback, ICU functions are able to unescape char* - * strings, UnicodeString objects, and UFILE pointers. 
- * - * If offset is out of range, or if the escape sequence is ill-formed, - * (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape() - * for a list of recognized sequences. - * - * @param charAt callback function that returns a UChar of the source - * text given an offset and a context pointer. - * @param offset pointer to the offset that will be passed to charAt. - * The offset value will be updated upon return to point after the - * last parsed character of the escape sequence. On error the offset - * is unchanged. - * @param length the number of characters in the source text. The - * last character of the source text is considered to be at offset - * length-1. - * @param context an opaque pointer passed directly into charAt. - * @return the character represented by the escape sequence at - * offset, or (UChar32)0xFFFFFFFF on error. - * @see u_unescape() - * @see UnicodeString#unescape() - * @see UnicodeString#unescapeAt() - * @stable ICU 2.0 - */ -U_STABLE UChar32 U_EXPORT2 -u_unescapeAt(UNESCAPE_CHAR_AT charAt, - int32_t *offset, - int32_t length, - void *context); - -/** - * Uppercase the characters in a string. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer are allowed to overlap. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. 
- * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string. It may be greater than destCapacity. In that case, - * only some of the result was written to the destination buffer. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strToUpper(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode); - -/** - * Lowercase the characters in a string. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer are allowed to overlap. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string. It may be greater than destCapacity. In that case, - * only some of the result was written to the destination buffer. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strToLower(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode); - -#if !UCONFIG_NO_BREAK_ITERATION - -/** - * Titlecase a string. - * Casing is locale-dependent and context-sensitive. - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. 
It titlecases those characters and lowercases - * all others. - * - * The titlecase break iterator can be provided to customize for arbitrary - * styles, using rules and dictionaries beyond the standard iterators. - * It may be more efficient to always provide an iterator to avoid - * opening and closing one for each string. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. - * - * This function uses only the setText(), first() and next() methods of the - * provided break iterator. - * - * The result may be longer or shorter than the original. - * The source string and the destination buffer are allowed to overlap. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param titleIter A break iterator to find the first characters of words - * that are to be titlecased. - * If none is provided (NULL), then a standard titlecase - * break iterator is opened. - * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string. It may be greater than destCapacity. In that case, - * only some of the result was written to the destination buffer. 
- * @stable ICU 2.1 - */ -U_STABLE int32_t U_EXPORT2 -u_strToTitle(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UBreakIterator *titleIter, - const char *locale, - UErrorCode *pErrorCode); - -#endif - -/** - * Case-folds the characters in a string. - * - * Case-folding is locale-independent and not context-sensitive, - * but there is an option for whether to include or exclude mappings for dotted I - * and dotless i that are marked with 'T' in CaseFolding.txt. - * - * The result may be longer or shorter than the original. - * The source string and the destination buffer are allowed to overlap. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string. It may be greater than destCapacity. In that case, - * only some of the result was written to the destination buffer. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strFoldCase(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - uint32_t options, - UErrorCode *pErrorCode); - -#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION -/** - * Convert a UTF-16 string to a wchar_t string. - * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then - * this function simply calls the fast, dedicated function for that. 
- * Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The pointer to destination buffer. - * @stable ICU 2.0 - */ -U_STABLE wchar_t* U_EXPORT2 -u_strToWCS(wchar_t *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UErrorCode *pErrorCode); -/** - * Convert a wchar_t string to UTF-16. - * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then - * this function simply calls the fast, dedicated function for that. - * Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. 
If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The pointer to destination buffer. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_strFromWCS(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const wchar_t *src, - int32_t srcLength, - UErrorCode *pErrorCode); -#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */ - -/** - * Convert a UTF-16 string to UTF-8. - * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of chars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The pointer to destination buffer. 
- * @stable ICU 2.0 - * @see u_strToUTF8WithSub - * @see u_strFromUTF8 - */ -U_STABLE char* U_EXPORT2 -u_strToUTF8(char *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-8 string to UTF-16. - * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The pointer to destination buffer. - * @stable ICU 2.0 - * @see u_strFromUTF8WithSub - * @see u_strFromUTF8Lenient - */ -U_STABLE UChar* U_EXPORT2 -u_strFromUTF8(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const char *src, - int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-16 string to UTF-8. - * - * Same as u_strToUTF8() except for the additional subchar which is output for - * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. - * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8(). - * - * @param dest A buffer for the result string. 
The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of chars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param subchar The substitution character to use in place of an illegal input sequence, - * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. - * A substitution character can be any valid Unicode code point (up to U+10FFFF) - * except for surrogate code points (U+D800..U+DFFF). - * The recommended value is U+FFFD "REPLACEMENT CHARACTER". - * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. - * Set to 0 if no substitutions occur or subchar<0. - * pNumSubstitutions can be NULL. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to destination buffer. - * @see u_strToUTF8 - * @see u_strFromUTF8WithSub - * @stable ICU 3.6 - */ -U_STABLE char* U_EXPORT2 -u_strToUTF8WithSub(char *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-8 string to UTF-16. 
- * - * Same as u_strFromUTF8() except for the additional subchar which is output for - * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. - * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8(). - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param subchar The substitution character to use in place of an illegal input sequence, - * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. - * A substitution character can be any valid Unicode code point (up to U+10FFFF) - * except for surrogate code points (U+D800..U+DFFF). - * The recommended value is U+FFFD "REPLACEMENT CHARACTER". - * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. - * Set to 0 if no substitutions occur or subchar<0. - * pNumSubstitutions can be NULL. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to destination buffer. 
- * @see u_strFromUTF8 - * @see u_strFromUTF8Lenient - * @see u_strToUTF8WithSub - * @stable ICU 3.6 - */ -U_STABLE UChar* U_EXPORT2 -u_strFromUTF8WithSub(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const char *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-8 string to UTF-16. - * - * Same as u_strFromUTF8() except that this function is designed to be very fast, - * which it achieves by being lenient about malformed UTF-8 sequences. - * This function is intended for use in environments where UTF-8 text is - * expected to be well-formed. - * - * Its semantics are: - * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text. - * - The function will not read beyond the input string, nor write beyond - * the destCapacity. - * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not - * be well-formed UTF-16. - * The function will resynchronize to valid code point boundaries - * within a small number of code points after an illegal sequence. - * - Non-shortest forms are not detected and will result in "spoofing" output. - * - * For further performance improvement, if srcLength is given (>=0), - * then it must be destCapacity>=srcLength. - * - * There is no inverse u_strToUTF8Lenient() function because there is practically - * no performance gain from not checking that a UTF-16 string is well-formed. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * Unlike for other ICU functions, if srcLength>=0 then it - * must be destCapacity>=srcLength. - * @param pDestLength A pointer to receive the number of units written to the destination. 
If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * Unlike for other ICU functions, if srcLength>=0 but - * destCapacity=0. - * Set to 0 if no substitutions occur or subchar<0. - * pNumSubstitutions can be NULL. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to destination buffer. - * @see u_strToUTF32 - * @see u_strFromUTF32WithSub - * @stable ICU 4.2 - */ -U_STABLE UChar32* U_EXPORT2 -u_strToUTF32WithSub(UChar32 *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-32 string to UTF-16. - * - * Same as u_strFromUTF32() except for the additional subchar which is output for - * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. - * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32(). - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. 
If -1, then src must be zero-terminated. - * @param subchar The substitution character to use in place of an illegal input sequence, - * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. - * A substitution character can be any valid Unicode code point (up to U+10FFFF) - * except for surrogate code points (U+D800..U+DFFF). - * The recommended value is U+FFFD "REPLACEMENT CHARACTER". - * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. - * Set to 0 if no substitutions occur or subchar<0. - * pNumSubstitutions can be NULL. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to destination buffer. - * @see u_strFromUTF32 - * @see u_strToUTF32WithSub - * @stable ICU 4.2 - */ -U_STABLE UChar* U_EXPORT2 -u_strFromUTF32WithSub(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar32 *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode); - -/** - * Convert a 16-bit Unicode string to Java Modified UTF-8. - * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8 - * - * This function behaves according to the documentation for Java DataOutput.writeUTF() - * except that it does not encode the output length in the destination buffer - * and does not have an output length restriction. - * See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String) - * - * The input string need not be well-formed UTF-16. - * (Therefore there is no subchar parameter.) - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of chars). 
If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to destination buffer. - * @stable ICU 4.4 - * @see u_strToUTF8WithSub - * @see u_strFromJavaModifiedUTF8WithSub - */ -U_STABLE char* U_EXPORT2 -u_strToJavaModifiedUTF8( - char *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Convert a Java Modified UTF-8 string to a 16-bit Unicode string. - * If the input string is not well-formed and no substitution char is specified, - * then the U_INVALID_CHAR_FOUND error code is set. - * - * This function behaves according to the documentation for Java DataInput.readUTF() - * except that it takes a length parameter rather than - * interpreting the first two input bytes as the length. - * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF() - * - * The output string may not be well-formed UTF-16. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). 
If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param subchar The substitution character to use in place of an illegal input sequence, - * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. - * A substitution character can be any valid Unicode code point (up to U+10FFFF) - * except for surrogate code points (U+D800..U+DFFF). - * The recommended value is U+FFFD "REPLACEMENT CHARACTER". - * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. - * Set to 0 if no substitutions occur or subchar<0. - * pNumSubstitutions can be NULL. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to destination buffer. 
- * @see u_strFromUTF8WithSub - * @see u_strFromUTF8Lenient - * @see u_strToJavaModifiedUTF8 - * @stable ICU 4.4 - */ -U_STABLE UChar* U_EXPORT2 -u_strFromJavaModifiedUTF8WithSub( - UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const char *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode); - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/ustringtrie.h b/deps/node/deps/icu-small/source/common/unicode/ustringtrie.h deleted file mode 100644 index fd856482..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/ustringtrie.h +++ /dev/null @@ -1,97 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2012, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: udicttrie.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010dec17 -* created by: Markus W. Scherer -*/ - -#ifndef __USTRINGTRIE_H__ -#define __USTRINGTRIE_H__ - -/** - * \file - * \brief C API: Helper definitions for dictionary trie APIs. - */ - -#include "unicode/utypes.h" - - -/** - * Return values for BytesTrie::next(), UCharsTrie::next() and similar methods. - * @see USTRINGTRIE_MATCHES - * @see USTRINGTRIE_HAS_VALUE - * @see USTRINGTRIE_HAS_NEXT - * @stable ICU 4.8 - */ -enum UStringTrieResult { - /** - * The input unit(s) did not continue a matching string. - * Once current()/next() return USTRINGTRIE_NO_MATCH, - * all further calls to current()/next() will also return USTRINGTRIE_NO_MATCH, - * until the trie is reset to its original state or to a saved state. 
- * @stable ICU 4.8 - */ - USTRINGTRIE_NO_MATCH, - /** - * The input unit(s) continued a matching string - * but there is no value for the string so far. - * (It is a prefix of a longer string.) - * @stable ICU 4.8 - */ - USTRINGTRIE_NO_VALUE, - /** - * The input unit(s) continued a matching string - * and there is a value for the string so far. - * This value will be returned by getValue(). - * No further input byte/unit can continue a matching string. - * @stable ICU 4.8 - */ - USTRINGTRIE_FINAL_VALUE, - /** - * The input unit(s) continued a matching string - * and there is a value for the string so far. - * This value will be returned by getValue(). - * Another input byte/unit can continue a matching string. - * @stable ICU 4.8 - */ - USTRINGTRIE_INTERMEDIATE_VALUE -}; - -/** - * Same as (result!=USTRINGTRIE_NO_MATCH). - * @param result A result from BytesTrie::first(), UCharsTrie::next() etc. - * @return true if the input bytes/units so far are part of a matching string/byte sequence. - * @stable ICU 4.8 - */ -#define USTRINGTRIE_MATCHES(result) ((result)!=USTRINGTRIE_NO_MATCH) - -/** - * Equivalent to (result==USTRINGTRIE_INTERMEDIATE_VALUE || result==USTRINGTRIE_FINAL_VALUE) but - * this macro evaluates result exactly once. - * @param result A result from BytesTrie::first(), UCharsTrie::next() etc. - * @return true if there is a value for the input bytes/units so far. - * @see BytesTrie::getValue - * @see UCharsTrie::getValue - * @stable ICU 4.8 - */ -#define USTRINGTRIE_HAS_VALUE(result) ((result)>=USTRINGTRIE_FINAL_VALUE) - -/** - * Equivalent to (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE) but - * this macro evaluates result exactly once. - * @param result A result from BytesTrie::first(), UCharsTrie::next() etc. - * @return true if another input byte/unit can continue a matching string. 
- * @stable ICU 4.8 - */ -#define USTRINGTRIE_HAS_NEXT(result) ((result)&1) - -#endif /* __USTRINGTRIE_H__ */ diff --git a/deps/node/deps/icu-small/source/common/unicode/utext.h b/deps/node/deps/icu-small/source/common/unicode/utext.h deleted file mode 100644 index 51d11a2e..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/utext.h +++ /dev/null @@ -1,1602 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2004-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: utext.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004oct06 -* created by: Markus W. Scherer -*/ - -#ifndef __UTEXT_H__ -#define __UTEXT_H__ - -/** - * \file - * \brief C API: Abstract Unicode Text API - * - * The Text Access API provides a means to allow text that is stored in alternative - * formats to work with ICU services. ICU normally operates on text that is - * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type - * UnicodeString for C++ APIs. - * - * ICU Text Access allows other formats, such as UTF-8 or non-contiguous - * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services. - * - * There are three general classes of usage for UText: - * - * Application Level Use. This is the simplest usage - applications would - * use one of the utext_open() functions on their input text, and pass - * the resulting UText to the desired ICU service. - * - * Second is usage in ICU Services, such as break iteration, that will need to - * operate on input presented to them as a UText. These implementations - * will need to use the iteration and related UText functions to gain - * access to the actual text. 
- * - * The third class of UText users are "text providers." These are the - * UText implementations for the various text storage formats. An application - * or system with a unique text storage format can implement a set of - * UText provider functions for that format, which will then allow - * ICU services to operate on that format. - * - * - * Iterating over text - * - * Here is sample code for a forward iteration over the contents of a UText - * - * \code - * UChar32 c; - * UText *ut = whatever(); - * - * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { - * // do whatever with the codepoint c here. - * } - * \endcode - * - * And here is similar code to iterate in the reverse direction, from the end - * of the text towards the beginning. - * - * \code - * UChar32 c; - * UText *ut = whatever(); - * int textLength = utext_nativeLength(ut); - * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) { - * // do whatever with the codepoint c here. - * } - * \endcode - * - * Characters and Indexing - * - * Indexing into text by UText functions is nearly always in terms of the native - * indexing of the underlying text storage. The storage format could be UTF-8 - * or UTF-32, for example. When coding to the UText access API, no assumptions - * can be made regarding the size of characters, or how far an index - * may move when iterating between characters. - * - * All indices supplied to UText functions are pinned to the length of the - * text. An out-of-bounds index is not considered to be an error, but is - * adjusted to be in the range 0 <= index <= length of input text. - * - * - * When an index position is returned from a UText function, it will be - * a native index to the underlying text. In the case of multi-unit characters, - * it will always refer to the first position of the character, - * never to the interior. 
This is essentially the same thing as saying that - * a returned index will always point to a boundary between characters. - * - * When a native index is supplied to a UText function, all indices that - * refer to any part of a multi-unit character representation are considered - * to be equivalent. In the case of multi-unit characters, an incoming index - * will be logically normalized to refer to the start of the character. - * - * It is possible to test whether a native index is on a code point boundary - * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). - * If the index is returned unchanged, it was on a code point boundary. If - * an adjusted index is returned, the original index referred to the - * interior of a character. - * - * Conventions for calling UText functions - * - * Most UText access functions have as their first parameter a (UText *) pointer, - * which specifies the UText to be used. Unless otherwise noted, the - * pointer must refer to a valid, open UText. Attempting to - * use a closed UText or passing a NULL pointer is a programming error and - * will produce undefined results or NULL pointer exceptions. - * - * The UText_Open family of functions can either open an existing (closed) - * UText, or heap allocate a new UText. Here is sample code for creating - * a stack-allocated UText. - * - * \code - * char *s = whatever(); // A utf-8 string - * U_ErrorCode status = U_ZERO_ERROR; - * UText ut = UTEXT_INITIALIZER; - * utext_openUTF8(ut, s, -1, &status); - * if (U_FAILURE(status)) { - * // error handling - * } else { - * // work with the UText - * } - * \endcode - * - * Any existing UText passed to an open function _must_ have been initialized, - * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated - * by an open function. Passing NULL will cause the open function to - * heap-allocate and fully initialize a new UText. 
- * - */ - - - -#include "unicode/utypes.h" -#include "unicode/uchar.h" -#if U_SHOW_CPLUSPLUS_API -#include "unicode/localpointer.h" -#include "unicode/rep.h" -#include "unicode/unistr.h" -#include "unicode/chariter.h" -#endif - - -U_CDECL_BEGIN - -struct UText; -typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ - - -/*************************************************************************************** - * - * C Functions for creating UText wrappers around various kinds of text strings. - * - ****************************************************************************************/ - - -/** - * Close function for UText instances. - * Cleans up, releases any resources being held by an open UText. - *

- * If the UText was originally allocated by one of the utext_open functions, - * the storage associated with the utext will also be freed. - * If the UText storage originated with the application, as it would with - * a local or static instance, the storage will not be deleted. - * - * An open UText can be reset to refer to new string by using one of the utext_open() - * functions without first closing the UText. - * - * @param ut The UText to be closed. - * @return NULL if the UText struct was deleted by the close. If the UText struct - * was originally provided by the caller to the open function, it is - * returned by this function, and may be safely used again in - * a subsequent utext_open. - * - * @stable ICU 3.4 - */ -U_STABLE UText * U_EXPORT2 -utext_close(UText *ut); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUTextPointer - * "Smart pointer" class, closes a UText via utext_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); - -U_NAMESPACE_END - -#endif - -/** - * Open a read-only UText implementation for UTF-8 strings. - * - * \htmlonly - * Any invalid UTF-8 in the input will be handled in this way: - * a sequence of bytes that has the form of a truncated, but otherwise valid, - * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. - * Any other illegal bytes will each be replaced by a \uFFFD. - * \endhtmlonly - * - * @param ut Pointer to a UText struct. If NULL, a new UText will be created. - * If non-NULL, must refer to an initialized UText struct, which will then - * be reset to reference the specified UTF-8 string. - * @param s A UTF-8 string. Must not be NULL. - * @param length The length of the UTF-8 string in bytes, or -1 if the string is - * zero terminated. - * @param status Errors are returned here. 
- * @return A pointer to the UText. If a pre-allocated UText was provided, it - * will always be used and returned. - * @stable ICU 3.4 - */ -U_STABLE UText * U_EXPORT2 -utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); - - -/** - * Open a read-only UText for UChar * string. - * - * @param ut Pointer to a UText struct. If NULL, a new UText will be created. - * If non-NULL, must refer to an initialized UText struct, which will then - * be reset to reference the specified UChar string. - * @param s A UChar (UTF-16) string - * @param length The number of UChars in the input string, or -1 if the string is - * zero terminated. - * @param status Errors are returned here. - * @return A pointer to the UText. If a pre-allocated UText was provided, it - * will always be used and returned. - * @stable ICU 3.4 - */ -U_STABLE UText * U_EXPORT2 -utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); - - -#if U_SHOW_CPLUSPLUS_API -/** - * Open a writable UText for a non-const UnicodeString. - * - * @param ut Pointer to a UText struct. If NULL, a new UText will be created. - * If non-NULL, must refer to an initialized UText struct, which will then - * be reset to reference the specified input string. - * @param s A UnicodeString. - * @param status Errors are returned here. - * @return Pointer to the UText. If a UText was supplied as input, this - * will always be used and returned. - * @stable ICU 3.4 - */ -U_STABLE UText * U_EXPORT2 -utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status); - - -/** - * Open a UText for a const UnicodeString. The resulting UText will not be writable. - * - * @param ut Pointer to a UText struct. If NULL, a new UText will be created. - * If non-NULL, must refer to an initialized UText struct, which will then - * be reset to reference the specified input string. - * @param s A const UnicodeString to be wrapped. - * @param status Errors are returned here. 
- * @return Pointer to the UText. If a UText was supplied as input, this - * will always be used and returned. - * @stable ICU 3.4 - */ -U_STABLE UText * U_EXPORT2 -utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status); - - -/** - * Open a writable UText implementation for an ICU Replaceable object. - * @param ut Pointer to a UText struct. If NULL, a new UText will be created. - * If non-NULL, must refer to an already existing UText, which will then - * be reset to reference the specified replaceable text. - * @param rep A Replaceable text object. - * @param status Errors are returned here. - * @return Pointer to the UText. If a UText was supplied as input, this - * will always be used and returned. - * @see Replaceable - * @stable ICU 3.4 - */ -U_STABLE UText * U_EXPORT2 -utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status); - -/** - * Open a UText implementation over an ICU CharacterIterator. - * @param ut Pointer to a UText struct. If NULL, a new UText will be created. - * If non-NULL, must refer to an already existing UText, which will then - * be reset to reference the specified replaceable text. - * @param ci A Character Iterator. - * @param status Errors are returned here. - * @return Pointer to the UText. If a UText was supplied as input, this - * will always be used and returned. - * @see Replaceable - * @stable ICU 3.4 - */ -U_STABLE UText * U_EXPORT2 -utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status); - -#endif - - -/** - * Clone a UText. This is much like opening a UText where the source text is itself - * another UText. - * - * A deep clone will copy both the UText data structures and the underlying text. - * The original and cloned UText will operate completely independently; modifications - * made to the text in one will not affect the other. Text providers are not - * required to support deep clones. 
The user of clone() must check the status return - * and be prepared to handle failures. - * - * The standard UText implementations for UTF8, UChar *, UnicodeString and - * Replaceable all support deep cloning. - * - * The UText returned from a deep clone will be writable, assuming that the text - * provider is able to support writing, even if the source UText had been made - * non-writable by means of UText_freeze(). - * - * A shallow clone replicates only the UText data structures; it does not make - * a copy of the underlying text. Shallow clones can be used as an efficient way to - * have multiple iterators active in a single text string that is not being - * modified. - * - * A shallow clone operation will not fail, barring truly exceptional conditions such - * as memory allocation failures. - * - * Shallow UText clones should be avoided if the UText functions that modify the - * text are expected to be used, either on the original or the cloned UText. - * Any such modifications can cause unpredictable behavior. Read Only - * shallow clones provide some protection against errors of this type by - * disabling text modification via the cloned UText. - * - * A shallow clone made with the readOnly parameter == FALSE will preserve the - * utext_isWritable() state of the source object. Note, however, that - * write operations must be avoided while more than one UText exists that refer - * to the same underlying text. - * - * A UText and its clone may be safely concurrently accessed by separate threads. - * This is true for read access only with shallow clones, and for both read and - * write access with deep clones. - * It is the responsibility of the Text Provider to ensure that this thread safety - * constraint is met. - * - * @param dest A UText struct to be filled in with the result of the clone operation, - * or NULL if the clone function should heap-allocate a new UText struct. 
- * If non-NULL, must refer to an already existing UText, which will then - * be reset to become the clone. - * @param src The UText to be cloned. - * @param deep TRUE to request a deep clone, FALSE for a shallow clone. - * @param readOnly TRUE to request that the cloned UText have read only access to the - * underlying text. - - * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR - * will be returned if the text provider is unable to clone the - * original text. - * @return The newly created clone, or NULL if the clone operation failed. - * @stable ICU 3.4 - */ -U_STABLE UText * U_EXPORT2 -utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status); - - -/** - * Compare two UText objects for equality. - * UTexts are equal if they are iterating over the same text, and - * have the same iteration position within the text. - * If either or both of the parameters are NULL, the comparison is FALSE. - * - * @param a The first of the two UTexts to compare. - * @param b The other UText to be compared. - * @return TRUE if the two UTexts are equal. - * @stable ICU 3.6 - */ -U_STABLE UBool U_EXPORT2 -utext_equals(const UText *a, const UText *b); - - -/***************************************************************************** - * - * Functions to work with the text represented by a UText wrapper - * - *****************************************************************************/ - -/** - * Get the length of the text. Depending on the characteristics - * of the underlying text representation, this may be expensive. - * @see utext_isLengthExpensive() - * - * - * @param ut the text to be accessed. - * @return the length of the text, expressed in native units. - * - * @stable ICU 3.4 - */ -U_STABLE int64_t U_EXPORT2 -utext_nativeLength(UText *ut); - -/** - * Return TRUE if calculating the length of the text could be expensive. - * Finding the length of NUL terminated strings is considered to be expensive. 
- * - * Note that the value of this function may change - * as the result of other operations on a UText. - * Once the length of a string has been discovered, it will no longer - * be expensive to report it. - * - * @param ut the text to be accessed. - * @return TRUE if determining the length of the text could be time consuming. - * @stable ICU 3.4 - */ -U_STABLE UBool U_EXPORT2 -utext_isLengthExpensive(const UText *ut); - -/** - * Returns the code point at the requested index, - * or U_SENTINEL (-1) if it is out of bounds. - * - * If the specified index points to the interior of a multi-unit - * character - one of the trail bytes of a UTF-8 sequence, for example - - * the complete code point will be returned. - * - * The iteration position will be set to the start of the returned code point. - * - * This function is roughly equivalent to the sequence - * utext_setNativeIndex(index); - * utext_current32(); - * (There is a subtle difference if the index is out of bounds by being less than zero - - * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current() - * will return the char at zero. utext_char32At(negative index), on the other hand, will - * return the U_SENTINEL value of -1.) - * - * @param ut the text to be accessed - * @param nativeIndex the native index of the character to be accessed. If the index points - * to other than the first unit of a multi-unit character, it will be adjusted - * to the start of the character. - * @return the code point at the specified index. - * @stable ICU 3.4 - */ -U_STABLE UChar32 U_EXPORT2 -utext_char32At(UText *ut, int64_t nativeIndex); - - -/** - * - * Get the code point at the current iteration position, - * or U_SENTINEL (-1) if the iteration has reached the end of - * the input text. - * - * @param ut the text to be accessed. - * @return the Unicode code point at the current iterator position. 
- * @stable ICU 3.4 - */ -U_STABLE UChar32 U_EXPORT2 -utext_current32(UText *ut); - - -/** - * Get the code point at the current iteration position of the UText, and - * advance the position to the first index following the character. - * - * If the position is at the end of the text (the index following - * the last character, which is also the length of the text), - * return U_SENTINEL (-1) and do not advance the index. - * - * This is a post-increment operation. - * - * An inline macro version of this function, UTEXT_NEXT32(), - * is available for performance critical use. - * - * @param ut the text to be accessed. - * @return the Unicode code point at the iteration position. - * @see UTEXT_NEXT32 - * @stable ICU 3.4 - */ -U_STABLE UChar32 U_EXPORT2 -utext_next32(UText *ut); - - -/** - * Move the iterator position to the character (code point) whose - * index precedes the current position, and return that character. - * This is a pre-decrement operation. - * - * If the initial position is at the start of the text (index of 0) - * return U_SENTINEL (-1), and leave the position unchanged. - * - * An inline macro version of this function, UTEXT_PREVIOUS32(), - * is available for performance critical use. - * - * @param ut the text to be accessed. - * @return the previous UChar32 code point, or U_SENTINEL (-1) - * if the iteration has reached the start of the text. - * @see UTEXT_PREVIOUS32 - * @stable ICU 3.4 - */ -U_STABLE UChar32 U_EXPORT2 -utext_previous32(UText *ut); - - -/** - * Set the iteration index and return the code point at that index. - * Leave the iteration index at the start of the following code point. - * - * This function is the most efficient and convenient way to - * begin a forward iteration. The results are identical to the those - * from the sequence - * \code - * utext_setIndex(); - * utext_next32(); - * \endcode - * - * @param ut the text to be accessed. - * @param nativeIndex Iteration index, in the native units of the text provider. 
- * @return Code point which starts at or before index, - * or U_SENTINEL (-1) if it is out of bounds. - * @stable ICU 3.4 - */ -U_STABLE UChar32 U_EXPORT2 -utext_next32From(UText *ut, int64_t nativeIndex); - - - -/** - * Set the iteration index, and return the code point preceding the - * one specified by the initial index. Leave the iteration position - * at the start of the returned code point. - * - * This function is the most efficient and convenient way to - * begin a backwards iteration. - * - * @param ut the text to be accessed. - * @param nativeIndex Iteration index in the native units of the text provider. - * @return Code point preceding the one at the initial index, - * or U_SENTINEL (-1) if it is out of bounds. - * - * @stable ICU 3.4 - */ -U_STABLE UChar32 U_EXPORT2 -utext_previous32From(UText *ut, int64_t nativeIndex); - -/** - * Get the current iterator position, which can range from 0 to - * the length of the text. - * The position is a native index into the input text, in whatever format it - * may have (possibly UTF-8 for example), and may not always be the same as - * the corresponding UChar (UTF-16) index. - * The returned position will always be aligned to a code point boundary. - * - * @param ut the text to be accessed. - * @return the current index position, in the native units of the text provider. - * @stable ICU 3.4 - */ -U_STABLE int64_t U_EXPORT2 -utext_getNativeIndex(const UText *ut); - -/** - * Set the current iteration position to the nearest code point - * boundary at or preceding the specified index. - * The index is in the native units of the original input text. - * If the index is out of range, it will be pinned to be within - * the range of the input text. - *

- * It will usually be more efficient to begin an iteration - * using the functions utext_next32From() or utext_previous32From() - * rather than setIndex(). - *

- * Moving the index position to an adjacent character is best done - * with utext_next32(), utext_previous32() or utext_moveIndex32(). - * Attempting to do direct arithmetic on the index position is - * complicated by the fact that the size (in native units) of a - * character depends on the underlying representation of the character - * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not - * easily knowable. - * - * @param ut the text to be accessed. - * @param nativeIndex the native unit index of the new iteration position. - * @stable ICU 3.4 - */ -U_STABLE void U_EXPORT2 -utext_setNativeIndex(UText *ut, int64_t nativeIndex); - -/** - * Move the iterator position by delta code points. The number of code points - * is a signed number; a negative delta will move the iterator backwards, - * towards the start of the text. - *

- * The index is moved by delta code points - * forward or backward, but no further backward than to 0 and - * no further forward than to utext_nativeLength(). - * The resulting index value will be in between 0 and length, inclusive. - * - * @param ut the text to be accessed. - * @param delta the signed number of code points to move the iteration position. - * @return TRUE if the position could be moved the requested number of positions while - * staying within the range [0 - text length]. - * @stable ICU 3.4 - */ -U_STABLE UBool U_EXPORT2 -utext_moveIndex32(UText *ut, int32_t delta); - -/** - * Get the native index of the character preceding the current position. - * If the iteration position is already at the start of the text, zero - * is returned. - * The value returned is the same as that obtained from the following sequence, - * but without the side effect of changing the iteration position. - * - * \code - * UText *ut = whatever; - * ... - * utext_previous(ut) - * utext_getNativeIndex(ut); - * \endcode - * - * This function is most useful during forwards iteration, where it will get the - * native index of the character most recently returned from utext_next(). - * - * @param ut the text to be accessed - * @return the native index of the character preceding the current index position, - * or zero if the current position is at the start of the text. - * @stable ICU 3.6 - */ -U_STABLE int64_t U_EXPORT2 -utext_getPreviousNativeIndex(UText *ut); - - -/** - * - * Extract text from a UText into a UChar buffer. The range of text to be extracted - * is specified in the native indices of the UText provider. These may not necessarily - * be UTF-16 indices. - *

- * The size (number of 16 bit UChars) of the data to be extracted is returned. The - * full number of UChars is returned, even when the extracted text is truncated - * because the specified buffer size is too small. - *

- * The extracted string will (if you are a user) / must (if you are a text provider) - * be NUL-terminated if there is sufficient space in the destination buffer. This - * terminating NUL is not included in the returned length. - *

- * The iteration index is left at the position following the last extracted character. - * - * @param ut the UText from which to extract data. - * @param nativeStart the native index of the first character to extract.\ - * If the specified index is out of range, - * it will be pinned to be within 0 <= index <= textLength - * @param nativeLimit the native string index of the position following the last - * character to extract. If the specified index is out of range, - * it will be pinned to be within 0 <= index <= textLength. - * nativeLimit must be >= nativeStart. - * @param dest the UChar (UTF-16) buffer into which the extracted text is placed - * @param destCapacity The size, in UChars, of the destination buffer. May be zero - * for precomputing the required size. - * @param status receives any error status. - * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the - * buffer was too small. Returns number of UChars for preflighting. - * @return Number of UChars in the data to be extracted. Does not include a trailing NUL. - * - * @stable ICU 3.4 - */ -U_STABLE int32_t U_EXPORT2 -utext_extract(UText *ut, - int64_t nativeStart, int64_t nativeLimit, - UChar *dest, int32_t destCapacity, - UErrorCode *status); - - - -/************************************************************************************ - * - * #define inline versions of selected performance-critical text access functions - * Caution: do not use auto increment++ or decrement-- expressions - * as parameters to these macros. - * - * For most use, where there is no extreme performance constraint, the - * normal, non-inline functions are a better choice. The resulting code - * will be smaller, and, if the need ever arises, easier to debug. - * - * These are implemented as #defines rather than real functions - * because there is no fully portable way to do inline functions in plain C. 
- * - ************************************************************************************/ - -#ifndef U_HIDE_INTERNAL_API -/** - * inline version of utext_current32(), for performance-critical situations. - * - * Get the code point at the current iteration position of the UText. - * Returns U_SENTINEL (-1) if the position is at the end of the - * text. - * - * @internal ICU 4.4 technology preview - */ -#define UTEXT_CURRENT32(ut) \ - ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ - ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut)) -#endif /* U_HIDE_INTERNAL_API */ - -/** - * inline version of utext_next32(), for performance-critical situations. - * - * Get the code point at the current iteration position of the UText, and - * advance the position to the first index following the character. - * This is a post-increment operation. - * Returns U_SENTINEL (-1) if the position is at the end of the - * text. - * - * @stable ICU 3.4 - */ -#define UTEXT_NEXT32(ut) \ - ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ - ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) - -/** - * inline version of utext_previous32(), for performance-critical situations. - * - * Move the iterator position to the character (code point) whose - * index precedes the current position, and return that character. - * This is a pre-decrement operation. - * Returns U_SENTINEL (-1) if the position is at the start of the text. - * - * @stable ICU 3.4 - */ -#define UTEXT_PREVIOUS32(ut) \ - ((ut)->chunkOffset > 0 && \ - (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \ - (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut)) - -/** - * inline version of utext_getNativeIndex(), for performance-critical situations. - * - * Get the current iterator position, which can range from 0 to - * the length of the text. 
- * The position is a native index into the input text, in whatever format it - * may have (possibly UTF-8 for example), and may not always be the same as - * the corresponding UChar (UTF-16) index. - * The returned position will always be aligned to a code point boundary. - * - * @stable ICU 3.6 - */ -#define UTEXT_GETNATIVEINDEX(ut) \ - ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \ - (ut)->chunkNativeStart+(ut)->chunkOffset : \ - (ut)->pFuncs->mapOffsetToNative(ut)) - -/** - * inline version of utext_setNativeIndex(), for performance-critical situations. - * - * Set the current iteration position to the nearest code point - * boundary at or preceding the specified index. - * The index is in the native units of the original input text. - * If the index is out of range, it will be pinned to be within - * the range of the input text. - * - * @stable ICU 3.8 - */ -#define UTEXT_SETNATIVEINDEX(ut, ix) \ - { int64_t __offset = (ix) - (ut)->chunkNativeStart; \ - if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \ - (ut)->chunkOffset=(int32_t)__offset; \ - } else { \ - utext_setNativeIndex((ut), (ix)); } } - - - -/************************************************************************************ - * - * Functions related to writing or modifying the text. - * These will work only with modifiable UTexts. Attempting to - * modify a read-only UText will return an error status. - * - ************************************************************************************/ - - -/** - * Return TRUE if the text can be written (modified) with utext_replace() or - * utext_copy(). For the text to be writable, the text provider must - * be of a type that supports writing and the UText must not be frozen. - * - * Attempting to modify text when utext_isWriteable() is FALSE will fail - - * the text will not be modified, and an error will be returned from the function - * that attempted the modification. 
- * - * @param ut the UText to be tested. - * @return TRUE if the text is modifiable. - * - * @see utext_freeze() - * @see utext_replace() - * @see utext_copy() - * @stable ICU 3.4 - * - */ -U_STABLE UBool U_EXPORT2 -utext_isWritable(const UText *ut); - - -/** - * Test whether there is meta data associated with the text. - * @see Replaceable::hasMetaData() - * - * @param ut The UText to be tested - * @return TRUE if the underlying text includes meta data. - * @stable ICU 3.4 - */ -U_STABLE UBool U_EXPORT2 -utext_hasMetaData(const UText *ut); - - -/** - * Replace a range of the original text with a replacement text. - * - * Leaves the current iteration position at the position following the - * newly inserted replacement text. - * - * This function is only available on UText types that support writing, - * that is, ones where utext_isWritable() returns TRUE. - * - * When using this function, there should be only a single UText opened onto the - * underlying native text string. Behavior after a replace operation - * on a UText is undefined for any other additional UTexts that refer to the - * modified string. - * - * @param ut the UText representing the text to be operated on. - * @param nativeStart the native index of the start of the region to be replaced - * @param nativeLimit the native index of the character following the region to be replaced. - * @param replacementText pointer to the replacement text - * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated. - * @param status receives any error status. Possible errors include - * U_NO_WRITE_PERMISSION - * - * @return The signed number of (native) storage units by which - * the length of the text expanded or contracted. 
- * - * @stable ICU 3.4 - */ -U_STABLE int32_t U_EXPORT2 -utext_replace(UText *ut, - int64_t nativeStart, int64_t nativeLimit, - const UChar *replacementText, int32_t replacementLength, - UErrorCode *status); - - - -/** - * - * Copy or move a substring from one position to another within the text, - * while retaining any metadata associated with the text. - * This function is used to duplicate or reorder substrings. - * The destination index must not overlap the source range. - * - * The text to be copied or moved is inserted at destIndex; - * it does not replace or overwrite any existing text. - * - * The iteration position is left following the newly inserted text - * at the destination position. - * - * This function is only available on UText types that support writing, - * that is, ones where utext_isWritable() returns TRUE. - * - * When using this function, there should be only a single UText opened onto the - * underlying native text string. Behavior after a copy operation - * on a UText is undefined in any other additional UTexts that refer to the - * modified string. - * - * @param ut The UText representing the text to be operated on. - * @param nativeStart The native index of the start of the region to be copied or moved - * @param nativeLimit The native index of the character position following the region - * to be copied. - * @param destIndex The native destination index to which the source substring is - * copied or moved. - * @param move If TRUE, then the substring is moved, not copied/duplicated. - * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION - * - * @stable ICU 3.4 - */ -U_STABLE void U_EXPORT2 -utext_copy(UText *ut, - int64_t nativeStart, int64_t nativeLimit, - int64_t destIndex, - UBool move, - UErrorCode *status); - - -/** - *

- * Freeze a UText. This prevents any modification to the underlying text itself - * by means of functions operating on this UText. - *

- *

- * Once frozen, a UText can not be unfrozen. The intent is to ensure - * that a the text underlying a frozen UText wrapper cannot be modified via that UText. - *

- *

- * Caution: freezing a UText will disable changes made via the specific - * frozen UText wrapper only; it will not have any effect on the ability to - * directly modify the text by bypassing the UText. Any such backdoor modifications - * are always an error while UText access is occurring because the underlying - * text can get out of sync with UText's buffering. - *

- * - * @param ut The UText to be frozen. - * @see utext_isWritable() - * @stable ICU 3.6 - */ -U_STABLE void U_EXPORT2 -utext_freeze(UText *ut); - - -/** - * UText provider properties (bit field indexes). - * - * @see UText - * @stable ICU 3.4 - */ -enum { - /** - * It is potentially time consuming for the provider to determine the length of the text. - * @stable ICU 3.4 - */ - UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, - /** - * Text chunks remain valid and usable until the text object is modified or - * deleted, not just until the next time the access() function is called - * (which is the default). - * @stable ICU 3.4 - */ - UTEXT_PROVIDER_STABLE_CHUNKS = 2, - /** - * The provider supports modifying the text via the replace() and copy() - * functions. - * @see Replaceable - * @stable ICU 3.4 - */ - UTEXT_PROVIDER_WRITABLE = 3, - /** - * There is meta data associated with the text. - * @see Replaceable::hasMetaData() - * @stable ICU 3.4 - */ - UTEXT_PROVIDER_HAS_META_DATA = 4, - /** - * Text provider owns the text storage. - * Generally occurs as the result of a deep clone of the UText. - * When closing the UText, the associated text must - * also be closed/deleted/freed/ whatever is appropriate. - * @stable ICU 3.6 - */ - UTEXT_PROVIDER_OWNS_TEXT = 5 -}; - -/** - * Function type declaration for UText.clone(). - * - * clone a UText. Much like opening a UText where the source text is itself - * another UText. - * - * A deep clone will copy both the UText data structures and the underlying text. - * The original and cloned UText will operate completely independently; modifications - * made to the text in one will not effect the other. Text providers are not - * required to support deep clones. The user of clone() must check the status return - * and be prepared to handle failures. - * - * A shallow clone replicates only the UText data structures; it does not make - * a copy of the underlying text. 
Shallow clones can be used as an efficient way to - * have multiple iterators active in a single text string that is not being - * modified. - * - * A shallow clone operation must not fail except for truly exceptional conditions such - * as memory allocation failures. - * - * A UText and its clone may be safely concurrently accessed by separate threads. - * This is true for both shallow and deep clones. - * It is the responsibility of the Text Provider to ensure that this thread safety - * constraint is met. - - * - * @param dest A UText struct to be filled in with the result of the clone operation, - * or NULL if the clone function should heap-allocate a new UText struct. - * @param src The UText to be cloned. - * @param deep TRUE to request a deep clone, FALSE for a shallow clone. - * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR - * should be returned if the text provider is unable to clone the - * original text. - * @return The newly created clone, or NULL if the clone operation failed. - * - * @stable ICU 3.4 - */ -typedef UText * U_CALLCONV -UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); - - -/** - * Function type declaration for UText.nativeLength(). - * - * @param ut the UText to get the length of. - * @return the length, in the native units of the original text string. - * @see UText - * @stable ICU 3.4 - */ -typedef int64_t U_CALLCONV -UTextNativeLength(UText *ut); - -/** - * Function type declaration for UText.access(). Get the description of the text chunk - * containing the text at a requested native index. The UText's iteration - * position will be left at the requested index. If the index is out - * of bounds, the iteration position will be left at the start or end - * of the string, as appropriate. - * - * Chunks must begin and end on code point boundaries. A single code point - * comprised of multiple storage units must never span a chunk boundary. 
- * - * - * @param ut the UText being accessed. - * @param nativeIndex Requested index of the text to be accessed. - * @param forward If TRUE, then the returned chunk must contain text - * starting from the index, so that start<=index - * The size (number of 16 bit UChars) in the data to be extracted is returned. The - * full amount is returned, even when the specified buffer size is smaller. - *

- * The extracted string will (if you are a user) / must (if you are a text provider) - * be NUL-terminated if there is sufficient space in the destination buffer. - * - * @param ut the UText from which to extract data. - * @param nativeStart the native index of the first character to extract. - * @param nativeLimit the native string index of the position following the last - * character to extract. - * @param dest the UChar (UTF-16) buffer into which the extracted text is placed - * @param destCapacity The size, in UChars, of the destination buffer. May be zero - * for precomputing the required size. - * @param status receives any error status. - * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for - * preflighting. - * @return Number of UChars in the data. Does not include a trailing NUL. - * - * @stable ICU 3.4 - */ -typedef int32_t U_CALLCONV -UTextExtract(UText *ut, - int64_t nativeStart, int64_t nativeLimit, - UChar *dest, int32_t destCapacity, - UErrorCode *status); - -/** - * Function type declaration for UText.replace(). - * - * Replace a range of the original text with a replacement text. - * - * Leaves the current iteration position at the position following the - * newly inserted replacement text. - * - * This function need only be implemented on UText types that support writing. - * - * When using this function, there should be only a single UText opened onto the - * underlying native text string. The function is responsible for updating the - * text chunk within the UText to reflect the updated iteration position, - * taking into account any changes to the underlying string's structure caused - * by the replace operation. - * - * @param ut the UText representing the text to be operated on. - * @param nativeStart the index of the start of the region to be replaced - * @param nativeLimit the index of the character following the region to be replaced. 
- * @param replacementText pointer to the replacement text - * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated. - * @param status receives any error status. Possible errors include - * U_NO_WRITE_PERMISSION - * - * @return The signed number of (native) storage units by which - * the length of the text expanded or contracted. - * - * @stable ICU 3.4 - */ -typedef int32_t U_CALLCONV -UTextReplace(UText *ut, - int64_t nativeStart, int64_t nativeLimit, - const UChar *replacementText, int32_t replacmentLength, - UErrorCode *status); - -/** - * Function type declaration for UText.copy(). - * - * Copy or move a substring from one position to another within the text, - * while retaining any metadata associated with the text. - * This function is used to duplicate or reorder substrings. - * The destination index must not overlap the source range. - * - * The text to be copied or moved is inserted at destIndex; - * it does not replace or overwrite any existing text. - * - * This function need only be implemented for UText types that support writing. - * - * When using this function, there should be only a single UText opened onto the - * underlying native text string. The function is responsible for updating the - * text chunk within the UText to reflect the updated iteration position, - * taking into account any changes to the underlying string's structure caused - * by the replace operation. - * - * @param ut The UText representing the text to be operated on. - * @param nativeStart The index of the start of the region to be copied or moved - * @param nativeLimit The index of the character following the region to be replaced. - * @param nativeDest The destination index to which the source substring is copied or moved. - * @param move If TRUE, then the substring is moved, not copied/duplicated. - * @param status receives any error status. 
Possible errors include U_NO_WRITE_PERMISSION - * - * @stable ICU 3.4 - */ -typedef void U_CALLCONV -UTextCopy(UText *ut, - int64_t nativeStart, int64_t nativeLimit, - int64_t nativeDest, - UBool move, - UErrorCode *status); - -/** - * Function type declaration for UText.mapOffsetToNative(). - * Map from the current UChar offset within the current text chunk to - * the corresponding native index in the original source text. - * - * This is required only for text providers that do not use native UTF-16 indexes. - * - * @param ut the UText. - * @return Absolute (native) index corresponding to chunkOffset in the current chunk. - * The returned native index should always be to a code point boundary. - * - * @stable ICU 3.4 - */ -typedef int64_t U_CALLCONV -UTextMapOffsetToNative(const UText *ut); - -/** - * Function type declaration for UText.mapIndexToUTF16(). - * Map from a native index to a UChar offset within a text chunk. - * Behavior is undefined if the native index does not fall within the - * current chunk. - * - * This function is required only for text providers that do not use native UTF-16 indexes. - * - * @param ut The UText containing the text chunk. - * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit. - * @return Chunk-relative UTF-16 offset corresponding to the specified native - * index. - * - * @stable ICU 3.4 - */ -typedef int32_t U_CALLCONV -UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); - - -/** - * Function type declaration for UText.utextClose(). - * - * A Text Provider close function is only required for provider types that make - * allocations in their open function (or other functions) that must be - * cleaned when the UText is closed. - * - * The allocation of the UText struct itself and any "extra" storage - * associated with the UText is handled by the common UText implementation - * and does not require provider specific cleanup in a close function. 
- * - * Most UText provider implementations do not need to implement this function. - * - * @param ut A UText object to be closed. - * - * @stable ICU 3.4 - */ -typedef void U_CALLCONV -UTextClose(UText *ut); - - -/** - * (public) Function dispatch table for UText. - * Conceptually very much like a C++ Virtual Function Table. - * This struct defines the organization of the table. - * Each text provider implementation must provide an - * actual table that is initialized with the appropriate functions - * for the type of text being handled. - * @stable ICU 3.6 - */ -struct UTextFuncs { - /** - * (public) Function table size, sizeof(UTextFuncs) - * Intended for use should the table grow to accommodate added - * functions in the future, to allow tests for older format - * function tables that do not contain the extensions. - * - * Fields are placed for optimal alignment on - * 32/64/128-bit-pointer machines, by normally grouping together - * 4 32-bit fields, - * 4 pointers, - * 2 64-bit fields - * in sequence. - * @stable ICU 3.6 - */ - int32_t tableSize; - - /** - * (private) Alignment padding. - * Do not use, reserved for use by the UText framework only. - * @internal - */ - int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3; - - - /** - * (public) Function pointer for UTextClone - * - * @see UTextClone - * @stable ICU 3.6 - */ - UTextClone *clone; - - /** - * (public) function pointer for UTextLength - * May be expensive to compute! - * - * @see UTextLength - * @stable ICU 3.6 - */ - UTextNativeLength *nativeLength; - - /** - * (public) Function pointer for UTextAccess. - * - * @see UTextAccess - * @stable ICU 3.6 - */ - UTextAccess *access; - - /** - * (public) Function pointer for UTextExtract. - * - * @see UTextExtract - * @stable ICU 3.6 - */ - UTextExtract *extract; - - /** - * (public) Function pointer for UTextReplace. 
- * - * @see UTextReplace - * @stable ICU 3.6 - */ - UTextReplace *replace; - - /** - * (public) Function pointer for UTextCopy. - * - * @see UTextCopy - * @stable ICU 3.6 - */ - UTextCopy *copy; - - /** - * (public) Function pointer for UTextMapOffsetToNative. - * - * @see UTextMapOffsetToNative - * @stable ICU 3.6 - */ - UTextMapOffsetToNative *mapOffsetToNative; - - /** - * (public) Function pointer for UTextMapNativeIndexToUTF16. - * - * @see UTextMapNativeIndexToUTF16 - * @stable ICU 3.6 - */ - UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; - - /** - * (public) Function pointer for UTextClose. - * - * @see UTextClose - * @stable ICU 3.6 - */ - UTextClose *close; - - /** - * (private) Spare function pointer - * @internal - */ - UTextClose *spare1; - - /** - * (private) Spare function pointer - * @internal - */ - UTextClose *spare2; - - /** - * (private) Spare function pointer - * @internal - */ - UTextClose *spare3; - -}; -/** - * Function dispatch table for UText - * @see UTextFuncs - */ -typedef struct UTextFuncs UTextFuncs; - - /** - * UText struct. Provides the interface between the generic UText access code - * and the UText provider code that works on specific kinds of - * text (UTF-8, noncontiguous UTF-16, whatever.) - * - * Applications that are using predefined types of text providers - * to pass text data to ICU services will have no need to view the - * internals of the UText structs that they open. - * - * @stable ICU 3.6 - */ -struct UText { - /** - * (private) Magic. Used to help detect when UText functions are handed - * invalid or uninitialized UText structs. - * utext_openXYZ() functions take an initialized, - * but not necessarily open, UText struct as an - * optional fill-in parameter. This magic field - * is used to check for that initialization. - * Text provider close functions must NOT clear - * the magic field because that would prevent - * reuse of the UText struct. 
- * @internal - */ - uint32_t magic; - - - /** - * (private) Flags for managing the allocation and freeing of - * memory associated with this UText. - * @internal - */ - int32_t flags; - - - /** - * Text provider properties. This set of flags is maintained by the - * text provider implementation. - * @stable ICU 3.4 - */ - int32_t providerProperties; - - /** - * (public) sizeOfStruct=sizeof(UText) - * Allows possible backward compatible extension. - * - * @stable ICU 3.4 - */ - int32_t sizeOfStruct; - - /* ------ 16 byte alignment boundary ----------- */ - - - /** - * (protected) Native index of the first character position following - * the current chunk. - * @stable ICU 3.6 - */ - int64_t chunkNativeLimit; - - /** - * (protected) Size in bytes of the extra space (pExtra). - * @stable ICU 3.4 - */ - int32_t extraSize; - - /** - * (protected) The highest chunk offset where native indexing and - * chunk (UTF-16) indexing correspond. For UTF-16 sources, value - * will be equal to chunkLength. - * - * @stable ICU 3.6 - */ - int32_t nativeIndexingLimit; - - /* ---- 16 byte alignment boundary------ */ - - /** - * (protected) Native index of the first character in the text chunk. - * @stable ICU 3.6 - */ - int64_t chunkNativeStart; - - /** - * (protected) Current iteration position within the text chunk (UTF-16 buffer). - * This is the index to the character that will be returned by utext_next32(). - * @stable ICU 3.6 - */ - int32_t chunkOffset; - - /** - * (protected) Length the text chunk (UTF-16 buffer), in UChars. - * @stable ICU 3.6 - */ - int32_t chunkLength; - - /* ---- 16 byte alignment boundary-- */ - - - /** - * (protected) pointer to a chunk of text in UTF-16 format. - * May refer either to original storage of the source of the text, or - * if conversion was required, to a buffer owned by the UText. - * @stable ICU 3.6 - */ - const UChar *chunkContents; - - /** - * (public) Pointer to Dispatch table for accessing functions for this UText. 
- * @stable ICU 3.6 - */ - const UTextFuncs *pFuncs; - - /** - * (protected) Pointer to additional space requested by the - * text provider during the utext_open operation. - * @stable ICU 3.4 - */ - void *pExtra; - - /** - * (protected) Pointer to string or text-containing object or similar. - * This is the source of the text that this UText is wrapping, in a format - * that is known to the text provider functions. - * @stable ICU 3.4 - */ - const void *context; - - /* --- 16 byte alignment boundary--- */ - - /** - * (protected) Pointer fields available for use by the text provider. - * Not used by UText common code. - * @stable ICU 3.6 - */ - const void *p; - /** - * (protected) Pointer fields available for use by the text provider. - * Not used by UText common code. - * @stable ICU 3.6 - */ - const void *q; - /** - * (protected) Pointer fields available for use by the text provider. - * Not used by UText common code. - * @stable ICU 3.6 - */ - const void *r; - - /** - * Private field reserved for future use by the UText framework - * itself. This is not to be touched by the text providers. - * @internal ICU 3.4 - */ - void *privP; - - - /* --- 16 byte alignment boundary--- */ - - - /** - * (protected) Integer field reserved for use by the text provider. - * Not used by the UText framework, or by the client (user) of the UText. - * @stable ICU 3.4 - */ - int64_t a; - - /** - * (protected) Integer field reserved for use by the text provider. - * Not used by the UText framework, or by the client (user) of the UText. - * @stable ICU 3.4 - */ - int32_t b; - - /** - * (protected) Integer field reserved for use by the text provider. - * Not used by the UText framework, or by the client (user) of the UText. - * @stable ICU 3.4 - */ - int32_t c; - - /* ---- 16 byte alignment boundary---- */ - - - /** - * Private field reserved for future use by the UText framework - * itself. This is not to be touched by the text providers. 
- * @internal ICU 3.4 - */ - int64_t privA; - /** - * Private field reserved for future use by the UText framework - * itself. This is not to be touched by the text providers. - * @internal ICU 3.4 - */ - int32_t privB; - /** - * Private field reserved for future use by the UText framework - * itself. This is not to be touched by the text providers. - * @internal ICU 3.4 - */ - int32_t privC; -}; - - -/** - * Common function for use by Text Provider implementations to allocate and/or initialize - * a new UText struct. To be called in the implementation of utext_open() functions. - * If the supplied UText parameter is null, a new UText struct will be allocated on the heap. - * If the supplied UText is already open, the provider's close function will be called - * so that the struct can be reused by the open that is in progress. - * - * @param ut pointer to a UText struct to be re-used, or null if a new UText - * should be allocated. - * @param extraSpace The amount of additional space to be allocated as part - * of this UText, for use by types of providers that require - * additional storage. - * @param status Errors are returned here. - * @return pointer to the UText, allocated if necessary, with extra space set up if requested. - * @stable ICU 3.4 - */ -U_STABLE UText * U_EXPORT2 -utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); - -#ifndef U_HIDE_INTERNAL_API -/** - * @internal - * Value used to help identify correctly initialized UText structs. - * Note: must be publicly visible so that UTEXT_INITIALIZER can access it. - */ -enum { - UTEXT_MAGIC = 0x345ad82c -}; -#endif /* U_HIDE_INTERNAL_API */ - -/** - * initializer to be used with local (stack) instances of a UText - * struct. UText structs must be initialized before passing - * them to one of the utext_open functions. 
- * - * @stable ICU 3.6 - */ -#define UTEXT_INITIALIZER { \ - UTEXT_MAGIC, /* magic */ \ - 0, /* flags */ \ - 0, /* providerProps */ \ - sizeof(UText), /* sizeOfStruct */ \ - 0, /* chunkNativeLimit */ \ - 0, /* extraSize */ \ - 0, /* nativeIndexingLimit */ \ - 0, /* chunkNativeStart */ \ - 0, /* chunkOffset */ \ - 0, /* chunkLength */ \ - NULL, /* chunkContents */ \ - NULL, /* pFuncs */ \ - NULL, /* pExtra */ \ - NULL, /* context */ \ - NULL, NULL, NULL, /* p, q, r */ \ - NULL, /* privP */ \ - 0, 0, 0, /* a, b, c */ \ - 0, 0, 0 /* privA,B,C, */ \ - } - - -U_CDECL_END - - - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/utf.h b/deps/node/deps/icu-small/source/common/unicode/utf.h deleted file mode 100644 index aa569806..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/utf.h +++ /dev/null @@ -1,225 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: utf.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999sep09 -* created by: Markus W. Scherer -*/ - -/** - * \file - * \brief C API: Code point macros - * - * This file defines macros for checking whether a code point is - * a surrogate or a non-character etc. - * - * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h - * and itself includes utf8.h and utf16.h after some - * common definitions. - * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be - * included explicitly if their definitions are used. - * - * utf8.h and utf16.h define macros for efficiently getting code points - * in and out of UTF-8/16 strings. 
- * utf16.h macros have "U16_" prefixes. - * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling. - * - * ICU mostly processes 16-bit Unicode strings. - * Most of the time, such strings are well-formed UTF-16. - * Single, unpaired surrogates must be handled as well, and are treated in ICU - * like regular code points where possible. - * (Pairs of surrogate code points are indistinguishable from supplementary - * code points encoded as pairs of supplementary code units.) - * - * In fact, almost all Unicode code points in normal text (>99%) - * are on the BMP (<=U+ffff) and even <=U+d7ff. - * ICU functions handle supplementary code points (U+10000..U+10ffff) - * but are optimized for the much more frequently occurring BMP code points. - * - * umachine.h defines UChar to be an unsigned 16-bit integer. - * Since ICU 59, ICU uses char16_t in C++, UChar only in C, - * and defines UChar=char16_t by default. See the UChar API docs for details. - * - * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit - * Unicode code point (Unicode scalar value, 0..0x10ffff) and U_SENTINEL (-1). - * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as - * the definition of UChar. For details see the documentation for UChar32 itself. - * - * utf.h defines a small number of C macros for single Unicode code points. - * These are simple checks for surrogates and non-characters. - * For actual Unicode character properties see uchar.h. - * - * By default, string operations must be done with error checking in case - * a string is not well-formed UTF-16 or UTF-8. - * - * The U16_ macros detect if a surrogate code unit is unpaired - * (lead unit without trail unit or vice versa) and just return the unit itself - * as the code point. - * - * The U8_ macros detect illegal byte sequences and return a negative value. 
- * Starting with ICU 60, the observable length of a single illegal byte sequence - * skipped by one of these macros follows the Unicode 6+ recommendation - * which is consistent with the W3C Encoding Standard. - * - * There are ..._OR_FFFD versions of both U16_ and U8_ macros - * that return U+FFFD for illegal code unit sequences. - * - * The regular "safe" macros require that the initial, passed-in string index - * is within bounds. They only check the index when they read more than one - * code unit. This is usually done with code similar to the following loop: - *

while(i
- *
- * When it is safe to assume that text is well-formed UTF-16
- * (does not contain single, unpaired surrogates), then one can use
- * U16_..._UNSAFE macros.
- * These do not check for proper code unit sequences or truncated text and may
- * yield wrong results or even cause a crash if they are used with "malformed"
- * text.
- * In practice, U16_..._UNSAFE macros will produce slightly less code but
- * should not be faster because the processing is only different when a
- * surrogate code unit is detected, which will be rare.
- *
- * Similarly for UTF-8, there are "safe" macros without a suffix,
- * and U8_..._UNSAFE versions.
- * The performance differences are much larger here because UTF-8 provides so
- * many opportunities for malformed sequences.
- * The unsafe UTF-8 macros are entirely implemented inside the macro definitions
- * and are fast, while the safe UTF-8 macros call functions for some complicated cases.
- *
- * Unlike with UTF-16, malformed sequences cannot be expressed with distinct
- * code point values (0..U+10ffff). They are indicated with negative values instead.
- *
- * For more information see the ICU User Guide Strings chapter
- * (http://userguide.icu-project.org/strings).
- *
- * Usage:
- * ICU coding guidelines for if() statements should be followed when using these macros.
- * Compound statements (curly braces {}) must be used  for if-else-while...
- * bodies and all macro statements should be terminated with semicolon.
- *
- * @stable ICU 2.4
- */
-
-#ifndef __UTF_H__
-#define __UTF_H__
-
-#include "unicode/umachine.h"
-/* include the utfXX.h after the following definitions */
-
-/* single-code point definitions -------------------------------------------- */
-
-/**
- * Is this code point a Unicode noncharacter?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U_IS_UNICODE_NONCHAR(c) \
-    ((c)>=0xfdd0 && \
-     ((c)<=0xfdef || ((c)&0xfffe)==0xfffe) && (c)<=0x10ffff)
-
-/**
- * Is c a Unicode code point value (0..U+10ffff)
- * that can be assigned a character?
- *
- * Code points that are not characters include:
- * - single surrogate code points (U+d800..U+dfff, 2048 code points)
- * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
- * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
- * - the highest Unicode code point value is U+10ffff
- *
- * This means that all code points below U+d800 are character code points,
- * and that boundary is tested first for performance.
- *
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U_IS_UNICODE_CHAR(c) \
-    ((uint32_t)(c)<0xd800 || \
-        (0xdfff<(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c)))
-
-/**
- * Is this code point a BMP code point (U+0000..U+ffff)?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.8
- */
-#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff)
-
-/**
- * Is this code point a supplementary code point (U+10000..U+10ffff)?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.8
- */
-#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff)
-
-/**
- * Is this code point a lead surrogate (U+d800..U+dbff)?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
-
-/**
- * Is this code point a trail surrogate (U+dc00..U+dfff)?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
-
-/**
- * Is this code point a surrogate (U+d800..U+dfff)?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
-
-/**
- * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
- * is it a lead surrogate?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
-
-/**
- * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
- * is it a trail surrogate?
- * @param c 32-bit code point
- * @return TRUE or FALSE
- * @stable ICU 4.2
- */
-#define U_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
-
-/* include the utfXX.h ------------------------------------------------------ */
-
-#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
-
-#include "unicode/utf8.h"
-#include "unicode/utf16.h"
-
-/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
-#include "unicode/utf_old.h"
-
-#endif  /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */
-
-#endif  /* __UTF_H__ */
diff --git a/deps/node/deps/icu-small/source/common/unicode/utf16.h b/deps/node/deps/icu-small/source/common/unicode/utf16.h
deleted file mode 100644
index 0908b4f0..00000000
--- a/deps/node/deps/icu-small/source/common/unicode/utf16.h
+++ /dev/null
@@ -1,733 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-*   Copyright (C) 1999-2012, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  utf16.h
-*   encoding:   UTF-8
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 1999sep09
-*   created by: Markus W. Scherer
-*/
-
-/**
- * \file
- * \brief C API: 16-bit Unicode handling macros
- *
- * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
- *
- * For more information see utf.h and the ICU User Guide Strings chapter
- * (http://userguide.icu-project.org/strings).
- *
- * Usage:
- * ICU coding guidelines for if() statements should be followed when using these macros.
- * Compound statements (curly braces {}) must be used  for if-else-while...
- * bodies and all macro statements should be terminated with semicolon.
- */
-
-#ifndef __UTF16_H__
-#define __UTF16_H__
-
-#include "unicode/umachine.h"
-#ifndef __UTF_H__
-#   include "unicode/utf.h"
-#endif
-
-/* single-code point definitions -------------------------------------------- */
-
-/**
- * Does this code unit alone encode a code point (BMP, not a surrogate)?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
-
-/**
- * Is this code unit a lead surrogate (U+d800..U+dbff)?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
-
-/**
- * Is this code unit a trail surrogate (U+dc00..U+dfff)?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
-
-/**
- * Is this code unit a surrogate (U+d800..U+dfff)?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
-
-/**
- * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
- * is it a lead surrogate?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
-
-/**
- * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
- * is it a trail surrogate?
- * @param c 16-bit code unit
- * @return TRUE or FALSE
- * @stable ICU 4.2
- */
-#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
-
-/**
- * Helper constant for U16_GET_SUPPLEMENTARY.
- * @internal
- */
-#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
-
-/**
- * Get a supplementary code point value (U+10000..U+10ffff)
- * from its lead and trail surrogates.
- * The result is undefined if the input values are not
- * lead and trail surrogates.
- *
- * @param lead lead surrogate (U+d800..U+dbff)
- * @param trail trail surrogate (U+dc00..U+dfff)
- * @return supplementary code point (U+10000..U+10ffff)
- * @stable ICU 2.4
- */
-#define U16_GET_SUPPLEMENTARY(lead, trail) \
-    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
-
-
-/**
- * Get the lead surrogate (0xd800..0xdbff) for a
- * supplementary code point (0x10000..0x10ffff).
- * @param supplementary 32-bit code point (U+10000..U+10ffff)
- * @return lead surrogate (U+d800..U+dbff) for supplementary
- * @stable ICU 2.4
- */
-#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
-
-/**
- * Get the trail surrogate (0xdc00..0xdfff) for a
- * supplementary code point (0x10000..0x10ffff).
- * @param supplementary 32-bit code point (U+10000..U+10ffff)
- * @return trail surrogate (U+dc00..U+dfff) for supplementary
- * @stable ICU 2.4
- */
-#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
-
-/**
- * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
- * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
- * @param c 32-bit code point
- * @return 1 or 2
- * @stable ICU 2.4
- */
-#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
-
-/**
- * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
- * @return 2
- * @stable ICU 2.4
- */
-#define U16_MAX_LENGTH 2
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * The offset may point to either the lead or trail surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the adjacent matching surrogate as well.
- * The result is undefined if the offset points to a single, unpaired surrogate.
- * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U16_GET
- * @stable ICU 2.4
- */
-#define U16_GET_UNSAFE(s, i, c) { \
-    (c)=(s)[i]; \
-    if(U16_IS_SURROGATE(c)) { \
-        if(U16_IS_SURROGATE_LEAD(c)) { \
-            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
-        } else { \
-            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
-        } \
-    } \
-}
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The offset may point to either the lead or trail surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the adjacent matching surrogate as well.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * If the offset points to a single, unpaired surrogate, then
- * c is set to that unpaired surrogate.
- * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
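- * @param i string offset, must be start<=i<length
- * @param length string length
- * @param c output UChar32 variable
- * @see U16_GET_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_GET(s, start, i, length, c) { \
-    (c)=(s)[i]; \
-    if(U16_IS_SURROGATE(c)) { \
-        uint16_t __c2; \
-        if(U16_IS_SURROGATE_LEAD(c)) { \
-            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
-                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
-            } \
-        } else { \
-            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \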
-                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
-            } \
-        } \
-    } \
-}
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The offset may point to either the lead or trail surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the adjacent matching surrogate as well.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * If the offset points to a single, unpaired surrogate, then
- * c is set to U+FFFD.
- * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
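- * @param i string offset, must be start<=i<length
- * @param length string length
- * @param c output UChar32 variable
- * @see U16_GET_UNSAFE
- * @stable ICU 60
- */
-#define U16_GET_OR_FFFD(s, start, i, length, c) { \
-    (c)=(s)[i]; \
-    if(U16_IS_SURROGATE(c)) { \
-        uint16_t __c2; \
-        if(U16_IS_SURROGATE_LEAD(c)) { \
-            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
-                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
-            } else { \
-                (c)=0xfffd; \
-            } \
-        } else { \
-            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \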
-                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
-            } else { \
-                (c)=0xfffd; \
-            } \
-        } \
-    } \
-}
-
-/* definitions with forward iteration --------------------------------------- */
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * The offset may point to the lead surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the following trail surrogate as well.
- * If the offset points to a trail surrogate, then that itself
- * will be returned as the code point.
- * The result is undefined if the offset points to a single, unpaired lead surrogate.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U16_NEXT
- * @stable ICU 2.4
- */
-#define U16_NEXT_UNSAFE(s, i, c) { \
-    (c)=(s)[(i)++]; \
-    if(U16_IS_LEAD(c)) { \
-        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
-    } \
-}
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * The offset may point to the lead surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the following trail surrogate as well.
- * If the offset points to a trail surrogate or
- * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
- *
- * @param s const UChar * string
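- * @param i string offset, must be i<length
- * @param length string length
- * @param c output UChar32 variable
- * @see U16_NEXT_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_NEXT(s, i, length, c) { \
-    (c)=(s)[(i)++]; \
-    if(U16_IS_LEAD(c)) { \
-        uint16_t __c2; \
-        if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
-            ++(i); \
-            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
-        } \
-    } \
-}
-
-/**
- * Get a code point from a string at a code point boundary offset,
- * and advance the offset to the next code point boundary.
- * (Post-incrementing forward iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * The offset may point to the lead surrogate unit
- * for a supplementary code point, in which case the macro will read
- * the following trail surrogate as well.
- * If the offset points to a trail surrogate or
- * to a single, unpaired lead surrogate, then c is set to U+FFFD.
- *
- * @param s const UChar * string
- * @param i string offset, must be i<length
- * @param length string length
- * @param c output UChar32 variable
- * @see U16_NEXT_UNSAFE
- * @stable ICU 60
- */
-#define U16_NEXT_OR_FFFD(s, i, length, c) { \
-    (c)=(s)[(i)++]; \
-    if(U16_IS_SURROGATE(c)) { \
-        uint16_t __c2; \
-        if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
-            ++(i); \
-            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
-        } else { \
-            (c)=0xfffd; \
-        } \
-    } \
-}
-
-/**
- * Append a code point to a string, overwriting 1 or 2 code units.
- * The offset points to the current end of the string contents
- * and is advanced (post-increment).
- * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
- * Otherwise, the result is undefined.
- *
- * @param s const UChar * string buffer
- * @param i string offset
- * @param c code point to append
- * @see U16_APPEND
- * @stable ICU 2.4
- */
-#define U16_APPEND_UNSAFE(s, i, c) { \
-    if((uint32_t)(c)<=0xffff) { \
-        (s)[(i)++]=(uint16_t)(c); \
-    } else { \
-        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \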
-        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
-    } \
-}
-
-/**
- * Append a code point to a string, overwriting 1 or 2 code units.
- * The offset points to the current end of the string contents
- * and is advanced (post-increment).
- * "Safe" macro, checks for a valid code point.
- * If a surrogate pair is written, checks for sufficient space in the string.
- * If the code point is not valid or a trail surrogate does not fit,
- * then isError is set to TRUE.
- *
- * @param s const UChar * string buffer
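- * @param i string offset, must be i<capacity
- * @param capacity size of the string buffer
- * @param c code point to append
- * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
- * @see U16_APPEND_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_APPEND(s, i, capacity, c, isError) { \
-    if((uint32_t)(c)<=0xffff) { \
-        (s)[(i)++]=(uint16_t)(c); \
-    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
-        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \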
-        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
-    } else /* c>0x10ffff or not enough space */ { \
-        (isError)=TRUE; \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the next.
- * (Post-incrementing iteration.)
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @see U16_FWD_1
- * @stable ICU 2.4
- */
-#define U16_FWD_1_UNSAFE(s, i) { \
-    if(U16_IS_LEAD((s)[(i)++])) { \
-        ++(i); \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the next.
- * (Post-incrementing iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * @param s const UChar * string
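- * @param i string offset, must be i<length
- * @param length string length
- * @see U16_FWD_1_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_FWD_1(s, i, length) { \
-    if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
-        ++(i); \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the n-th next one,
- * i.e., move forward by n code points.
- * (Post-incrementing iteration.)
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param n number of code points to skip
- * @see U16_FWD_N
- * @stable ICU 2.4
- */
-#define U16_FWD_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \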
-        U16_FWD_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the n-th next one,
- * i.e., move forward by n code points.
- * (Post-incrementing iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * @param s const UChar * string
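- * @param i int32_t string offset, must be i<length
- * @param length int32_t string length
- * @param n number of code points to skip
- * @see U16_FWD_N_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_FWD_N(s, i, length, n) { \
-    int32_t __N=(n); \
-    while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \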
-        U16_FWD_1(s, i, length); \
-        --__N; \
-    } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary
- * at the start of a code point.
- * If the offset points to the trail surrogate of a surrogate pair,
- * then the offset is decremented.
- * Otherwise, it is not modified.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @see U16_SET_CP_START
- * @stable ICU 2.4
- */
-#define U16_SET_CP_START_UNSAFE(s, i) { \
-    if(U16_IS_TRAIL((s)[i])) { \
-        --(i); \
-    } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary
- * at the start of a code point.
- * If the offset points to the trail surrogate of a surrogate pair,
- * then the offset is decremented.
- * Otherwise, it is not modified.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<=i
- * @see U16_SET_CP_START_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_SET_CP_START(s, start, i) { \
-    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
-        --(i); \
-    } \
-}
-
-/* definitions with backward iteration -------------------------------------- */
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a trail surrogate unit
- * for a supplementary code point, then the macro will read
- * the preceding lead surrogate as well.
- * If the offset is behind a lead surrogate, then that itself
- * will be returned as the code point.
- * The result is undefined if the offset is behind a single, unpaired trail surrogate.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U16_PREV
- * @stable ICU 2.4
- */
-#define U16_PREV_UNSAFE(s, i, c) { \
-    (c)=(s)[--(i)]; \
-    if(U16_IS_TRAIL(c)) { \
-        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a trail surrogate unit
- * for a supplementary code point, then the macro will read
- * the preceding lead surrogate as well.
- * If the offset is behind a lead surrogate or behind a single, unpaired
- * trail surrogate, then c is set to that unpaired surrogate.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
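- * @param i string offset, must be start<i
- * @param c output UChar32 variable
- * @see U16_PREV_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_PREV(s, start, i, c) { \
-    (c)=(s)[--(i)]; \
-    if(U16_IS_TRAIL(c)) { \
-        uint16_t __c2; \
-        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \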
-            --(i); \
-            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
-        } \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a trail surrogate unit
- * for a supplementary code point, then the macro will read
- * the preceding lead surrogate as well.
- * If the offset is behind a lead surrogate or behind a single, unpaired
- * trail surrogate, then c is set to U+FFFD.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
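- * @param i string offset, must be start<i
- * @param c output UChar32 variable
- * @see U16_PREV_UNSAFE
- * @stable ICU 60
- */
-#define U16_PREV_OR_FFFD(s, start, i, c) { \
-    (c)=(s)[--(i)]; \
-    if(U16_IS_SURROGATE(c)) { \
-        uint16_t __c2; \
-        if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \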
-            --(i); \
-            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
-        } else { \
-            (c)=0xfffd; \
-        } \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the previous one.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @see U16_BACK_1
- * @stable ICU 2.4
- */
-#define U16_BACK_1_UNSAFE(s, i) { \
-    if(U16_IS_TRAIL((s)[--(i)])) { \
-        --(i); \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the previous one.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * @param s const UChar * string
- * @param start starting string offset (usually 0)
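- * @param i string offset, must be start<i
- * @see U16_BACK_1_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_BACK_1(s, start, i) { \
-    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \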
-        --(i); \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the n-th one before it,
- * i.e., move backward by n code points.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @param n number of code points to skip
- * @see U16_BACK_N
- * @stable ICU 2.4
- */
-#define U16_BACK_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \
-        U16_BACK_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the n-th one before it,
- * i.e., move backward by n code points.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * @param s const UChar * string
- * @param start start of string
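- * @param i string offset, must be start<i
- * @param n number of code points to skip
- * @see U16_BACK_N_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_BACK_N(s, start, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0 && (i)>(start)) { \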
-        U16_BACK_1(s, start, i); \
-        --__N; \
-    } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary after a code point.
- * If the offset is behind the lead surrogate of a surrogate pair,
- * then the offset is incremented.
- * Otherwise, it is not modified.
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-16.
- *
- * @param s const UChar * string
- * @param i string offset
- * @see U16_SET_CP_LIMIT
- * @stable ICU 2.4
- */
-#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \
-    if(U16_IS_LEAD((s)[(i)-1])) { \
-        ++(i); \
-    } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary after a code point.
- * If the offset is behind the lead surrogate of a surrogate pair,
- * then the offset is incremented.
- * Otherwise, it is not modified.
- * The input offset may be the same as the string length.
- * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * @param s const UChar * string
- * @param start int32_t starting string offset (usually 0)
- * @param i int32_t string offset, start<=i<=length
- * @param length int32_t string length
- * @see U16_SET_CP_LIMIT_UNSAFE
- * @stable ICU 2.4
- */
-#define U16_SET_CP_LIMIT(s, start, i, length) { \
-    if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
-        ++(i); \
-    } \
-}
-
-#endif
diff --git a/deps/node/deps/icu-small/source/common/unicode/utf32.h b/deps/node/deps/icu-small/source/common/unicode/utf32.h
deleted file mode 100644
index 8822c4dd..00000000
--- a/deps/node/deps/icu-small/source/common/unicode/utf32.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-*   Copyright (C) 1999-2001, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  utf32.h
-*   encoding:   UTF-8
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 1999sep20
-*   created by: Markus W. Scherer
-*/
-/**
- * \file
- * \brief C API: UTF-32 macros
- *
- * This file is obsolete and its contents moved to utf_old.h.
- * See utf_old.h and Jitterbug 2150 and its discussion on the ICU mailing list
- * in September 2002.
- */
diff --git a/deps/node/deps/icu-small/source/common/unicode/utf8.h b/deps/node/deps/icu-small/source/common/unicode/utf8.h
deleted file mode 100644
index 1f076343..00000000
--- a/deps/node/deps/icu-small/source/common/unicode/utf8.h
+++ /dev/null
@@ -1,882 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-*   Copyright (C) 1999-2015, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  utf8.h
-*   encoding:   UTF-8
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 1999sep13
-*   created by: Markus W. Scherer
-*/
-
-/**
- * \file
- * \brief C API: 8-bit Unicode handling macros
- *
- * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
- *
- * For more information see utf.h and the ICU User Guide Strings chapter
- * (http://userguide.icu-project.org/strings).
- *
- * Usage:
- * ICU coding guidelines for if() statements should be followed when using these macros.
- * Compound statements (curly braces {}) must be used  for if-else-while...
- * bodies and all macro statements should be terminated with semicolon.
- */
-
-#ifndef __UTF8_H__
-#define __UTF8_H__
-
-#include "unicode/umachine.h"
-#ifndef __UTF_H__
-#   include "unicode/utf.h"
-#endif
-
-/* internal definitions ----------------------------------------------------- */
-
-/**
- * Counts the trail bytes for a UTF-8 lead byte.
- * Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
- * leadByte might be evaluated multiple times.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is called by public macros in this file and thus must remain stable.
- *
- * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
- * @internal
- */
-#define U8_COUNT_TRAIL_BYTES(leadByte) \
-    (U8_IS_LEAD(leadByte) ? \
-        ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0)
-
-/**
- * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
- * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
- * leadByte might be evaluated multiple times.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is called by public macros in this file and thus must remain stable.
- *
- * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
- * @internal
- */
-#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
-    (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
-
-/**
- * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is called by public macros in this file and thus must remain stable.
- * @internal
- */
-#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
-
-/**
- * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
- * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
- * Lead byte E0..EF bits 3..0 are used as byte index,
- * first trail byte bits 7..5 are used as bit index into that byte.
- * @see U8_IS_VALID_LEAD3_AND_T1
- * @internal
- */
-#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
-
-/**
- * Internal 3-byte UTF-8 validity check.
- * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
- * @internal
- */
-#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
-
-/**
- * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
- * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
- * First trail byte bits 7..4 are used as byte index,
- * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
- * @see U8_IS_VALID_LEAD4_AND_T1
- * @internal
- */
-#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
-
-/**
- * Internal 4-byte UTF-8 validity check.
- * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
- * @internal
- */
-#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
-
-/**
- * Function for handling "next code point" with error-checking.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
- * file and thus must remain stable, and should not be hidden when other internal
- * functions are hidden (otherwise public macros would fail to compile).
- * @internal
- */
-U_STABLE UChar32 U_EXPORT2
-utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
-
-/**
- * Function for handling "append code point" with error-checking.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
- * file and thus must remain stable, and should not be hidden when other internal
- * functions are hidden (otherwise public macros would fail to compile).
- * @internal
- */
-U_STABLE int32_t U_EXPORT2
-utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
-
-/**
- * Function for handling "previous code point" with error-checking.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
- * file and thus must remain stable, and should not be hidden when other internal
- * functions are hidden (otherwise public macros would fail to compile).
- * @internal
- */
-U_STABLE UChar32 U_EXPORT2
-utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
-
-/**
- * Function for handling "skip backward one code point" with error-checking.
- *
- * This is internal since it is not meant to be called directly by external clients;
- * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
- * file and thus must remain stable, and should not be hidden when other internal
- * functions are hidden (otherwise public macros would fail to compile).
- * @internal
- */
-U_STABLE int32_t U_EXPORT2
-utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
-
-/* single-code point definitions -------------------------------------------- */
-
-/**
- * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
- * @param c 8-bit code unit (byte)
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U8_IS_SINGLE(c) (((c)&0x80)==0)
-
-/**
- * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
- * @param c 8-bit code unit (byte)
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32)
-// 0x32=0xf4-0xc2
-
-/**
- * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
- * @param c 8-bit code unit (byte)
- * @return TRUE or FALSE
- * @stable ICU 2.4
- */
-#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40)
-
-/**
- * How many code units (bytes) are used for the UTF-8 encoding
- * of this Unicode code point?
- * @param c 32-bit code point
- * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
- * @stable ICU 2.4
- */
-#define U8_LENGTH(c) \
-    ((uint32_t)(c)<=0x7f ? 1 : \
-        ((uint32_t)(c)<=0x7ff ? 2 : \
-            ((uint32_t)(c)<=0xd7ff ? 3 : \
-                ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
-                    ((uint32_t)(c)<=0xffff ? 3 : 4)\
-                ) \
-            ) \
-        ) \
-    )
-
-/**
- * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
- * @return 4
- * @stable ICU 2.4
- */
-#define U8_MAX_LENGTH 4
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * The offset may point to either the lead byte or one of the trail bytes
- * for a code point, in which case the macro will read all of the bytes
- * for the code point.
- * The result is undefined if the offset points to an illegal UTF-8
- * byte sequence.
- * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U8_GET
- * @stable ICU 2.4
- */
-#define U8_GET_UNSAFE(s, i, c) { \
-    int32_t _u8_get_unsafe_index=(int32_t)(i); \
-    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
-    U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
-}
-
-/**
- * Get a code point from a string at a random-access offset,
- * without changing the offset.
- * The offset may point to either the lead byte or one of the trail bytes
- * for a code point, in which case the macro will read all of the bytes
- * for the code point.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * If the offset points to an illegal UTF-8 byte sequence, then
- * c is set to a negative value.
- * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset
- * @param i int32_t string offset, must be start<=i=0xe0 ? \
-                ((c)<0xf0 ?  /* U+0800..U+FFFF except surrogates */ \
-                    U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
-                    (__t&=0x3f, 1) \
-                :  /* U+10000..U+10FFFF */ \
-                    ((c)-=0xf0)<=4 && \
-                    U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
-                    ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
-                    (__t=(s)[i]-0x80)<=0x3f) && \
-                /* valid second-to-last trail byte */ \
-                ((c)=((c)<<6)|__t, ++(i)!=(length)) \
-            :  /* U+0080..U+07FF */ \
-                (c)>=0xc2 && ((c)&=0x1f, 1)) && \
-            /* last trail byte */ \
-            (__t=(s)[i]-0x80)<=0x3f && \
-            ((c)=((c)<<6)|__t, ++(i), 1)) { \
-        } else { \
-            (c)=(sub);  /* ill-formed*/ \
-        } \
-    } \
-}
-
-/**
- * Append a code point to a string, overwriting 1 to 4 bytes.
- * The offset points to the current end of the string contents
- * and is advanced (post-increment).
- * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
- * Otherwise, the result is undefined.
- *
- * @param s const uint8_t * string buffer
- * @param i string offset
- * @param c code point to append
- * @see U8_APPEND
- * @stable ICU 2.4
- */
-#define U8_APPEND_UNSAFE(s, i, c) { \
-    uint32_t __uc=(c); \
-    if(__uc<=0x7f) { \
-        (s)[(i)++]=(uint8_t)__uc; \
-    } else { \
-        if(__uc<=0x7ff) { \
-            (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
-        } else { \
-            if(__uc<=0xffff) { \
-                (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
-            } else { \
-                (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
-                (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
-            } \
-            (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
-        } \
-        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
-    } \
-}
-
-/**
- * Append a code point to a string, overwriting 1 to 4 bytes.
- * The offset points to the current end of the string contents
- * and is advanced (post-increment).
- * "Safe" macro, checks for a valid code point.
- * If a non-ASCII code point is written, checks for sufficient space in the string.
- * If the code point is not valid or trail bytes do not fit,
- * then isError is set to TRUE.
- *
- * @param s const uint8_t * string buffer
- * @param i int32_t string offset, must be i>6)|0xc0); \
-        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
-    } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
-        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
-        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
-        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
-    } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
-        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
-        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
-        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
-        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
-    } else { \
-        (isError)=TRUE; \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the next.
- * (Post-incrementing iteration.)
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @see U8_FWD_1
- * @stable ICU 2.4
- */
-#define U8_FWD_1_UNSAFE(s, i) { \
-    (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \
-}
-
-/**
- * Advance the string offset from one code point boundary to the next.
- * (Post-incrementing iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * @param s const uint8_t * string
- * @param i int32_t string offset, must be i=0xf0 */ { \
-            if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
-                    ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
-                    ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
-                ++(i); \
-            } \
-        } \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the n-th next one,
- * i.e., move forward by n code points.
- * (Post-incrementing iteration.)
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param n number of code points to skip
- * @see U8_FWD_N
- * @stable ICU 2.4
- */
-#define U8_FWD_N_UNSAFE(s, i, n) { \
-    int32_t __N=(n); \
-    while(__N>0) { \
-        U8_FWD_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-/**
- * Advance the string offset from one code point boundary to the n-th next one,
- * i.e., move forward by n code points.
- * (Post-incrementing iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * @param s const uint8_t * string
- * @param i int32_t string offset, must be i0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
-        U8_FWD_1(s, i, length); \
-        --__N; \
-    } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary
- * at the start of a code point.
- * If the offset points to a UTF-8 trail byte,
- * then the offset is moved backward to the corresponding lead byte.
- * Otherwise, it is not modified.
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @see U8_SET_CP_START
- * @stable ICU 2.4
- */
-#define U8_SET_CP_START_UNSAFE(s, i) { \
-    while(U8_IS_TRAIL((s)[i])) { --(i); } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary
- * at the start of a code point.
- * If the offset points to a UTF-8 trail byte,
- * then the offset is moved backward to the corresponding lead byte.
- * Otherwise, it is not modified.
- *
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset (usually 0)
- * @param i int32_t string offset, must be start<=i
- * @see U8_SET_CP_START_UNSAFE
- * @see U8_TRUNCATE_IF_INCOMPLETE
- * @stable ICU 2.4
- */
-#define U8_SET_CP_START(s, start, i) { \
-    if(U8_IS_TRAIL((s)[(i)])) { \
-        (i)=utf8_back1SafeBody(s, start, (i)); \
-    } \
-}
-
-#ifndef U_HIDE_DRAFT_API
-/**
- * If the string ends with a UTF-8 byte sequence that is valid so far
- * but incomplete, then reduce the length of the string to end before
- * the lead byte of that incomplete sequence.
- * For example, if the string ends with E1 80, the length is reduced by 2.
- *
- * In all other cases (the string ends with a complete sequence, or it is not
- * possible for any further trail byte to extend the trailing sequence)
- * the length remains unchanged.
- *
- * Useful for processing text split across multiple buffers
- * (save the incomplete sequence for later)
- * and for optimizing iteration
- * (check for string length only once per character).
- *
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- * Unlike U8_SET_CP_START(), this macro never reads s[length].
- *
- * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset (usually 0)
- * @param length int32_t string length (usually start<=length)
- * @see U8_SET_CP_START
- * @draft ICU 61
- */
-#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) \
-    if((length)>(start)) { \
-        uint8_t __b1=s[(length)-1]; \
-        if(U8_IS_SINGLE(__b1)) { \
-            /* common ASCII character */ \
-        } else if(U8_IS_LEAD(__b1)) { \
-            --(length); \
-        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
-            uint8_t __b2=s[(length)-2]; \
-            if(0xe0<=__b2 && __b2<=0xf4) { \
-                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
-                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
-                    (length)-=2; \
-                } \
-            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
-                uint8_t __b3=s[(length)-3]; \
-                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
-                    (length)-=3; \
-                } \
-            } \
-        } \
-    }
-#endif  // U_HIDE_DRAFT_API
-
-/* definitions with backward iteration -------------------------------------- */
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a multi-byte sequence, then the macro will read
- * the whole sequence.
- * If the offset is behind a lead byte, then that itself
- * will be returned as the code point.
- * The result is undefined if the offset is behind an illegal UTF-8 sequence.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @param c output UChar32 variable
- * @see U8_PREV
- * @stable ICU 2.4
- */
-#define U8_PREV_UNSAFE(s, i, c) { \
-    (c)=(uint8_t)(s)[--(i)]; \
-    if(U8_IS_TRAIL(c)) { \
-        uint8_t __b, __count=1, __shift=6; \
-\
-        /* c is a trail byte */ \
-        (c)&=0x3f; \
-        for(;;) { \
-            __b=(s)[--(i)]; \
-            if(__b>=0xc0) { \
-                U8_MASK_LEAD_BYTE(__b, __count); \
-                (c)|=(UChar32)__b<<__shift; \
-                break; \
-            } else { \
-                (c)|=(UChar32)(__b&0x3f)<<__shift; \
-                ++__count; \
-                __shift+=6; \
-            } \
-        } \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the previous one
- * and get the code point between them.
- * (Pre-decrementing backward iteration.)
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The input offset may be the same as the string length.
- * If the offset is behind a multi-byte sequence, then the macro will read
- * the whole sequence.
- * If the offset is behind a lead byte, then that itself
- * will be returned as the code point.
- * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset (usually 0)
- * @param i int32_t string offset, must be start0) { \
-        U8_BACK_1_UNSAFE(s, i); \
-        --__N; \
-    } \
-}
-
-/**
- * Move the string offset from one code point boundary to the n-th one before it,
- * i.e., move backward by n code points.
- * (Pre-decrementing backward iteration.)
- * The input offset may be the same as the string length.
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * @param s const uint8_t * string
- * @param start int32_t index of the start of the string
- * @param i int32_t string offset, must be start0 && (i)>(start)) { \
-        U8_BACK_1(s, start, i); \
-        --__N; \
-    } \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary after a code point.
- * If the offset is behind a partial multi-byte sequence,
- * then the offset is incremented to behind the whole sequence.
- * Otherwise, it is not modified.
- * The input offset may be the same as the string length.
- * "Unsafe" macro, assumes well-formed UTF-8.
- *
- * @param s const uint8_t * string
- * @param i string offset
- * @see U8_SET_CP_LIMIT
- * @stable ICU 2.4
- */
-#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \
-    U8_BACK_1_UNSAFE(s, i); \
-    U8_FWD_1_UNSAFE(s, i); \
-}
-
-/**
- * Adjust a random-access offset to a code point boundary after a code point.
- * If the offset is behind a partial multi-byte sequence,
- * then the offset is incremented to behind the whole sequence.
- * Otherwise, it is not modified.
- * The input offset may be the same as the string length.
- * "Safe" macro, checks for illegal sequences and for string boundaries.
- *
- * The length can be negative for a NUL-terminated string.
- *
- * @param s const uint8_t * string
- * @param start int32_t starting string offset (usually 0)
- * @param i int32_t string offset, must be start<=i<=length
- * @param length int32_t string length
- * @see U8_SET_CP_LIMIT_UNSAFE
- * @stable ICU 2.4
- */
-#define U8_SET_CP_LIMIT(s, start, i, length) { \
-    if((start)<(i) && ((i)<(length) || (length)<0)) { \
-        U8_BACK_1(s, start, i); \
-        U8_FWD_1(s, i, length); \
-    } \
-}
-
-#endif
diff --git a/deps/node/deps/icu-small/source/common/unicode/utf_old.h b/deps/node/deps/icu-small/source/common/unicode/utf_old.h
deleted file mode 100644
index 55c17c01..00000000
--- a/deps/node/deps/icu-small/source/common/unicode/utf_old.h
+++ /dev/null
@@ -1,1204 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-*
-*   Copyright (C) 2002-2012, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-*
-*******************************************************************************
-*   file name:  utf_old.h
-*   encoding:   UTF-8
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2002sep21
-*   created by: Markus W. Scherer
-*/
-
-/**
- * \file
- * \brief C API: Deprecated macros for Unicode string handling
- */
-
-/**
- *
- * The macros in utf_old.h are all deprecated and their use discouraged.
- * Some of the design principles behind the set of UTF macros
- * have changed or proved impractical.
- * Almost all of the old "UTF macros" are at least renamed.
- * If you are looking for a new equivalent to an old macro, please see the
- * comment at the old one.
- *
- * Brief summary of reasons for deprecation:
- * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing)
- *   was impractical.
- * - Switch on UTF_SAFE etc. (selection of unsafe/safe/strict default string processing)
- *   was of little use and impractical.
- * - Whole classes of macros became obsolete outside of the UTF_SIZE/UTF_SAFE
- *   selection framework: UTF32_ macros (all trivial)
- *   and UTF_ default and intermediate macros (all aliases).
- * - The selection framework also caused many macro aliases.
- * - Change in Unicode standard: "irregular" sequences (3.0) became illegal (3.2).
- * - Change of language in Unicode standard:
- *   Growing distinction between internal x-bit Unicode strings and external UTF-x
- *   forms, with the former more lenient.
- *   Suggests renaming of UTF16_ macros to U16_.
- * - The prefix "UTF_" without a width number confused some users.
- * - "Safe" append macros needed the addition of an error indicator output.
- * - "Safe" UTF-8 macros used legitimate (if rarely used) code point values
- *   to indicate error conditions.
- * - The use of the "_CHAR" infix for code point operations confused some users.
- *
- * More details:
- *
- * Until ICU 2.2, utf.h theoretically allowed to choose among UTF-8/16/32
- * for string processing, and among unsafe/safe/strict default macros for that.
- *
- * It proved nearly impossible to write non-trivial, high-performance code
- * that is UTF-generic.
- * Unsafe default macros would be dangerous for default string processing,
- * and the main reason for the "strict" versions disappeared:
- * Between Unicode 3.0 and 3.2 all "irregular" UTF-8 sequences became illegal.
- * The only other conditions that "strict" checked for were non-characters,
- * which are valid during processing. Only during text input/output should they
- * be checked, and at that time other well-formedness checks may be
- * necessary or useful as well.
- * This can still be done by using U16_NEXT and U_IS_UNICODE_NONCHAR
- * or U_IS_UNICODE_CHAR.
- *
- * The old UTF8_..._SAFE macros also used some normal Unicode code points
- * to indicate malformed sequences.
- * The new UTF8_ macros without suffix use negative values instead.
- *
- * The entire contents of utf32.h was moved here without replacement
- * because all those macros were trivial and
- * were meaningful only in the framework of choosing the UTF size.
- *
- * See Jitterbug 2150 and its discussion on the ICU mailing list
- * in September 2002.
- *
- * 
- * - * Obsolete part of pre-ICU 2.4 utf.h file documentation: - * - *

The original concept for these files was for ICU to allow - * in principle to set which UTF (UTF-8/16/32) is used internally - * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type - * accordingly. UTF-16 was the default.

- * - *

This concept has been abandoned. - * A lot of the ICU source code assumes UChar strings are in UTF-16. - * This is especially true for low-level code like - * conversion, normalization, and collation. - * The utf.h header enforces the default of UTF-16. - * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.

- * - *

Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then - * UChar is defined to be exactly wchar_t, otherwise uint16_t.

- * - *

UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit - * Unicode code point (Unicode scalar value, 0..0x10ffff). - * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as - * the definition of UChar. For details see the documentation for UChar32 itself.

- * - *

utf.h also defines a number of C macros for handling single Unicode code points and - * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual - * implementations of those macros and then aliases one set of them (for UTF-16) for general use. - * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while - * the general alias macros always begin with UTF_...

- * - *

Many string operations can be done with or without error checking. - * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe" - * ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause - * program failures if the strings are not well-formed. The safe macros have an additional, boolean - * parameter "strict". If strict is FALSE, then only illegal sequences are detected. - * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates). - * Safe macros return special error code points for illegal/irregular sequences: - * Typically, U+ffff, or values that would result in a code unit sequence of the same length - * as the erroneous input sequence.
- * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and - * they do not have start/length parameters for boundary checking.

- * - *

Here, the macros are aliased in two steps: - * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are - * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures. - * Then, in a second step, the default, general alias macros are set to use either the unsafe or - * the safe/not strict (default) or the safe/strict macro; - * these general macros do not have a strictness parameter.

- * - *

It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict. - * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for - * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.

- * - *

For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix. - * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias. - * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds, - * then the _UNSAFE version may be used. - * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.

- * - *
- * - * @deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead. - */ - -#ifndef __UTF_OLD_H__ -#define __UTF_OLD_H__ - -/** - * \def U_HIDE_OBSOLETE_UTF_OLD_H - * - * Hides the obsolete definitions in unicode/utf_old.h. - * Recommended to be set to 1 at compile time to make sure - * the long-deprecated macros are no longer used. - * - * For reasons for the deprecation see the utf_old.h file comments. - * - * @internal - */ -#ifndef U_HIDE_OBSOLETE_UTF_OLD_H -# define U_HIDE_OBSOLETE_UTF_OLD_H 0 -#endif - -#if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H - -#include "unicode/utf.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" - -/* Formerly utf.h, part 1 --------------------------------------------------- */ - -#ifdef U_USE_UTF_DEPRECATES -/** - * Unicode string and array offset and index type. - * ICU always counts Unicode code units (UChars) for - * string offsets, indexes, and lengths, not Unicode code points. - * - * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release. - */ -typedef int32_t UTextOffset; -#endif - -/** Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF_SIZE 16 - -/** - * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations - * with strict=FALSE. - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF_SAFE -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#undef UTF_UNSAFE -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. 
*/ -#undef UTF_STRICT - -/** - * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8, - * which need 1 or 2 bytes in UTF-8: - * \code - * U+0015 = NAK = Negative Acknowledge, C0 control character - * U+009f = highest C1 control character - * \endcode - * - * These are used by UTF8_..._SAFE macros so that they can return an error value - * that needs the same number of code units (bytes) as were seen by - * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID(). - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF8_ERROR_VALUE_1 0x15 - -/** - * See documentation on UTF8_ERROR_VALUE_1 for details. - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF8_ERROR_VALUE_2 0x9f - -/** - * Error value for all UTFs. This code point value will be set by macros with error - * checking if an error is detected. - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF_ERROR_VALUE 0xffff - -/** - * Is a given 32-bit code an error value - * as returned by one of the macros for any UTF? - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF_IS_ERROR(c) \ - (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) - -/** - * This is a combined macro: Is c a valid Unicode value _and_ not an error code? - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF_IS_VALID(c) \ - (UTF_IS_UNICODE_CHAR(c) && \ - (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) - -/** - * Is this code unit or code point a surrogate (U+d800..U+dfff)? - * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h. - */ -#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) - -/** - * Is a given 32-bit code point a Unicode noncharacter? - * - * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h. 
- */ -#define UTF_IS_UNICODE_NONCHAR(c) \ - ((c)>=0xfdd0 && \ - ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ - (uint32_t)(c)<=0x10ffff) - -/** - * Is a given 32-bit value a Unicode code point value (0..U+10ffff) - * that can be assigned a character? - * - * Code points that are not characters include: - * - single surrogate code points (U+d800..U+dfff, 2048 code points) - * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) - * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) - * - the highest Unicode code point value is U+10ffff - * - * This means that all code points below U+d800 are character code points, - * and that boundary is tested first for performance. - * - * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h. - */ -#define UTF_IS_UNICODE_CHAR(c) \ - ((uint32_t)(c)<0xd800 || \ - ((uint32_t)(c)>0xdfff && \ - (uint32_t)(c)<=0x10ffff && \ - !UTF_IS_UNICODE_NONCHAR(c))) - -/* Formerly utf8.h ---------------------------------------------------------- */ - -/** -* \var utf8_countTrailBytes -* Internal array with numbers of trail bytes for any given byte used in -* lead byte position. -* -* This is internal since it is not meant to be called directly by external clients; -* however it is called by public macros in this file and thus must remain stable, -* and should not be hidden when other internal functions are hidden (otherwise -* public macros would fail to compile). -* @internal -*/ -#ifdef U_UTF8_IMPL -// No forward declaration if compiling utf_impl.cpp, which defines utf8_countTrailBytes. -#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) -U_CFUNC const uint8_t utf8_countTrailBytes[]; -#else -U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_IMPORT*/ -#endif - -/** - * Count the trail bytes for a UTF-8 lead byte. - * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h. 
- */ -#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte]) - -/** - * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. - * @deprecated ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h. - */ -#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) - -/** Is this this code point a single code unit (byte)? @deprecated ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h. */ -#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0) -/** Is this this code unit the lead code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h. */ -#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e) -/** Is this this code unit a trailing code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h. */ -#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80) - -/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h. */ -#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f) - -/** - * Given the lead character, how many bytes are taken by this code point. - * ICU does not deal with code points >0x10ffff - * unless necessary for advancing in the byte stream. - * - * These length macros take into account that for values >0x10ffff - * the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff - * with 3 bytes. - * Code point comparisons need to be in uint32_t because UChar32 - * may be a signed type, and negative values must be recognized. - * - * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf.h. - */ -#if 1 -# define UTF8_CHAR_LENGTH(c) \ - ((uint32_t)(c)<=0x7f ? 1 : \ - ((uint32_t)(c)<=0x7ff ? 2 : \ - ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \ - ) \ - ) -#else -# define UTF8_CHAR_LENGTH(c) \ - ((uint32_t)(c)<=0x7f ? 1 : \ - ((uint32_t)(c)<=0x7ff ? 2 : \ - ((uint32_t)(c)<=0xffff ? 
3 : \ - ((uint32_t)(c)<=0x10ffff ? 4 : \ - ((uint32_t)(c)<=0x3ffffff ? 5 : \ - ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \ - ) \ - ) \ - ) \ - ) \ - ) -#endif - -/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h. */ -#define UTF8_MAX_CHAR_LENGTH 4 - -/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF8_ARRAY_SIZE(size) ((5*(size))/2) - -/** @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h. */ -#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \ - int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \ - UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \ - UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \ -} - -/** @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h. */ -#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ - int32_t _utf8_get_char_safe_index=(int32_t)(i); \ - UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \ - UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \ -} - -/** @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h. */ -#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[(i)++]; \ - if((uint8_t)((c)-0xc0)<0x35) { \ - uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \ - UTF8_MASK_LEAD_BYTE(c, __count); \ - switch(__count) { \ - /* each following branch falls through to the next one */ \ - case 3: \ - (c)=((c)<<6)|((s)[(i)++]&0x3f); \ - case 2: \ - (c)=((c)<<6)|((s)[(i)++]&0x3f); \ - case 1: \ - (c)=((c)<<6)|((s)[(i)++]&0x3f); \ - /* no other branches to optimize switch() */ \ - break; \ - } \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h. 
*/ -#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ - } else { \ - if((uint32_t)(c)<=0x7ff) { \ - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ - } else { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ - } else { \ - (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ - (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ - } \ - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ - } \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h. */ -#define UTF8_FWD_1_UNSAFE(s, i) { \ - (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \ -} - -/** @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h. */ -#define UTF8_FWD_N_UNSAFE(s, i, n) { \ - int32_t __N=(n); \ - while(__N>0) { \ - UTF8_FWD_1_UNSAFE(s, i); \ - --__N; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h. */ -#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \ - while(UTF8_IS_TRAIL((s)[i])) { --(i); } \ -} - -/** @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h. */ -#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ - (c)=(s)[(i)++]; \ - if((c)>=0x80) { \ - if(UTF8_IS_LEAD(c)) { \ - (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \ - } else { \ - (c)=UTF8_ERROR_VALUE_1; \ - } \ - } \ -} - -/** @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h. */ -#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ - } else { \ - (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */ -#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length) - -/** @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */ -#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n) - -/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. 
*/ -#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i) - -/** @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h. */ -#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[--(i)]; \ - if(UTF8_IS_TRAIL(c)) { \ - uint8_t __b, __count=1, __shift=6; \ -\ - /* c is a trail byte */ \ - (c)&=0x3f; \ - for(;;) { \ - __b=(s)[--(i)]; \ - if(__b>=0xc0) { \ - UTF8_MASK_LEAD_BYTE(__b, __count); \ - (c)|=(UChar32)__b<<__shift; \ - break; \ - } else { \ - (c)|=(UChar32)(__b&0x3f)<<__shift; \ - ++__count; \ - __shift+=6; \ - } \ - } \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h. */ -#define UTF8_BACK_1_UNSAFE(s, i) { \ - while(UTF8_IS_TRAIL((s)[--(i)])) {} \ -} - -/** @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h. */ -#define UTF8_BACK_N_UNSAFE(s, i, n) { \ - int32_t __N=(n); \ - while(__N>0) { \ - UTF8_BACK_1_UNSAFE(s, i); \ - --__N; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ -#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \ - UTF8_BACK_1_UNSAFE(s, i); \ - UTF8_FWD_1_UNSAFE(s, i); \ -} - -/** @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h. */ -#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \ - (c)=(s)[--(i)]; \ - if((c)>=0x80) { \ - if((c)<=0xbf) { \ - (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \ - } else { \ - (c)=UTF8_ERROR_VALUE_1; \ - } \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */ -#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i) - -/** @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */ -#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n) - -/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */ -#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length) - -/* Formerly utf16.h --------------------------------------------------------- */ - -/** Is uchar a first/lead surrogate? @deprecated ICU 2.4. 
Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. */ -#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) - -/** Is uchar a second/trail surrogate? @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. */ -#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) - -/** Assuming c is a surrogate, is it a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h. */ -#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) - -/** Helper constant for UTF16_GET_PAIR_VALUE. @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h. */ -#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) - -/** Get the UTF-32 value from the surrogate code units. @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h. */ -#define UTF16_GET_PAIR_VALUE(first, second) \ - (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) - -/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */ -#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) - -/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */ -#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) - -/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */ -#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) - -/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */ -#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) - -/** @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h. */ -#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) - -/** @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */ -#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) - -/** @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */ -#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) - -/** Does this scalar Unicode value need multiple code units for storage? 
@deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h. */ -#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) - -/** @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. */ -#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) - -/** @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */ -#define UTF16_MAX_CHAR_LENGTH 2 - -/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF16_ARRAY_SIZE(size) (size) - -/** - * Get a single code point from an offset that points to any - * of the code units that belong to that code point. - * Assume 0<=i=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ - (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ - /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ - } else if(strict) {\ - /* unmatched second surrogate */ \ - (c)=UTF_ERROR_VALUE; \ - } \ - } \ - } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ - (c)=UTF_ERROR_VALUE; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */ -#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[(i)++]; \ - if(UTF_IS_FIRST_SURROGATE(c)) { \ - (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */ -#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint16_t)(c); \ - } else { \ - (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ - (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */ -#define UTF16_FWD_1_UNSAFE(s, i) { \ - if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ - ++(i); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */ -#define UTF16_FWD_N_UNSAFE(s, i, n) { \ - int32_t __N=(n); \ - while(__N>0) { \ - UTF16_FWD_1_UNSAFE(s, i); \ - --__N; \ - } \ -} - -/** @deprecated ICU 2.4. 
Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */ -#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \ - if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ - --(i); \ - } \ -} - -/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */ -#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ - (c)=(s)[(i)++]; \ - if(UTF_IS_FIRST_SURROGATE(c)) { \ - uint16_t __c2; \ - if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ - ++(i); \ - (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ - /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ - } else if(strict) {\ - /* unmatched first surrogate */ \ - (c)=UTF_ERROR_VALUE; \ - } \ - } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ - /* unmatched second surrogate or other non-character */ \ - (c)=UTF_ERROR_VALUE; \ - } \ -} - -/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */ -#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint16_t)(c); \ - } else if((uint32_t)(c)<=0x10ffff) { \ - if((i)+1<(length)) { \ - (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ - (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ - } else /* not enough space */ { \ - (s)[(i)++]=UTF_ERROR_VALUE; \ - } \ - } else /* c>0x10ffff, write error value */ { \ - (s)[(i)++]=UTF_ERROR_VALUE; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */ -#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length) - -/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */ -#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n) - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */ -#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i) - -/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. 
*/ -#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[--(i)]; \ - if(UTF_IS_SECOND_SURROGATE(c)) { \ - (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */ -#define UTF16_BACK_1_UNSAFE(s, i) { \ - if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \ - --(i); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */ -#define UTF16_BACK_N_UNSAFE(s, i, n) { \ - int32_t __N=(n); \ - while(__N>0) { \ - UTF16_BACK_1_UNSAFE(s, i); \ - --__N; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ -#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \ - if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ - ++(i); \ - } \ -} - -/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */ -#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \ - (c)=(s)[--(i)]; \ - if(UTF_IS_SECOND_SURROGATE(c)) { \ - uint16_t __c2; \ - if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ - --(i); \ - (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ - /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ - } else if(strict) {\ - /* unmatched second surrogate */ \ - (c)=UTF_ERROR_VALUE; \ - } \ - } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ - /* unmatched first surrogate or other non-character */ \ - (c)=UTF_ERROR_VALUE; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */ -#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i) - -/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */ -#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n) - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. 
*/ -#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) - -/* Formerly utf32.h --------------------------------------------------------- */ - -/* -* Old documentation: -* -* This file defines macros to deal with UTF-32 code units and code points. -* Signatures and semantics are the same as for the similarly named macros -* in utf16.h. -* utf32.h is included by utf.h after unicode/umachine.h

-* and some common definitions. -*

Usage: ICU coding guidelines for if() statements should be followed when using these macros. -* Compound statements (curly braces {}) must be used for if-else-while... -* bodies and all macro statements should be terminated with semicolon.

-*/ - -/* internal definitions ----------------------------------------------------- */ - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_IS_SAFE(c, strict) \ - (!(strict) ? \ - (uint32_t)(c)<=0x10ffff : \ - UTF_IS_UNICODE_CHAR(c)) - -/* - * For the semantics of all of these macros, see utf16.h. - * The UTF-32 versions are trivial because any code point is - * encoded using exactly one code unit. - */ - -/* single-code point definitions -------------------------------------------- */ - -/* classes of code unit values */ - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_IS_SINGLE(uchar) 1 -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_IS_LEAD(uchar) 0 -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_IS_TRAIL(uchar) 0 - -/* number of code units per code point */ - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_NEED_MULTIPLE_UCHAR(c) 0 -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_CHAR_LENGTH(c) 1 -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_MAX_CHAR_LENGTH 1 - -/* average number of code units compared to UTF-16 */ - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_ARRAY_SIZE(size) (size) - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[i]; \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ - (c)=(s)[i]; \ - if(!UTF32_IS_SAFE(c, strict)) { \ - (c)=UTF_ERROR_VALUE; \ - } \ -} - -/* definitions with forward iteration --------------------------------------- */ - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[(i)++]; \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \ - (s)[(i)++]=(c); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. 
*/ -#define UTF32_FWD_1_UNSAFE(s, i) { \ - ++(i); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_FWD_N_UNSAFE(s, i, n) { \ - (i)+=(n); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ - (c)=(s)[(i)++]; \ - if(!UTF32_IS_SAFE(c, strict)) { \ - (c)=UTF_ERROR_VALUE; \ - } \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \ - if((uint32_t)(c)<=0x10ffff) { \ - (s)[(i)++]=(c); \ - } else /* c>0x10ffff, write 0xfffd */ { \ - (s)[(i)++]=0xfffd; \ - } \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_FWD_1_SAFE(s, i, length) { \ - ++(i); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_FWD_N_SAFE(s, i, length, n) { \ - if(((i)+=(n))>(length)) { \ - (i)=(length); \ - } \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \ -} - -/* definitions with backward iteration -------------------------------------- */ - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[--(i)]; \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_BACK_1_UNSAFE(s, i) { \ - --(i); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_BACK_N_UNSAFE(s, i, n) { \ - (i)-=(n); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \ - (c)=(s)[--(i)]; \ - if(!UTF32_IS_SAFE(c, strict)) { \ - (c)=UTF_ERROR_VALUE; \ - } \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_BACK_1_SAFE(s, start, i) { \ - --(i); \ -} - -/** @deprecated ICU 2.4. 
Obsolete, see utf_old.h. */ -#define UTF32_BACK_N_SAFE(s, start, i, n) { \ - (i)-=(n); \ - if((i)<(start)) { \ - (i)=(start); \ - } \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \ -} - -/* Formerly utf.h, part 2 --------------------------------------------------- */ - -/** - * Estimate the number of code units for a string based on the number of UTF-16 code units. - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size) - -/** @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */ -#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) - -/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */ -#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) - - -/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */ -#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) - -/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */ -#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) - - -/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */ -#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) - -/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */ -#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) - - -/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */ -#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) - -/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */ -#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) - - -/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */ -#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) - -/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. 
*/ -#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) - - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */ -#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */ -#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i) - - -/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */ -#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) - -/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */ -#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) - - -/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */ -#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) - -/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */ -#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) - - -/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */ -#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) - -/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */ -#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) - - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ -#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */ -#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) - -/* Define default macros (UTF-16 "safe") ------------------------------------ */ - -/** - * Does this code unit alone encode a code point (BMP, not a surrogate)? - * Same as UTF16_IS_SINGLE. - * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h. 
- */ -#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar) - -/** - * Is this code unit the first one of several (a lead surrogate)? - * Same as UTF16_IS_LEAD. - * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. - */ -#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar) - -/** - * Is this code unit one of several but not the first one (a trail surrogate)? - * Same as UTF16_IS_TRAIL. - * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. - */ -#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar) - -/** - * Does this code point require multiple code units (is it a supplementary code point)? - * Same as UTF16_NEED_MULTIPLE_UCHAR. - * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead. - */ -#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c) - -/** - * How many code units are used to encode this code point (1 or 2)? - * Same as UTF16_CHAR_LENGTH. - * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. - */ -#define UTF_CHAR_LENGTH(c) U16_LENGTH(c) - -/** - * How many code units are used at most for any Unicode code point (2)? - * Same as UTF16_MAX_CHAR_LENGTH. - * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. - */ -#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH - -/** - * Set c to the code point that contains the code unit i. - * i could point to the lead or the trail surrogate for the code point. - * i is not modified. - * Same as UTF16_GET_CHAR. - * \pre 0<=i -#include "unicode/utypes.h" - -/** - * \file - * \brief C API: Definitions for ICU tracing/logging. - * - * This provides API for debugging the internals of ICU without the use of - * a traditional debugger. - * - * By default, tracing is disabled in ICU. If you need to debug ICU with - * tracing, please compile ICU with the --enable-tracing configure option. - */ - -U_CDECL_BEGIN - -/** - * Trace severity levels. Higher levels increase the verbosity of the trace output. 
- * @see utrace_setLevel - * @stable ICU 2.8 - */ -typedef enum UTraceLevel { - /** Disable all tracing @stable ICU 2.8*/ - UTRACE_OFF=-1, - /** Trace error conditions only @stable ICU 2.8*/ - UTRACE_ERROR=0, - /** Trace errors and warnings @stable ICU 2.8*/ - UTRACE_WARNING=3, - /** Trace opens and closes of ICU services @stable ICU 2.8*/ - UTRACE_OPEN_CLOSE=5, - /** Trace an intermediate number of ICU operations @stable ICU 2.8*/ - UTRACE_INFO=7, - /** Trace the maximum number of ICU operations @stable ICU 2.8*/ - UTRACE_VERBOSE=9 -} UTraceLevel; - -/** - * These are the ICU functions that will be traced when tracing is enabled. - * @stable ICU 2.8 - */ -typedef enum UTraceFunctionNumber { - UTRACE_FUNCTION_START=0, - UTRACE_U_INIT=UTRACE_FUNCTION_START, - UTRACE_U_CLEANUP, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal collation trace location. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UTRACE_FUNCTION_LIMIT, -#endif // U_HIDE_DEPRECATED_API - - UTRACE_CONVERSION_START=0x1000, - UTRACE_UCNV_OPEN=UTRACE_CONVERSION_START, - UTRACE_UCNV_OPEN_PACKAGE, - UTRACE_UCNV_OPEN_ALGORITHMIC, - UTRACE_UCNV_CLONE, - UTRACE_UCNV_CLOSE, - UTRACE_UCNV_FLUSH_CACHE, - UTRACE_UCNV_LOAD, - UTRACE_UCNV_UNLOAD, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal collation trace location. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UTRACE_CONVERSION_LIMIT, -#endif // U_HIDE_DEPRECATED_API - - UTRACE_COLLATION_START=0x2000, - UTRACE_UCOL_OPEN=UTRACE_COLLATION_START, - UTRACE_UCOL_CLOSE, - UTRACE_UCOL_STRCOLL, - UTRACE_UCOL_GET_SORTKEY, - UTRACE_UCOL_GETLOCALE, - UTRACE_UCOL_NEXTSORTKEYPART, - UTRACE_UCOL_STRCOLLITER, - UTRACE_UCOL_OPEN_FROM_SHORT_STRING, - UTRACE_UCOL_STRCOLLUTF8, /**< @stable ICU 50 */ -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal collation trace location. 
- * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - UTRACE_COLLATION_LIMIT -#endif // U_HIDE_DEPRECATED_API -} UTraceFunctionNumber; - -/** - * Setter for the trace level. - * @param traceLevel A UTraceLevel value. - * @stable ICU 2.8 - */ -U_STABLE void U_EXPORT2 -utrace_setLevel(int32_t traceLevel); - -/** - * Getter for the trace level. - * @return The UTraceLevel value being used by ICU. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -utrace_getLevel(void); - -/* Trace function pointers types ----------------------------- */ - -/** - * Type signature for the trace function to be called when entering a function. - * @param context value supplied at the time the trace functions are set. - * @param fnNumber Enum value indicating the ICU function being entered. - * @stable ICU 2.8 - */ -typedef void U_CALLCONV -UTraceEntry(const void *context, int32_t fnNumber); - -/** - * Type signature for the trace function to be called when exiting from a function. - * @param context value supplied at the time the trace functions are set. - * @param fnNumber Enum value indicating the ICU function being exited. - * @param fmt A formatting string that describes the number and types - * of arguments included with the variable args. The fmt - * string has the same form as the utrace_vformat format - * string. - * @param args A variable arguments list. Contents are described by - * the fmt parameter. - * @see utrace_vformat - * @stable ICU 2.8 - */ -typedef void U_CALLCONV -UTraceExit(const void *context, int32_t fnNumber, - const char *fmt, va_list args); - -/** - * Type signature for the trace function to be called from within an ICU function - * to display data or messages. - * @param context value supplied at the time the trace functions are set. - * @param fnNumber Enum value indicating the ICU function being exited. 
- * @param level The current tracing level - * @param fmt A format string describing the tracing data that is supplied - * as variable args - * @param args The data being traced, passed as variable args. - * @stable ICU 2.8 - */ -typedef void U_CALLCONV -UTraceData(const void *context, int32_t fnNumber, int32_t level, - const char *fmt, va_list args); - -/** - * Set ICU Tracing functions. Installs application-provided tracing - * functions into ICU. After doing this, subsequent ICU operations - * will call back to the installed functions, providing a trace - * of the use of ICU. Passing a NULL pointer for a tracing function - * is allowed, and inhibits tracing action at points where that function - * would be called. - *

- * Tracing and Threads: Tracing functions are global to a process, and - * will be called in response to ICU operations performed by any - * thread. If tracing of an individual thread is desired, the - * tracing functions must themselves filter by checking that the - * current thread is the desired thread. - * - * @param context an uninterpreted pointer. Whatever is passed in - * here will in turn be passed to each of the tracing - * functions UTraceEntry, UTraceExit and UTraceData. - * ICU does not use or alter this pointer. - * @param e Callback function to be called on entry to a - * a traced ICU function. - * @param x Callback function to be called on exit from a - * traced ICU function. - * @param d Callback function to be called from within a - * traced ICU function, for the purpose of providing - * data to the trace. - * - * @stable ICU 2.8 - */ -U_STABLE void U_EXPORT2 -utrace_setFunctions(const void *context, - UTraceEntry *e, UTraceExit *x, UTraceData *d); - -/** - * Get the currently installed ICU tracing functions. Note that a null function - * pointer will be returned if no trace function has been set. - * - * @param context The currently installed tracing context. - * @param e The currently installed UTraceEntry function. - * @param x The currently installed UTraceExit function. - * @param d The currently installed UTraceData function. - * @stable ICU 2.8 - */ -U_STABLE void U_EXPORT2 -utrace_getFunctions(const void **context, - UTraceEntry **e, UTraceExit **x, UTraceData **d); - - - -/* - * - * ICU trace format string syntax - * - * Format Strings are passed to UTraceData functions, and define the - * number and types of the trace data being passed on each call. 
- * - * The UTraceData function, which is supplied by the application, - * not by ICU, can either forward the trace data (passed via - * varargs) and the format string back to ICU for formatting into - * a displayable string, or it can interpret the format itself, - * and do as it wishes with the trace data. - * - * - * Goals for the format string - * - basic data output - * - easy to use for trace programmer - * - sufficient provision for data types for trace output readability - * - well-defined types and binary portable APIs - * - * Non-goals - * - printf compatibility - * - fancy formatting - * - argument reordering and other internationalization features - * - * ICU trace format strings contain plain text with argument inserts, - * much like standard printf format strings. - * Each insert begins with a '%', then optionally contains a 'v', - * then exactly one type character. - * Two '%' in a row represent a '%' instead of an insert. - * The trace format strings need not have \n at the end. - * - * - * Types - * ----- - * - * Type characters: - * - c A char character in the default codepage. - * - s A NUL-terminated char * string in the default codepage. - * - S A UChar * string. Requires two params, (ptr, length). Length=-1 for nul term. - * - b A byte (8-bit integer). - * - h A 16-bit integer. Also a 16 bit Unicode code unit. - * - d A 32-bit integer. Also a 20 bit Unicode code point value. - * - l A 64-bit integer. - * - p A data pointer. - * - * Vectors - * ------- - * - * If the 'v' is not specified, then one item of the specified type - * is passed in. - * If the 'v' (for "vector") is specified, then a vector of items of the - * specified type is passed in, via a pointer to the first item - * and an int32_t value for the length of the vector. - * Length==-1 means zero or NUL termination. Works for vectors of all types. - * - * Note: %vS is a vector of (UChar *) strings. 
The strings must - * be nul terminated as there is no way to provide a - * separate length parameter for each string. The length - * parameter (required for all vectors) is the number of - * strings, not the length of the strings. - * - * Examples - * -------- - * - * These examples show the parameters that will be passed to an application's - * UTraceData() function for various formats. - * - * - the precise formatting is up to the application! - * - the examples use type casts for arguments only to _show_ the types of - * arguments without needing variable declarations in the examples; - * the type casts will not be necessary in actual code - * - * UTraceDataFunc(context, fnNumber, level, - * "There is a character %c in the string %s.", // Format String - * (char)c, (const char *)s); // varargs parameters - * -> There is a character 0x42 'B' in the string "Bravo". - * - * UTraceDataFunc(context, fnNumber, level, - * "Vector of bytes %vb vector of chars %vc", - * (const uint8_t *)bytes, (int32_t)bytesLength, - * (const char *)chars, (int32_t)charsLength); - * -> Vector of bytes - * 42 63 64 3f [4] - * vector of chars - * "Bcd?"[4] - * - * UTraceDataFunc(context, fnNumber, level, - * "An int32_t %d and a whole bunch of them %vd", - * (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength); - * -> An int32_t 0xfffffffb and a whole bunch of them - * fffffffb 00000005 0000010a [3] - * - */ - - - -/** - * Trace output Formatter. An application's UTraceData tracing functions may call - * back to this function to format the trace output in a - * human readable form. Note that a UTraceData function may choose - * to not format the data; it could, for example, save it in - * in the raw form it was received (more compact), leaving - * formatting for a later trace analysis tool. - * @param outBuf pointer to a buffer to receive the formatted output. 
Output - * will be nul terminated if there is space in the buffer - - * if the length of the requested output < the output buffer size. - * @param capacity Length of the output buffer. - * @param indent Number of spaces to indent the output. Intended to allow - * data displayed from nested functions to be indented for readability. - * @param fmt Format specification for the data to output - * @param args Data to be formatted. - * @return Length of formatted output, including the terminating NUL. - * If buffer capacity is insufficient, the required capacity is returned. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -utrace_vformat(char *outBuf, int32_t capacity, - int32_t indent, const char *fmt, va_list args); - -/** - * Trace output Formatter. An application's UTraceData tracing functions may call - * this function to format any additional trace data, beyond that - * provided by default, in human readable form with the same - * formatting conventions used by utrace_vformat(). - * @param outBuf pointer to a buffer to receive the formatted output. Output - * will be nul terminated if there is space in the buffer - - * if the length of the requested output < the output buffer size. - * @param capacity Length of the output buffer. - * @param indent Number of spaces to indent the output. Intended to allow - * data displayed from nested functions to be indented for readability. - * @param fmt Format specification for the data to output - * @param ... Data to be formatted. - * @return Length of formatted output, including the terminating NUL. - * If buffer capacity is insufficient, the required capacity is returned. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -utrace_format(char *outBuf, int32_t capacity, - int32_t indent, const char *fmt, ...); - - - -/* Trace function numbers --------------------------------------------------- */ - -/** - * Get the name of a function from its trace function number. 
- * - * @param fnNumber The trace number for an ICU function. - * @return The name string for the function. - * - * @see UTraceFunctionNumber - * @stable ICU 2.8 - */ -U_STABLE const char * U_EXPORT2 -utrace_functionName(int32_t fnNumber); - -U_CDECL_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/utypes.h b/deps/node/deps/icu-small/source/common/unicode/utypes.h deleted file mode 100644 index f1814e06..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/utypes.h +++ /dev/null @@ -1,712 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1996-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* FILE NAME : UTYPES.H (formerly ptypes.h) -* -* Date Name Description -* 12/11/96 helena Creation. -* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32, -* uint8, uint16, and uint32. -* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as -* well as C++. -* Modified to use memcpy() for uprv_arrayCopy() fns. -* 04/14/97 aliu Added TPlatformUtilities. -* 05/07/97 aliu Added import/export specifiers (replacing the old -* broken EXT_CLASS). Added version number for our -* code. Cleaned up header. -* 6/20/97 helena Java class name change. -* 08/11/98 stephen UErrorCode changed from typedef to enum -* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3 -* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t -* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066) -* 04/20/99 stephen Cleaned up & reworked for autoconf. -* Renamed to utypes.h. -* 05/05/99 stephen Changed to use -* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here. 
-******************************************************************************* -*/ - -#ifndef UTYPES_H -#define UTYPES_H - - -#include "unicode/umachine.h" -#include "unicode/uversion.h" -#include "unicode/uconfig.h" -#include - -#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS -# include "unicode/utf.h" -#endif - -/*! - * \file - * \brief Basic definitions for ICU, for both C and C++ APIs - * - * This file defines basic types, constants, and enumerations directly or - * indirectly by including other header files, especially utf.h for the - * basic character and string definitions and umachine.h for consistent - * integer and other types. - */ - - -/** - * \def U_SHOW_CPLUSPLUS_API - * @internal - */ -#ifdef __cplusplus -# ifndef U_SHOW_CPLUSPLUS_API -# define U_SHOW_CPLUSPLUS_API 1 -# endif -#else -# undef U_SHOW_CPLUSPLUS_API -# define U_SHOW_CPLUSPLUS_API 0 -#endif - -/** @{ API visibility control */ - -/** - * \def U_HIDE_DRAFT_API - * Define this to 1 to request that draft API be "hidden" - * @internal - */ -/** - * \def U_HIDE_INTERNAL_API - * Define this to 1 to request that internal API be "hidden" - * @internal - */ -#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API) -#define U_HIDE_DRAFT_API 1 -#endif -#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API) -#define U_HIDE_INTERNAL_API 1 -#endif - -/** @} */ - -/*===========================================================================*/ -/* ICUDATA naming scheme */ -/*===========================================================================*/ - -/** - * \def U_ICUDATA_TYPE_LETTER - * - * This is a platform-dependent string containing one letter: - * - b for big-endian, ASCII-family platforms - * - l for little-endian, ASCII-family platforms - * - e for big-endian, EBCDIC-family platforms - * This letter is part of the common data file name. 
- * @stable ICU 2.0 - */ - -/** - * \def U_ICUDATA_TYPE_LITLETTER - * The non-string form of U_ICUDATA_TYPE_LETTER - * @stable ICU 2.0 - */ -#if U_CHARSET_FAMILY -# if U_IS_BIG_ENDIAN - /* EBCDIC - should always be BE */ -# define U_ICUDATA_TYPE_LETTER "e" -# define U_ICUDATA_TYPE_LITLETTER e -# else -# error "Don't know what to do with little endian EBCDIC!" -# define U_ICUDATA_TYPE_LETTER "x" -# define U_ICUDATA_TYPE_LITLETTER x -# endif -#else -# if U_IS_BIG_ENDIAN - /* Big-endian ASCII */ -# define U_ICUDATA_TYPE_LETTER "b" -# define U_ICUDATA_TYPE_LITLETTER b -# else - /* Little-endian ASCII */ -# define U_ICUDATA_TYPE_LETTER "l" -# define U_ICUDATA_TYPE_LITLETTER l -# endif -#endif - -/** - * A single string literal containing the icudata stub name. i.e. 'icudt18e' for - * ICU 1.8.x on EBCDIC, etc.. - * @stable ICU 2.0 - */ -#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER -#ifndef U_HIDE_INTERNAL_API -#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */ -#define U_USE_USRDATA 0 /**< @internal */ -#endif /* U_HIDE_INTERNAL_API */ - -/** - * U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library. - * Defined as a literal, not a string. - * Tricky Preprocessor use - ## operator replaces macro parameters with the literal string - * from the corresponding macro invocation, _before_ other macro substitutions. - * Need a nested \#defines to get the actual version numbers rather than - * the literal text U_ICU_VERSION_MAJOR_NUM into the name. - * The net result will be something of the form - * \#define U_ICU_ENTRY_POINT icudt19_dat - * @stable ICU 2.4 - */ -#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME) - -#ifndef U_HIDE_INTERNAL_API -/** - * Do not use. Note that it's OK for the 2nd argument to be undefined (literal). 
- * @internal - */ -#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff) - -/** - * Do not use. - * @internal - */ -#ifndef U_DEF_ICUDATA_ENTRY_POINT -/* affected by symbol renaming. See platform.h */ -#ifndef U_LIB_SUFFIX_C_NAME -#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat -#else -#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat -#endif -#endif -#endif /* U_HIDE_INTERNAL_API */ - -/** - * \def NULL - * Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C. - * @stable ICU 2.0 - */ -#ifndef NULL -#ifdef __cplusplus -#define NULL nullptr -#else -#define NULL ((void *)0) -#endif -#endif - -/*===========================================================================*/ -/* Calendar/TimeZone data types */ -/*===========================================================================*/ - -/** - * Date and Time data type. - * This is a primitive data type that holds the date and time - * as the number of milliseconds since 1970-jan-01, 00:00 UTC. - * UTC leap seconds are ignored. - * @stable ICU 2.0 - */ -typedef double UDate; - -/** The number of milliseconds per second @stable ICU 2.0 */ -#define U_MILLIS_PER_SECOND (1000) -/** The number of milliseconds per minute @stable ICU 2.0 */ -#define U_MILLIS_PER_MINUTE (60000) -/** The number of milliseconds per hour @stable ICU 2.0 */ -#define U_MILLIS_PER_HOUR (3600000) -/** The number of milliseconds per day @stable ICU 2.0 */ -#define U_MILLIS_PER_DAY (86400000) - -/** - * Maximum UDate value - * @stable ICU 4.8 - */ -#define U_DATE_MAX DBL_MAX - -/** - * Minimum UDate value - * @stable ICU 4.8 - */ -#define U_DATE_MIN -U_DATE_MAX - -/*===========================================================================*/ -/* Shared library/DLL import-export API control */ -/*===========================================================================*/ - -/* - * Control of symbol import/export. - * ICU is separated into three libraries. 
- */ - -/** - * \def U_COMBINED_IMPLEMENTATION - * Set to export library symbols from inside the ICU library - * when all of ICU is in a single library. - * This can be set as a compiler option while building ICU, and it - * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc. - * @stable ICU 2.0 - */ - -/** - * \def U_DATA_API - * Set to export library symbols from inside the stubdata library, - * and to import them from outside. - * @stable ICU 3.0 - */ - -/** - * \def U_COMMON_API - * Set to export library symbols from inside the common library, - * and to import them from outside. - * @stable ICU 2.0 - */ - -/** - * \def U_I18N_API - * Set to export library symbols from inside the i18n library, - * and to import them from outside. - * @stable ICU 2.0 - */ - -/** - * \def U_LAYOUT_API - * Set to export library symbols from inside the layout engine library, - * and to import them from outside. - * @stable ICU 2.0 - */ - -/** - * \def U_LAYOUTEX_API - * Set to export library symbols from inside the layout extensions library, - * and to import them from outside. - * @stable ICU 2.6 - */ - -/** - * \def U_IO_API - * Set to export library symbols from inside the ustdio library, - * and to import them from outside. - * @stable ICU 2.0 - */ - -/** - * \def U_TOOLUTIL_API - * Set to export library symbols from inside the toolutil library, - * and to import them from outside. - * @stable ICU 3.4 - */ - -#ifdef U_IN_DOXYGEN -// This definition is required when generating the API docs. 
-#define U_COMBINED_IMPLEMENTATION 1 -#endif - -#if defined(U_COMBINED_IMPLEMENTATION) -#define U_DATA_API U_EXPORT -#define U_COMMON_API U_EXPORT -#define U_I18N_API U_EXPORT -#define U_LAYOUT_API U_EXPORT -#define U_LAYOUTEX_API U_EXPORT -#define U_IO_API U_EXPORT -#define U_TOOLUTIL_API U_EXPORT -#elif defined(U_STATIC_IMPLEMENTATION) -#define U_DATA_API -#define U_COMMON_API -#define U_I18N_API -#define U_LAYOUT_API -#define U_LAYOUTEX_API -#define U_IO_API -#define U_TOOLUTIL_API -#elif defined(U_COMMON_IMPLEMENTATION) -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_EXPORT -#define U_I18N_API U_IMPORT -#define U_LAYOUT_API U_IMPORT -#define U_LAYOUTEX_API U_IMPORT -#define U_IO_API U_IMPORT -#define U_TOOLUTIL_API U_IMPORT -#elif defined(U_I18N_IMPLEMENTATION) -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_IMPORT -#define U_I18N_API U_EXPORT -#define U_LAYOUT_API U_IMPORT -#define U_LAYOUTEX_API U_IMPORT -#define U_IO_API U_IMPORT -#define U_TOOLUTIL_API U_IMPORT -#elif defined(U_LAYOUT_IMPLEMENTATION) -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_IMPORT -#define U_I18N_API U_IMPORT -#define U_LAYOUT_API U_EXPORT -#define U_LAYOUTEX_API U_IMPORT -#define U_IO_API U_IMPORT -#define U_TOOLUTIL_API U_IMPORT -#elif defined(U_LAYOUTEX_IMPLEMENTATION) -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_IMPORT -#define U_I18N_API U_IMPORT -#define U_LAYOUT_API U_IMPORT -#define U_LAYOUTEX_API U_EXPORT -#define U_IO_API U_IMPORT -#define U_TOOLUTIL_API U_IMPORT -#elif defined(U_IO_IMPLEMENTATION) -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_IMPORT -#define U_I18N_API U_IMPORT -#define U_LAYOUT_API U_IMPORT -#define U_LAYOUTEX_API U_IMPORT -#define U_IO_API U_EXPORT -#define U_TOOLUTIL_API U_IMPORT -#elif defined(U_TOOLUTIL_IMPLEMENTATION) -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_IMPORT -#define U_I18N_API U_IMPORT -#define U_LAYOUT_API U_IMPORT -#define U_LAYOUTEX_API U_IMPORT -#define U_IO_API U_IMPORT -#define U_TOOLUTIL_API 
U_EXPORT -#else -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_IMPORT -#define U_I18N_API U_IMPORT -#define U_LAYOUT_API U_IMPORT -#define U_LAYOUTEX_API U_IMPORT -#define U_IO_API U_IMPORT -#define U_TOOLUTIL_API U_IMPORT -#endif - -/** - * \def U_STANDARD_CPP_NAMESPACE - * Control of C++ Namespace - * @stable ICU 2.0 - */ -#ifdef __cplusplus -#define U_STANDARD_CPP_NAMESPACE :: -#else -#define U_STANDARD_CPP_NAMESPACE -#endif - -/*===========================================================================*/ -/* UErrorCode */ -/*===========================================================================*/ - -/** - * Error code to replace exception handling, so that the code is compatible with all C++ compilers, - * and to use the same mechanism for C and C++. - * - * \par - * ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode - * first test if(U_FAILURE(errorCode)) { return immediately; } - * so that in a chain of such functions the first one that sets an error code - * causes the following ones to not perform any operations. - * - * \par - * Error codes should be tested using U_FAILURE() and U_SUCCESS(). - * @stable ICU 2.0 - */ -typedef enum UErrorCode { - /* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird - * and is that way because VC++ debugger displays first encountered constant, - * which is not the what the code is used for - */ - - U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */ - - U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */ - - U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */ - - U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */ - - U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. 
Expect performance/memory usage degradation. Consider upgrading */ - - U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */ - - U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */ - - U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */ - - U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */ - - U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */ - -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal UErrorCode warning value. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_ERROR_WARNING_LIMIT, -#endif // U_HIDE_DEPRECATED_API - - U_ZERO_ERROR = 0, /**< No error, no warning. */ - - U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */ - U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */ - U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */ - U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */ - U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */ - U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */ - U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */ - U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */ - U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */ - U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */ - U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. 
*/ - U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */ - U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */ - U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */ - U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */ - U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */ - U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */ - U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */ - U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */ - U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */ - U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */ - U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */ - U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */ - U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource. - It is very possible that a circular alias definition has occurred */ - U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */ - U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */ - U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */ - U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */ - U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */ - U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. 
*/ - -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest standard error code. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_STANDARD_ERROR_LIMIT, -#endif // U_HIDE_DEPRECATED_API - - /* - * Error codes in the range 0x10000 0x10100 are reserved for Transliterator. - */ - U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */ - U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */ - U_MALFORMED_RULE, /**< Elements of a rule are misplaced */ - U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/ - U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */ - U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/ - U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */ - U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */ - U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */ - U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */ - U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */ - U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */ - U_MISSING_OPERATOR, /**< A rule contains no operator */ - U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */ - U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */ - U_MULTIPLE_CURSORS, /**< More than one cursor */ - U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */ - U_TRAILING_BACKSLASH, /**< A dangling backslash */ - U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */ - U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */ - U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */ - U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */ - U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */ - U_MISPLACED_COMPOUND_FILTER, 
/**< A compound filter is in an invalid location */ - U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */ - U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */ - U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */ - U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */ - U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */ - U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */ - U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */ - U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */ - U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */ - U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */ - U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */ - U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */ -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal Transliterator error code. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_PARSE_ERROR_LIMIT, -#endif // U_HIDE_DEPRECATED_API - - /* - * Error codes in the range 0x10100 0x10200 are reserved for the formatting API. - */ - U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */ - U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */ - U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */ - U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. 
Use U_MULTIPLE_DECIMAL_SEPARATORS */ - U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */ - U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */ - U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */ - U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */ - U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */ - U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */ - U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */ - U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */ - U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */ - U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */ - U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */ - U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */ - U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */ - U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */ - U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */ - U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */ -#ifndef U_HIDE_DRAFT_API - U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */ -#endif // U_HIDE_DRAFT_API -#ifndef U_HIDE_DRAFT_API - U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @draft ICU 62 */ -#endif // U_HIDE_DRAFT_API -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal formatting API error code. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
- */ - U_FMT_PARSE_ERROR_LIMIT = 0x10114, -#endif // U_HIDE_DEPRECATED_API - - /* - * Error codes in the range 0x10200 0x102ff are reserved for BreakIterator. - */ - U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */ - U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */ - U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */ - U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */ - U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */ - U_BRK_UNCLOSED_SET, /**< UnicodeSet writing an RBBI rule missing a closing ']'. */ - U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */ - U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */ - U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */ - U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */ - U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */ - U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */ - U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */ - U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */ - U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */ -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal BreakIterator error code. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_BRK_ERROR_LIMIT, -#endif // U_HIDE_DEPRECATED_API - - /* - * Error codes in the range 0x10300-0x103ff are reserved for regular expression related errors. - */ - U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */ - U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */ - U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. 
*/ - U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */ - U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */ - U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */ - U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */ - U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */ - U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */ - U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */ - U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */ - U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */ - U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */ - U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */ - U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/ -#ifndef U_HIDE_DEPRECATED_API - U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */ -#endif /* U_HIDE_DEPRECATED_API */ - U_REGEX_MISSING_CLOSE_BRACKET=U_REGEX_SET_CONTAINS_STRING+2, /**< Missing closing bracket on a bracket expression. */ - U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */ - U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */ - U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */ - U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */ - U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @stable ICU 55 */ - U_REGEX_INVALID_CAPTURE_GROUP_NAME, /**< Invalid capture group name. @stable ICU 55 */ -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal regular expression error code. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
- */ - U_REGEX_ERROR_LIMIT=U_REGEX_STOPPED_BY_CALLER+3, -#endif // U_HIDE_DEPRECATED_API - - /* - * Error codes in the range 0x10400-0x104ff are reserved for IDNA related error codes. - */ - U_IDNA_PROHIBITED_ERROR=0x10400, - U_IDNA_ERROR_START=0x10400, - U_IDNA_UNASSIGNED_ERROR, - U_IDNA_CHECK_BIDI_ERROR, - U_IDNA_STD3_ASCII_RULES_ERROR, - U_IDNA_ACE_PREFIX_ERROR, - U_IDNA_VERIFICATION_ERROR, - U_IDNA_LABEL_TOO_LONG_ERROR, - U_IDNA_ZERO_LENGTH_LABEL_ERROR, - U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR, -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal IDNA error code. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_IDNA_ERROR_LIMIT, -#endif // U_HIDE_DEPRECATED_API - /* - * Aliases for StringPrep - */ - U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR, - U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR, - U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR, - - /* - * Error codes in the range 0x10500-0x105ff are reserved for Plugin related error codes. - */ - U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */ - U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */ - U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */ -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal plug-in error code. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_PLUGIN_ERROR_LIMIT, -#endif // U_HIDE_DEPRECATED_API - -#ifndef U_HIDE_DEPRECATED_API - /** - * One more than the highest normal error code. - * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. - */ - U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT -#endif // U_HIDE_DEPRECATED_API -} UErrorCode; - -/* Use the following to determine if an UErrorCode represents */ -/* operational success or failure. 
*/ - -#ifdef __cplusplus - /** - * Does the error code indicate success? - * @stable ICU 2.0 - */ - static - inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); } - /** - * Does the error code indicate a failure? - * @stable ICU 2.0 - */ - static - inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); } -#else - /** - * Does the error code indicate success? - * @stable ICU 2.0 - */ -# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR) - /** - * Does the error code indicate a failure? - * @stable ICU 2.0 - */ -# define U_FAILURE(x) ((x)>U_ZERO_ERROR) -#endif - -/** - * Return a string for a UErrorCode value. - * The string will be the same as the name of the error code constant - * in the UErrorCode enum above. - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -u_errorName(UErrorCode code); - - -#endif /* _UTYPES */ diff --git a/deps/node/deps/icu-small/source/common/unicode/uvernum.h b/deps/node/deps/icu-small/source/common/unicode/uvernum.h deleted file mode 100644 index 83d0b4ec..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uvernum.h +++ /dev/null @@ -1,180 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2000-2016, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* -* file name: uvernum.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* Created by: Vladimir Weinstein -* Updated by: Steven R. Loomis -* -*/ - -/** - * \file - * \brief C API: definitions of ICU version numbers - * - * This file is included by uversion.h and other files. This file contains only - * macros and definitions. The actual version numbers are defined here. 
- */ - - /* - * IMPORTANT: When updating version, the following things need to be done: - * source/common/unicode/uvernum.h - this file: update major, minor, - * patchlevel, suffix, version, short version constants, namespace, - * renaming macro, and copyright - * - * The following files need to be updated as well, which can be done - * by running the UNIX makefile target 'update-windows-makefiles' in icu/source. - * - * - * source/common/common_uwp.vcxproj - * source/common/common.vcxproj - update 'Output file name' on the link tab so - * that it contains the new major/minor combination - * source/i18n/i18n.vcxproj - same as for the common.vcxproj - * source/i18n/i18n_uwp.vcxproj - same as for the common_uwp.vcxproj - * source/layoutex/layoutex.vcproj - same - * source/stubdata/stubdata.vcproj - same as for the common.vcxproj - * source/io/io.vcproj - same as for the common.vcxproj - * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains - * the new major/minor combination and the Unicode version. - */ - -#ifndef UVERNUM_H -#define UVERNUM_H - -/** The standard copyright notice that gets compiled into each library. - * This value will change in the subsequent releases of ICU - * @stable ICU 2.4 - */ -#define U_COPYRIGHT_STRING \ - " Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html " - -/** The current ICU major version as an integer. - * This value will change in the subsequent releases of ICU - * @stable ICU 2.4 - */ -#define U_ICU_VERSION_MAJOR_NUM 63 - -/** The current ICU minor version as an integer. - * This value will change in the subsequent releases of ICU - * @stable ICU 2.6 - */ -#define U_ICU_VERSION_MINOR_NUM 1 - -/** The current ICU patchlevel version as an integer. - * This value will change in the subsequent releases of ICU - * @stable ICU 2.4 - */ -#define U_ICU_VERSION_PATCHLEVEL_NUM 0 - -/** The current ICU build level version as an integer. 
- * This value is for use by ICU clients. It defaults to 0. - * @stable ICU 4.0 - */ -#ifndef U_ICU_VERSION_BUILDLEVEL_NUM -#define U_ICU_VERSION_BUILDLEVEL_NUM 0 -#endif - -/** Glued version suffix for renamers - * This value will change in the subsequent releases of ICU - * @stable ICU 2.6 - */ -#define U_ICU_VERSION_SUFFIX _63 - -/** - * \def U_DEF2_ICU_ENTRY_POINT_RENAME - * @internal - */ -/** - * \def U_DEF_ICU_ENTRY_POINT_RENAME - * @internal - */ -/** Glued version suffix function for renamers - * This value will change in the subsequent releases of ICU. - * If a custom suffix (such as matching library suffixes) is desired, this can be modified. - * Note that if present, platform.h may contain an earlier definition of this macro. - * \def U_ICU_ENTRY_POINT_RENAME - * @stable ICU 4.2 - */ - -#ifndef U_ICU_ENTRY_POINT_RENAME -#ifdef U_HAVE_LIB_SUFFIX -#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z -#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) -#define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME) -#else -#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y -#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y) -#define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX) -#endif -#endif - -/** The current ICU library version as a dotted-decimal string. The patchlevel - * only appears in this string if it non-zero. - * This value will change in the subsequent releases of ICU - * @stable ICU 2.4 - */ -#define U_ICU_VERSION "63.1" - -/** - * The current ICU library major version number as a string, for library name suffixes. - * This value will change in subsequent releases of ICU. - * - * Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers - * into one string without dots ("48"). - * Since ICU 49, it is the double-digit major ICU version number. 
- * See http://userguide.icu-project.org/design#TOC-Version-Numbers-in-ICU - * - * @stable ICU 2.6 - */ -#define U_ICU_VERSION_SHORT "63" - -#ifndef U_HIDE_INTERNAL_API -/** Data version in ICU4C. - * @internal ICU 4.4 Internal Use Only - **/ -#define U_ICU_DATA_VERSION "63.1" -#endif /* U_HIDE_INTERNAL_API */ - -/*=========================================================================== - * ICU collation framework version information - * Version info that can be obtained from a collator is affected by these - * numbers in a secret and magic way. Please use collator version as whole - *=========================================================================== - */ - -/** - * Collation runtime version (sort key generator, strcoll). - * If the version is different, sort keys for the same string could be different. - * This value may change in subsequent releases of ICU. - * @stable ICU 2.4 - */ -#define UCOL_RUNTIME_VERSION 9 - -/** - * Collation builder code version. - * When this is different, the same tailoring might result - * in assigning different collation elements to code points. - * This value may change in subsequent releases of ICU. - * @stable ICU 2.4 - */ -#define UCOL_BUILDER_VERSION 9 - -#ifndef U_HIDE_DEPRECATED_API -/** - * Constant 1. - * This was intended to be the version of collation tailorings, - * but instead the tailoring data carries a version number. - * @deprecated ICU 54 - */ -#define UCOL_TAILORINGS_VERSION 1 -#endif /* U_HIDE_DEPRECATED_API */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/unicode/uversion.h b/deps/node/deps/icu-small/source/common/unicode/uversion.h deleted file mode 100644 index 3f0251d3..00000000 --- a/deps/node/deps/icu-small/source/common/unicode/uversion.h +++ /dev/null @@ -1,201 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2000-2011, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* -* file name: uversion.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* Created by: Vladimir Weinstein -* -* Gets included by utypes.h and Windows .rc files -*/ - -/** - * \file - * \brief C API: API for accessing ICU version numbers. - */ -/*===========================================================================*/ -/* Main ICU version information */ -/*===========================================================================*/ - -#ifndef UVERSION_H -#define UVERSION_H - -#include "unicode/umachine.h" - -/* Actual version info lives in uvernum.h */ -#include "unicode/uvernum.h" - -/** Maximum length of the copyright string. - * @stable ICU 2.4 - */ -#define U_COPYRIGHT_STRING_LENGTH 128 - -/** An ICU version consists of up to 4 numbers from 0..255. - * @stable ICU 2.4 - */ -#define U_MAX_VERSION_LENGTH 4 - -/** In a string, ICU version fields are delimited by dots. - * @stable ICU 2.4 - */ -#define U_VERSION_DELIMITER '.' - -/** The maximum length of an ICU version string. - * @stable ICU 2.4 - */ -#define U_MAX_VERSION_STRING_LENGTH 20 - -/** The binary form of a version on ICU APIs is an array of 4 uint8_t. - * To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)). - * @stable ICU 2.4 - */ -typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; - -/*===========================================================================*/ -/* C++ namespace if supported. Versioned unless versioning is disabled. */ -/*===========================================================================*/ - -/** - * \def U_NAMESPACE_BEGIN - * This is used to begin a declaration of a public ICU C++ API. 
- * When not compiling for C++, it does nothing. - * When compiling for C++, it begins an extern "C++" linkage block (to protect - * against cases in which an external client includes ICU header files inside - * an extern "C" linkage block). - * - * It also begins a versioned-ICU-namespace block. - * @stable ICU 2.4 - */ - -/** - * \def U_NAMESPACE_END - * This is used to end a declaration of a public ICU C++ API. - * When not compiling for C++, it does nothing. - * When compiling for C++, it ends the extern "C++" block begun by - * U_NAMESPACE_BEGIN. - * - * It also ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN. - * @stable ICU 2.4 - */ - -/** - * \def U_NAMESPACE_USE - * This is used to specify that the rest of the code uses the - * public ICU C++ API namespace. - * This is invoked by default; we recommend that you turn it off: - * See the "Recommended Build Options" section of the ICU4C readme - * (http://source.icu-project.org/repos/icu/icu/trunk/readme.html#RecBuild) - * @stable ICU 2.4 - */ - -/** - * \def U_NAMESPACE_QUALIFIER - * This is used to qualify that a function or class is part of - * the public ICU C++ API namespace. - * - * This macro is unnecessary since ICU 49 requires namespace support. - * You can just use "icu::" instead. - * @stable ICU 2.4 - */ - -/* Define C++ namespace symbols. 
*/ -#ifdef __cplusplus -# if U_DISABLE_RENAMING -# define U_ICU_NAMESPACE icu - namespace U_ICU_NAMESPACE { } -# else -# define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu) - namespace U_ICU_NAMESPACE { } - namespace icu = U_ICU_NAMESPACE; -# endif - -# define U_NAMESPACE_BEGIN extern "C++" { namespace U_ICU_NAMESPACE { -# define U_NAMESPACE_END } } -# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE; -# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE:: - -# ifndef U_USING_ICU_NAMESPACE -# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ - defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \ - defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION) -# define U_USING_ICU_NAMESPACE 0 -# else -# define U_USING_ICU_NAMESPACE 0 -# endif -# endif -# if U_USING_ICU_NAMESPACE - U_NAMESPACE_USE -# endif -#else -# define U_NAMESPACE_BEGIN -# define U_NAMESPACE_END -# define U_NAMESPACE_USE -# define U_NAMESPACE_QUALIFIER -#endif - -/*===========================================================================*/ -/* General version helper functions. Definitions in putil.c */ -/*===========================================================================*/ - -/** - * Parse a string with dotted-decimal version information and - * fill in a UVersionInfo structure with the result. - * Definition of this function lives in putil.c - * - * @param versionArray The destination structure for the version information. - * @param versionString A string with dotted-decimal version information, - * with up to four non-negative number fields with - * values of up to 255 each. - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -u_versionFromString(UVersionInfo versionArray, const char *versionString); - -/** - * Parse a Unicode string with dotted-decimal version information and - * fill in a UVersionInfo structure with the result. 
- * Definition of this function lives in putil.c - * - * @param versionArray The destination structure for the version information. - * @param versionString A Unicode string with dotted-decimal version - * information, with up to four non-negative number - * fields with values of up to 255 each. - * @stable ICU 4.2 - */ -U_STABLE void U_EXPORT2 -u_versionFromUString(UVersionInfo versionArray, const UChar *versionString); - - -/** - * Write a string with dotted-decimal version information according - * to the input UVersionInfo. - * Definition of this function lives in putil.c - * - * @param versionArray The version information to be written as a string. - * @param versionString A string buffer that will be filled in with - * a string corresponding to the numeric version - * information in versionArray. - * The buffer size must be at least U_MAX_VERSION_STRING_LENGTH. - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -u_versionToString(const UVersionInfo versionArray, char *versionString); - -/** - * Gets the ICU release version. The version array stores the version information - * for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02. - * Definition of this function lives in putil.c - * - * @param versionArray the version # information, the result will be filled in - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -u_getVersion(UVersionInfo versionArray); -#endif diff --git a/deps/node/deps/icu-small/source/common/unifiedcache.cpp b/deps/node/deps/icu-small/source/common/unifiedcache.cpp deleted file mode 100644 index d33d8d2c..00000000 --- a/deps/node/deps/icu-small/source/common/unifiedcache.cpp +++ /dev/null @@ -1,519 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2015, International Business Machines Corporation and -* others. All Rights Reserved. 
-****************************************************************************** -* -* File unifiedcache.cpp -****************************************************************************** -*/ - -#include "unifiedcache.h" - -#include // For std::max() - -#include "mutex.h" -#include "uassert.h" -#include "uhash.h" -#include "ucln_cmn.h" -#include "umutex.h" - -static icu::UnifiedCache *gCache = NULL; -static UMutex gCacheMutex = U_MUTEX_INITIALIZER; -static UConditionVar gInProgressValueAddedCond = U_CONDITION_INITIALIZER; -static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER; - -static const int32_t MAX_EVICT_ITERATIONS = 10; -static const int32_t DEFAULT_MAX_UNUSED = 1000; -static const int32_t DEFAULT_PERCENTAGE_OF_IN_USE = 100; - - -U_CDECL_BEGIN -static UBool U_CALLCONV unifiedcache_cleanup() { - gCacheInitOnce.reset(); - if (gCache) { - delete gCache; - gCache = NULL; - } - return TRUE; -} -U_CDECL_END - - -U_NAMESPACE_BEGIN - -U_CAPI int32_t U_EXPORT2 -ucache_hashKeys(const UHashTok key) { - const CacheKeyBase *ckey = (const CacheKeyBase *) key.pointer; - return ckey->hashCode(); -} - -U_CAPI UBool U_EXPORT2 -ucache_compareKeys(const UHashTok key1, const UHashTok key2) { - const CacheKeyBase *p1 = (const CacheKeyBase *) key1.pointer; - const CacheKeyBase *p2 = (const CacheKeyBase *) key2.pointer; - return *p1 == *p2; -} - -U_CAPI void U_EXPORT2 -ucache_deleteKey(void *obj) { - CacheKeyBase *p = (CacheKeyBase *) obj; - delete p; -} - -CacheKeyBase::~CacheKeyBase() { -} - -static void U_CALLCONV cacheInit(UErrorCode &status) { - U_ASSERT(gCache == NULL); - ucln_common_registerCleanup( - UCLN_COMMON_UNIFIED_CACHE, unifiedcache_cleanup); - - gCache = new UnifiedCache(status); - if (gCache == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } - if (U_FAILURE(status)) { - delete gCache; - gCache = NULL; - return; - } -} - -UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) { - umtx_initOnce(gCacheInitOnce, &cacheInit, status); - if 
(U_FAILURE(status)) { - return NULL; - } - U_ASSERT(gCache != NULL); - return gCache; -} - -UnifiedCache::UnifiedCache(UErrorCode &status) : - fHashtable(NULL), - fEvictPos(UHASH_FIRST), - fNumValuesTotal(0), - fNumValuesInUse(0), - fMaxUnused(DEFAULT_MAX_UNUSED), - fMaxPercentageOfInUse(DEFAULT_PERCENTAGE_OF_IN_USE), - fAutoEvictedCount(0), - fNoValue(nullptr) { - if (U_FAILURE(status)) { - return; - } - fNoValue = new SharedObject(); - if (fNoValue == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - fNoValue->softRefCount = 1; // Add fake references to prevent fNoValue from being deleted - fNoValue->hardRefCount = 1; // when other references to it are removed. - fNoValue->cachePtr = this; - - fHashtable = uhash_open( - &ucache_hashKeys, - &ucache_compareKeys, - NULL, - &status); - if (U_FAILURE(status)) { - return; - } - uhash_setKeyDeleter(fHashtable, &ucache_deleteKey); -} - -void UnifiedCache::setEvictionPolicy( - int32_t count, int32_t percentageOfInUseItems, UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - if (count < 0 || percentageOfInUseItems < 0) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - Mutex lock(&gCacheMutex); - fMaxUnused = count; - fMaxPercentageOfInUse = percentageOfInUseItems; -} - -int32_t UnifiedCache::unusedCount() const { - Mutex lock(&gCacheMutex); - return uhash_count(fHashtable) - fNumValuesInUse; -} - -int64_t UnifiedCache::autoEvictedCount() const { - Mutex lock(&gCacheMutex); - return fAutoEvictedCount; -} - -int32_t UnifiedCache::keyCount() const { - Mutex lock(&gCacheMutex); - return uhash_count(fHashtable); -} - -void UnifiedCache::flush() const { - Mutex lock(&gCacheMutex); - - // Use a loop in case cache items that are flushed held hard references to - // other cache items making those additional cache items eligible for - // flushing. 
- while (_flush(FALSE)); -} - -void UnifiedCache::handleUnreferencedObject() const { - Mutex lock(&gCacheMutex); - --fNumValuesInUse; - _runEvictionSlice(); -} - -#ifdef UNIFIED_CACHE_DEBUG -#include - -void UnifiedCache::dump() { - UErrorCode status = U_ZERO_ERROR; - const UnifiedCache *cache = getInstance(status); - if (U_FAILURE(status)) { - fprintf(stderr, "Unified Cache: Error fetching cache.\n"); - return; - } - cache->dumpContents(); -} - -void UnifiedCache::dumpContents() const { - Mutex lock(&gCacheMutex); - _dumpContents(); -} - -// Dumps content of cache. -// On entry, gCacheMutex must be held. -// On exit, cache contents dumped to stderr. -void UnifiedCache::_dumpContents() const { - int32_t pos = UHASH_FIRST; - const UHashElement *element = uhash_nextElement(fHashtable, &pos); - char buffer[256]; - int32_t cnt = 0; - for (; element != NULL; element = uhash_nextElement(fHashtable, &pos)) { - const SharedObject *sharedObject = - (const SharedObject *) element->value.pointer; - const CacheKeyBase *key = - (const CacheKeyBase *) element->key.pointer; - if (sharedObject->hasHardReferences()) { - ++cnt; - fprintf( - stderr, - "Unified Cache: Key '%s', error %d, value %p, total refcount %d, soft refcount %d\n", - key->writeDescription(buffer, 256), - key->creationStatus, - sharedObject == fNoValue ? NULL :sharedObject, - sharedObject->getRefCount(), - sharedObject->getSoftRefCount()); - } - } - fprintf(stderr, "Unified Cache: %d out of a total of %d still have hard references\n", cnt, uhash_count(fHashtable)); -} -#endif - -UnifiedCache::~UnifiedCache() { - // Try our best to clean up first. - flush(); - { - // Now all that should be left in the cache are entries that refer to - // each other and entries with hard references from outside the cache. - // Nothing we can do about these so proceed to wipe out the cache. 
- Mutex lock(&gCacheMutex); - _flush(TRUE); - } - uhash_close(fHashtable); - fHashtable = nullptr; - delete fNoValue; - fNoValue = nullptr; -} - -const UHashElement * -UnifiedCache::_nextElement() const { - const UHashElement *element = uhash_nextElement(fHashtable, &fEvictPos); - if (element == NULL) { - fEvictPos = UHASH_FIRST; - return uhash_nextElement(fHashtable, &fEvictPos); - } - return element; -} - -UBool UnifiedCache::_flush(UBool all) const { - UBool result = FALSE; - int32_t origSize = uhash_count(fHashtable); - for (int32_t i = 0; i < origSize; ++i) { - const UHashElement *element = _nextElement(); - if (element == nullptr) { - break; - } - if (all || _isEvictable(element)) { - const SharedObject *sharedObject = - (const SharedObject *) element->value.pointer; - U_ASSERT(sharedObject->cachePtr == this); - uhash_removeElement(fHashtable, element); - removeSoftRef(sharedObject); // Deletes the sharedObject when softRefCount goes to zero. - result = TRUE; - } - } - return result; -} - -int32_t UnifiedCache::_computeCountOfItemsToEvict() const { - int32_t totalItems = uhash_count(fHashtable); - int32_t evictableItems = totalItems - fNumValuesInUse; - - int32_t unusedLimitByPercentage = fNumValuesInUse * fMaxPercentageOfInUse / 100; - int32_t unusedLimit = std::max(unusedLimitByPercentage, fMaxUnused); - int32_t countOfItemsToEvict = std::max(0, evictableItems - unusedLimit); - return countOfItemsToEvict; -} - -void UnifiedCache::_runEvictionSlice() const { - int32_t maxItemsToEvict = _computeCountOfItemsToEvict(); - if (maxItemsToEvict <= 0) { - return; - } - for (int32_t i = 0; i < MAX_EVICT_ITERATIONS; ++i) { - const UHashElement *element = _nextElement(); - if (element == nullptr) { - break; - } - if (_isEvictable(element)) { - const SharedObject *sharedObject = - (const SharedObject *) element->value.pointer; - uhash_removeElement(fHashtable, element); - removeSoftRef(sharedObject); // Deletes sharedObject when SoftRefCount goes to zero. 
- ++fAutoEvictedCount; - if (--maxItemsToEvict == 0) { - break; - } - } - } -} - -void UnifiedCache::_putNew( - const CacheKeyBase &key, - const SharedObject *value, - const UErrorCode creationStatus, - UErrorCode &status) const { - if (U_FAILURE(status)) { - return; - } - CacheKeyBase *keyToAdopt = key.clone(); - if (keyToAdopt == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - keyToAdopt->fCreationStatus = creationStatus; - if (value->softRefCount == 0) { - _registerMaster(keyToAdopt, value); - } - void *oldValue = uhash_put(fHashtable, keyToAdopt, (void *) value, &status); - U_ASSERT(oldValue == nullptr); - (void)oldValue; - if (U_SUCCESS(status)) { - value->softRefCount++; - } -} - -void UnifiedCache::_putIfAbsentAndGet( - const CacheKeyBase &key, - const SharedObject *&value, - UErrorCode &status) const { - Mutex lock(&gCacheMutex); - const UHashElement *element = uhash_find(fHashtable, &key); - if (element != NULL && !_inProgress(element)) { - _fetch(element, value, status); - return; - } - if (element == NULL) { - UErrorCode putError = U_ZERO_ERROR; - // best-effort basis only. - _putNew(key, value, status, putError); - } else { - _put(element, value, status); - } - // Run an eviction slice. This will run even if we added a master entry - // which doesn't increase the unused count, but that is still o.k - _runEvictionSlice(); -} - - -UBool UnifiedCache::_poll( - const CacheKeyBase &key, - const SharedObject *&value, - UErrorCode &status) const { - U_ASSERT(value == NULL); - U_ASSERT(status == U_ZERO_ERROR); - Mutex lock(&gCacheMutex); - const UHashElement *element = uhash_find(fHashtable, &key); - - // If the hash table contains an inProgress placeholder entry for this key, - // this means that another thread is currently constructing the value object. - // Loop, waiting for that construction to complete. 
- while (element != NULL && _inProgress(element)) { - umtx_condWait(&gInProgressValueAddedCond, &gCacheMutex); - element = uhash_find(fHashtable, &key); - } - - // If the hash table contains an entry for the key, - // fetch out the contents and return them. - if (element != NULL) { - _fetch(element, value, status); - return TRUE; - } - - // The hash table contained nothing for this key. - // Insert an inProgress place holder value. - // Our caller will create the final value and update the hash table. - _putNew(key, fNoValue, U_ZERO_ERROR, status); - return FALSE; -} - -void UnifiedCache::_get( - const CacheKeyBase &key, - const SharedObject *&value, - const void *creationContext, - UErrorCode &status) const { - U_ASSERT(value == NULL); - U_ASSERT(status == U_ZERO_ERROR); - if (_poll(key, value, status)) { - if (value == fNoValue) { - SharedObject::clearPtr(value); - } - return; - } - if (U_FAILURE(status)) { - return; - } - value = key.createObject(creationContext, status); - U_ASSERT(value == NULL || value->hasHardReferences()); - U_ASSERT(value != NULL || status != U_ZERO_ERROR); - if (value == NULL) { - SharedObject::copyPtr(fNoValue, value); - } - _putIfAbsentAndGet(key, value, status); - if (value == fNoValue) { - SharedObject::clearPtr(value); - } -} - -void UnifiedCache::_registerMaster( - const CacheKeyBase *theKey, const SharedObject *value) const { - theKey->fIsMaster = true; - value->cachePtr = this; - ++fNumValuesTotal; - ++fNumValuesInUse; -} - -void UnifiedCache::_put( - const UHashElement *element, - const SharedObject *value, - const UErrorCode status) const { - U_ASSERT(_inProgress(element)); - const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; - const SharedObject *oldValue = (const SharedObject *) element->value.pointer; - theKey->fCreationStatus = status; - if (value->softRefCount == 0) { - _registerMaster(theKey, value); - } - value->softRefCount++; - UHashElement *ptr = const_cast(element); - ptr->value.pointer = (void 
*) value; - U_ASSERT(oldValue == fNoValue); - removeSoftRef(oldValue); - - // Tell waiting threads that we replace in-progress status with - // an error. - umtx_condBroadcast(&gInProgressValueAddedCond); -} - -void UnifiedCache::_fetch( - const UHashElement *element, - const SharedObject *&value, - UErrorCode &status) const { - const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; - status = theKey->fCreationStatus; - - // Since we have the cache lock, calling regular SharedObject add/removeRef - // could cause us to deadlock on ourselves since they may need to lock - // the cache mutex. - removeHardRef(value); - value = static_cast(element->value.pointer); - addHardRef(value); -} - - -UBool UnifiedCache::_inProgress(const UHashElement* element) const { - UErrorCode status = U_ZERO_ERROR; - const SharedObject * value = NULL; - _fetch(element, value, status); - UBool result = _inProgress(value, status); - removeHardRef(value); - return result; -} - -UBool UnifiedCache::_inProgress( - const SharedObject* theValue, UErrorCode creationStatus) const { - return (theValue == fNoValue && creationStatus == U_ZERO_ERROR); -} - -UBool UnifiedCache::_isEvictable(const UHashElement *element) const -{ - const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; - const SharedObject *theValue = - (const SharedObject *) element->value.pointer; - - // Entries that are under construction are never evictable - if (_inProgress(theValue, theKey->fCreationStatus)) { - return FALSE; - } - - // We can evict entries that are either not a master or have just - // one reference (The one reference being from the cache itself). 
- return (!theKey->fIsMaster || (theValue->softRefCount == 1 && theValue->noHardReferences())); -} - -void UnifiedCache::removeSoftRef(const SharedObject *value) const { - U_ASSERT(value->cachePtr == this); - U_ASSERT(value->softRefCount > 0); - if (--value->softRefCount == 0) { - --fNumValuesTotal; - if (value->noHardReferences()) { - delete value; - } else { - // This path only happens from flush(all). Which only happens from the - // UnifiedCache destructor. Nulling out value.cacheptr changes the behavior - // of value.removeRef(), causing the deletion to be done there. - value->cachePtr = nullptr; - } - } -} - -int32_t UnifiedCache::removeHardRef(const SharedObject *value) const { - int refCount = 0; - if (value) { - refCount = umtx_atomic_dec(&value->hardRefCount); - U_ASSERT(refCount >= 0); - if (refCount == 0) { - --fNumValuesInUse; - } - } - return refCount; -} - -int32_t UnifiedCache::addHardRef(const SharedObject *value) const { - int refCount = 0; - if (value) { - refCount = umtx_atomic_inc(&value->hardRefCount); - U_ASSERT(refCount >= 1); - if (refCount == 1) { - fNumValuesInUse++; - } - } - return refCount; -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/unifiedcache.h b/deps/node/deps/icu-small/source/common/unifiedcache.h deleted file mode 100644 index b3ccd60d..00000000 --- a/deps/node/deps/icu-small/source/common/unifiedcache.h +++ /dev/null @@ -1,556 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2015, International Business Machines Corporation and -* others. All Rights Reserved. -****************************************************************************** -* -* File UNIFIEDCACHE.H - The ICU Unified cache. 
-****************************************************************************** -*/ - -#ifndef __UNIFIED_CACHE_H__ -#define __UNIFIED_CACHE_H__ - -#include "utypeinfo.h" // for 'typeid' to work - -#include "unicode/uobject.h" -#include "unicode/locid.h" -#include "sharedobject.h" -#include "unicode/unistr.h" -#include "cstring.h" -#include "ustr_imp.h" - -struct UHashtable; -struct UHashElement; - -U_NAMESPACE_BEGIN - -class UnifiedCache; - -/** - * A base class for all cache keys. - */ -class U_COMMON_API CacheKeyBase : public UObject { - public: - CacheKeyBase() : fCreationStatus(U_ZERO_ERROR), fIsMaster(FALSE) {} - - /** - * Copy constructor. Needed to support cloning. - */ - CacheKeyBase(const CacheKeyBase &other) - : UObject(other), fCreationStatus(other.fCreationStatus), fIsMaster(FALSE) { } - virtual ~CacheKeyBase(); - - /** - * Returns the hash code for this object. - */ - virtual int32_t hashCode() const = 0; - - /** - * Clones this object polymorphically. Caller owns returned value. - */ - virtual CacheKeyBase *clone() const = 0; - - /** - * Equality operator. - */ - virtual UBool operator == (const CacheKeyBase &other) const = 0; - - /** - * Create a new object for this key. Called by cache on cache miss. - * createObject must add a reference to the object it returns. Note - * that getting an object from the cache and returning it without calling - * removeRef on it satisfies this requirement. It can also return NULL - * and set status to an error. - * - * @param creationContext the context in which the object is being - * created. May be NULL. - * @param status Implementations can return a failure here. - * In addition, implementations may return a - * non NULL object and set a warning status. - */ - virtual const SharedObject *createObject( - const void *creationContext, UErrorCode &status) const = 0; - - /** - * Writes a description of this key to buffer and returns buffer. Written - * description is NULL terminated. 
- */ - virtual char *writeDescription(char *buffer, int32_t bufSize) const = 0; - - /** - * Inequality operator. - */ - UBool operator != (const CacheKeyBase &other) const { - return !(*this == other); - } - private: - mutable UErrorCode fCreationStatus; - mutable UBool fIsMaster; - friend class UnifiedCache; -}; - - - -/** - * Templated version of CacheKeyBase. - * A key of type LocaleCacheKey maps to a value of type T. - */ -template -class CacheKey : public CacheKeyBase { - public: - virtual ~CacheKey() { } - /** - * The template parameter, T, determines the hash code returned. - */ - virtual int32_t hashCode() const { - const char *s = typeid(T).name(); - return ustr_hashCharsN(s, static_cast(uprv_strlen(s))); - } - - /** - * Use the value type, T, as the description. - */ - virtual char *writeDescription(char *buffer, int32_t bufLen) const { - const char *s = typeid(T).name(); - uprv_strncpy(buffer, s, bufLen); - buffer[bufLen - 1] = 0; - return buffer; - } - - /** - * Two objects are equal if they are of the same type. - */ - virtual UBool operator == (const CacheKeyBase &other) const { - return typeid(*this) == typeid(other); - } -}; - -/** - * Cache key based on locale. - * A key of type LocaleCacheKey maps to a value of type T. - */ -template -class LocaleCacheKey : public CacheKey { - protected: - Locale fLoc; - public: - LocaleCacheKey(const Locale &loc) : fLoc(loc) {}; - LocaleCacheKey(const LocaleCacheKey &other) - : CacheKey(other), fLoc(other.fLoc) { } - virtual ~LocaleCacheKey() { } - virtual int32_t hashCode() const { - return (int32_t)(37u * (uint32_t)CacheKey::hashCode() + (uint32_t)fLoc.hashCode()); - } - virtual UBool operator == (const CacheKeyBase &other) const { - // reflexive - if (this == &other) { - return TRUE; - } - if (!CacheKey::operator == (other)) { - return FALSE; - } - // We know this and other are of same class because operator== on - // CacheKey returned true. 
- const LocaleCacheKey *fOther = - static_cast *>(&other); - return fLoc == fOther->fLoc; - } - virtual CacheKeyBase *clone() const { - return new LocaleCacheKey(*this); - } - virtual const T *createObject( - const void *creationContext, UErrorCode &status) const; - /** - * Use the locale id as the description. - */ - virtual char *writeDescription(char *buffer, int32_t bufLen) const { - const char *s = fLoc.getName(); - uprv_strncpy(buffer, s, bufLen); - buffer[bufLen - 1] = 0; - return buffer; - } - -}; - -/** - * The unified cache. A singleton type. - * Design doc here: - * https://docs.google.com/document/d/1RwGQJs4N4tawNbf809iYDRCvXoMKqDJihxzYt1ysmd8/edit?usp=sharing - */ -class U_COMMON_API UnifiedCache : public UnifiedCacheBase { - public: - /** - * @internal - * Do not call directly. Instead use UnifiedCache::getInstance() as - * there should be only one UnifiedCache in an application. - */ - UnifiedCache(UErrorCode &status); - - /** - * Return a pointer to the global cache instance. - */ - static UnifiedCache *getInstance(UErrorCode &status); - - /** - * Fetches a value from the cache by key. Equivalent to - * get(key, NULL, ptr, status); - */ - template - void get( - const CacheKey& key, - const T *&ptr, - UErrorCode &status) const { - get(key, NULL, ptr, status); - } - - /** - * Fetches value from the cache by key. - * - * @param key the cache key. - * @param creationContext passed verbatim to createObject method of key - * @param ptr On entry, ptr must be NULL or be included if - * the reference count of the object it points - * to. On exit, ptr points to the fetched object - * from the cache or is left unchanged on - * failure. Caller must call removeRef on ptr - * if set to a non NULL value. - * @param status Any error returned here. May be set to a - * warning value even if ptr is set. 
- */ - template - void get( - const CacheKey& key, - const void *creationContext, - const T *&ptr, - UErrorCode &status) const { - if (U_FAILURE(status)) { - return; - } - UErrorCode creationStatus = U_ZERO_ERROR; - const SharedObject *value = NULL; - _get(key, value, creationContext, creationStatus); - const T *tvalue = (const T *) value; - if (U_SUCCESS(creationStatus)) { - SharedObject::copyPtr(tvalue, ptr); - } - SharedObject::clearPtr(tvalue); - // Take care not to overwrite a warning status passed in with - // another warning or U_ZERO_ERROR. - if (status == U_ZERO_ERROR || U_FAILURE(creationStatus)) { - status = creationStatus; - } - } - -#ifdef UNIFIED_CACHE_DEBUG - /** - * Dumps the contents of this cache to standard error. Used for testing of - * cache only. - */ - void dumpContents() const; -#endif - - /** - * Convenience method to get a value of type T from cache for a - * particular locale with creationContext == NULL. - * @param loc the locale - * @param ptr On entry, must be NULL or included in the ref count - * of the object to which it points. - * On exit, fetched value stored here or is left - * unchanged on failure. Caller must call removeRef on - * ptr if set to a non NULL value. - * @param status Any error returned here. May be set to a - * warning value even if ptr is set. - */ - template - static void getByLocale( - const Locale &loc, const T *&ptr, UErrorCode &status) { - const UnifiedCache *cache = getInstance(status); - if (U_FAILURE(status)) { - return; - } - cache->get(LocaleCacheKey(loc), ptr, status); - } - -#ifdef UNIFIED_CACHE_DEBUG - /** - * Dumps the cache contents to stderr. For testing only. - */ - static void dump(); -#endif - - /** - * Returns the number of keys in this cache. For testing only. - */ - int32_t keyCount() const; - - /** - * Removes any values from cache that are not referenced outside - * the cache. - */ - void flush() const; - - /** - * Configures at what point evcition of unused entries will begin. 
- * Eviction is triggered whenever the number of evictable keys exeeds - * BOTH count AND (number of in-use items) * (percentageOfInUseItems / 100). - * Once the number of unused entries drops below one of these, - * eviction ceases. Because eviction happens incrementally, - * the actual unused entry count may exceed both these numbers - * from time to time. - * - * A cache entry is defined as unused if it is not essential to guarantee - * that for a given key X, the cache returns the same reference to the - * same value as long as the client already holds a reference to that - * value. - * - * If this method is never called, the default settings are 1000 and 100%. - * - * Although this method is thread-safe, it is designed to be called at - * application startup. If it is called in the middle of execution, it - * will have no immediate effect on the cache. However over time, the - * cache will perform eviction slices in an attempt to honor the new - * settings. - * - * If a client already holds references to many different unique values - * in the cache such that the number of those unique values far exeeds - * "count" then the cache may not be able to maintain this maximum. - * However, if this happens, the cache still guarantees that the number of - * unused entries will remain only a small percentage of the total cache - * size. - * - * If the parameters passed are negative, setEvctionPolicy sets status to - * U_ILLEGAL_ARGUMENT_ERROR. - */ - void setEvictionPolicy( - int32_t count, int32_t percentageOfInUseItems, UErrorCode &status); - - - /** - * Returns how many entries have been auto evicted during the lifetime - * of this cache. This only includes auto evicted entries, not - * entries evicted because of a call to flush(). - */ - int64_t autoEvictedCount() const; - - /** - * Returns the unused entry count in this cache. For testing only, - * Regular clients will not need this. 
- */ - int32_t unusedCount() const; - - virtual void handleUnreferencedObject() const; - virtual ~UnifiedCache(); - - private: - UHashtable *fHashtable; - mutable int32_t fEvictPos; - mutable int32_t fNumValuesTotal; - mutable int32_t fNumValuesInUse; - int32_t fMaxUnused; - int32_t fMaxPercentageOfInUse; - mutable int64_t fAutoEvictedCount; - SharedObject *fNoValue; - - UnifiedCache(const UnifiedCache &other); - UnifiedCache &operator=(const UnifiedCache &other); - - /** - * Flushes the contents of the cache. If cache values hold references to other - * cache values then _flush should be called in a loop until it returns FALSE. - * - * On entry, gCacheMutex must be held. - * On exit, those values with are evictable are flushed. - * - * @param all if false flush evictable items only, which are those with no external - * references, plus those that can be safely recreated.
- * if true, flush all elements. Any values (sharedObjects) with remaining - * hard (external) references are not deleted, but are detached from - * the cache, so that a subsequent removeRefs can delete them. - * _flush is not thread safe when all is true. - * @return TRUE if any value in cache was flushed or FALSE otherwise. - */ - UBool _flush(UBool all) const; - - /** - * Gets value out of cache. - * On entry. gCacheMutex must not be held. value must be NULL. status - * must be U_ZERO_ERROR. - * On exit. value and status set to what is in cache at key or on cache - * miss the key's createObject() is called and value and status are set to - * the result of that. In this latter case, best effort is made to add the - * value and status to the cache. If createObject() fails to create a value, - * fNoValue is stored in cache, and value is set to NULL. Caller must call - * removeRef on value if non NULL. - */ - void _get( - const CacheKeyBase &key, - const SharedObject *&value, - const void *creationContext, - UErrorCode &status) const; - - /** - * Attempts to fetch value and status for key from cache. - * On entry, gCacheMutex must not be held value must be NULL and status must - * be U_ZERO_ERROR. - * On exit, either returns FALSE (In this - * case caller should try to create the object) or returns TRUE with value - * pointing to the fetched value and status set to fetched status. When - * FALSE is returned status may be set to failure if an in progress hash - * entry could not be made but value will remain unchanged. When TRUE is - * returned, caller must call removeRef() on value. - */ - UBool _poll( - const CacheKeyBase &key, - const SharedObject *&value, - UErrorCode &status) const; - - /** - * Places a new value and creationStatus in the cache for the given key. - * On entry, gCacheMutex must be held. key must not exist in the cache. - * On exit, value and creation status placed under key. Soft reference added - * to value on successful add. 
On error sets status. - */ - void _putNew( - const CacheKeyBase &key, - const SharedObject *value, - const UErrorCode creationStatus, - UErrorCode &status) const; - - /** - * Places value and status at key if there is no value at key or if cache - * entry for key is in progress. Otherwise, it leaves the current value and - * status there. - * - * On entry. gCacheMutex must not be held. Value must be - * included in the reference count of the object to which it points. - * - * On exit, value and status are changed to what was already in the cache if - * something was there and not in progress. Otherwise, value and status are left - * unchanged in which case they are placed in the cache on a best-effort basis. - * Caller must call removeRef() on value. - */ - void _putIfAbsentAndGet( - const CacheKeyBase &key, - const SharedObject *&value, - UErrorCode &status) const; - - /** - * Returns the next element in the cache round robin style. - * Returns nullptr if the cache is empty. - * On entry, gCacheMutex must be held. - */ - const UHashElement *_nextElement() const; - - /** - * Return the number of cache items that would need to be evicted - * to bring usage into conformance with eviction policy. - * - * An item corresponds to an entry in the hash table, a hash table element. - * - * On entry, gCacheMutex must be held. - */ - int32_t _computeCountOfItemsToEvict() const; - - /** - * Run an eviction slice. - * On entry, gCacheMutex must be held. - * _runEvictionSlice runs a slice of the evict pipeline by examining the next - * 10 entries in the cache round robin style evicting them if they are eligible. - */ - void _runEvictionSlice() const; - - /** - * Register a master cache entry. A master key is the first key to create - * a given SharedObject value. Subsequent keys whose create function - * produce referneces to an already existing SharedObject are not masters - - * they can be evicted and subsequently recreated. - * - * On entry, gCacheMutex must be held. 
- * On exit, items in use count incremented, entry is marked as a master - * entry, and value registered with cache so that subsequent calls to - * addRef() and removeRef() on it correctly interact with the cache. - */ - void _registerMaster(const CacheKeyBase *theKey, const SharedObject *value) const; - - /** - * Store a value and creation error status in given hash entry. - * On entry, gCacheMutex must be held. Hash entry element must be in progress. - * value must be non NULL. - * On Exit, soft reference added to value. value and status stored in hash - * entry. Soft reference removed from previous stored value. Waiting - * threads notified. - */ - void _put( - const UHashElement *element, - const SharedObject *value, - const UErrorCode status) const; - /** - * Remove a soft reference, and delete the SharedObject if no references remain. - * To be used from within the UnifiedCache implementation only. - * gCacheMutex must be held by caller. - * @param value the SharedObject to be acted on. - */ - void removeSoftRef(const SharedObject *value) const; - - /** - * Increment the hard reference count of the given SharedObject. - * gCacheMutex must be held by the caller. - * Update numValuesEvictable on transitions between zero and one reference. - * - * @param value The SharedObject to be referenced. - * @return the hard reference count after the addition. - */ - int32_t addHardRef(const SharedObject *value) const; - - /** - * Decrement the hard reference count of the given SharedObject. - * gCacheMutex must be held by the caller. - * Update numValuesEvictable on transitions between one and zero reference. - * - * @param value The SharedObject to be referenced. - * @return the hard reference count after the removal. - */ - int32_t removeHardRef(const SharedObject *value) const; - - -#ifdef UNIFIED_CACHE_DEBUG - void _dumpContents() const; -#endif - - /** - * Fetch value and error code from a particular hash entry. - * On entry, gCacheMutex must be held. 
value must be either NULL or must be - * included in the ref count of the object to which it points. - * On exit, value and status set to what is in the hash entry. Caller must - * eventually call removeRef on value. - * If hash entry is in progress, value will be set to gNoValue and status will - * be set to U_ZERO_ERROR. - */ - void _fetch(const UHashElement *element, const SharedObject *&value, - UErrorCode &status) const; - - /** - * Determine if given hash entry is in progress. - * On entry, gCacheMutex must be held. - */ - UBool _inProgress(const UHashElement *element) const; - - /** - * Determine if given hash entry is in progress. - * On entry, gCacheMutex must be held. - */ - UBool _inProgress(const SharedObject *theValue, UErrorCode creationStatus) const; - - /** - * Determine if given hash entry is eligible for eviction. - * On entry, gCacheMutex must be held. - */ - UBool _isEvictable(const UHashElement *element) const; -}; - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unifilt.cpp b/deps/node/deps/icu-small/source/common/unifilt.cpp deleted file mode 100644 index 4ab0d9b5..00000000 --- a/deps/node/deps/icu-small/source/common/unifilt.cpp +++ /dev/null @@ -1,71 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2001-2012, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 07/18/01 aliu Creation. -********************************************************************** -*/ - -#include "unicode/unifilt.h" -#include "unicode/rep.h" -#include "unicode/utf16.h" - -U_NAMESPACE_BEGIN -UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFilter) - - -/* Define this here due to the lack of another file. 
- It can't be defined in the header */ -UnicodeMatcher::~UnicodeMatcher() {} - -UnicodeFilter::~UnicodeFilter() {} - -/** - * UnicodeFunctor API. - * Note that UnicodeMatcher is a base class of UnicodeFilter. - */ -UnicodeMatcher* UnicodeFilter::toMatcher() const { - return const_cast(this); -} - -void UnicodeFilter::setData(const TransliterationRuleData*) {} - -/** - * Default implementation of UnicodeMatcher::matches() for Unicode - * filters. Matches a single code point at offset (either one or - * two 16-bit code units). - */ -UMatchDegree UnicodeFilter::matches(const Replaceable& text, - int32_t& offset, - int32_t limit, - UBool incremental) { - UChar32 c; - if (offset < limit && - contains(c = text.char32At(offset))) { - offset += U16_LENGTH(c); - return U_MATCH; - } - if (offset > limit && - contains(c = text.char32At(offset))) { - // Backup offset by 1, unless the preceding character is a - // surrogate pair -- then backup by 2 (keep offset pointing at - // the lead surrogate). - --offset; - if (offset >= 0) { - offset -= U16_LENGTH(text.char32At(offset)) - 1; - } - return U_MATCH; - } - if (incremental && offset == limit) { - return U_PARTIAL_MATCH; - } - return U_MISMATCH; -} - -U_NAMESPACE_END - -//eof diff --git a/deps/node/deps/icu-small/source/common/unifunct.cpp b/deps/node/deps/icu-small/source/common/unifunct.cpp deleted file mode 100644 index f3995b29..00000000 --- a/deps/node/deps/icu-small/source/common/unifunct.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2002-2004, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -*/ - -#include "unicode/unifunct.h" - -U_NAMESPACE_BEGIN - -UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFunctor) - -UnicodeFunctor::~UnicodeFunctor() {} - -UnicodeMatcher* UnicodeFunctor::toMatcher() const { - return 0; -} - -UnicodeReplacer* UnicodeFunctor::toReplacer() const { - return 0; -} - -U_NAMESPACE_END - -//eof diff --git a/deps/node/deps/icu-small/source/common/uniset.cpp b/deps/node/deps/icu-small/source/common/uniset.cpp deleted file mode 100644 index 7d2e3cd6..00000000 --- a/deps/node/deps/icu-small/source/common/uniset.cpp +++ /dev/null @@ -1,2374 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2015, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 10/20/99 alan Creation. -********************************************************************** -*/ - -#include "unicode/utypes.h" -#include "unicode/parsepos.h" -#include "unicode/symtable.h" -#include "unicode/uniset.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" -#include "ruleiter.h" -#include "cmemory.h" -#include "cstring.h" -#include "patternprops.h" -#include "uelement.h" -#include "util.h" -#include "uvector.h" -#include "charstr.h" -#include "ustrfmt.h" -#include "uassert.h" -#include "bmpset.h" -#include "unisetspan.h" - -// Define UChar constants using hex for EBCDIC compatibility -// Used #define to reduce private static exports and memory access time. 
-#define SET_OPEN ((UChar)0x005B) /*[*/ -#define SET_CLOSE ((UChar)0x005D) /*]*/ -#define HYPHEN ((UChar)0x002D) /*-*/ -#define COMPLEMENT ((UChar)0x005E) /*^*/ -#define COLON ((UChar)0x003A) /*:*/ -#define BACKSLASH ((UChar)0x005C) /*\*/ -#define INTERSECTION ((UChar)0x0026) /*&*/ -#define UPPER_U ((UChar)0x0055) /*U*/ -#define LOWER_U ((UChar)0x0075) /*u*/ -#define OPEN_BRACE ((UChar)123) /*{*/ -#define CLOSE_BRACE ((UChar)125) /*}*/ -#define UPPER_P ((UChar)0x0050) /*P*/ -#define LOWER_P ((UChar)0x0070) /*p*/ -#define UPPER_N ((UChar)78) /*N*/ -#define EQUALS ((UChar)0x003D) /*=*/ - -// HIGH_VALUE > all valid values. 110000 for codepoints -#define UNICODESET_HIGH 0x0110000 - -// LOW <= all valid values. ZERO for codepoints -#define UNICODESET_LOW 0x000000 - -// initial storage. Must be >= 0 -#define START_EXTRA 16 - -// extra amount for growth. Must be >= 0 -#define GROW_EXTRA START_EXTRA - -U_NAMESPACE_BEGIN - -SymbolTable::~SymbolTable() {} - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet) - -/** - * Modify the given UChar32 variable so that it is in range, by - * pinning values < UNICODESET_LOW to UNICODESET_LOW, and - * pinning values > UNICODESET_HIGH-1 to UNICODESET_HIGH-1. - * It modifies its argument in-place and also returns it. - */ -static inline UChar32 pinCodePoint(UChar32& c) { - if (c < UNICODESET_LOW) { - c = UNICODESET_LOW; - } else if (c > (UNICODESET_HIGH-1)) { - c = (UNICODESET_HIGH-1); - } - return c; -} - -//---------------------------------------------------------------- -// Debugging -//---------------------------------------------------------------- - -// DO NOT DELETE THIS CODE. This code is used to debug memory leaks. -// To enable the debugging, define the symbol DEBUG_MEM in the line -// below. 
This will result in text being sent to stdout that looks -// like this: -// DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85- -// DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85- -// Each line lists a construction (ct) or destruction (dt) event, the -// object address, the number of outstanding objects after the event, -// and the pattern of the object in question. - -// #define DEBUG_MEM - -#ifdef DEBUG_MEM -#include -static int32_t _dbgCount = 0; - -static inline void _dbgct(UnicodeSet* set) { - UnicodeString str; - set->toPattern(str, TRUE); - char buf[40]; - str.extract(0, 39, buf, ""); - printf("DEBUG UnicodeSet: ct 0x%08X; %d %s\n", set, ++_dbgCount, buf); -} - -static inline void _dbgdt(UnicodeSet* set) { - UnicodeString str; - set->toPattern(str, TRUE); - char buf[40]; - str.extract(0, 39, buf, ""); - printf("DEBUG UnicodeSet: dt 0x%08X; %d %s\n", set, --_dbgCount, buf); -} - -#else - -#define _dbgct(set) -#define _dbgdt(set) - -#endif - -//---------------------------------------------------------------- -// UnicodeString in UVector support -//---------------------------------------------------------------- - -static void U_CALLCONV cloneUnicodeString(UElement *dst, UElement *src) { - dst->pointer = new UnicodeString(*(UnicodeString*)src->pointer); -} - -static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { - const UnicodeString &a = *(const UnicodeString*)t1.pointer; - const UnicodeString &b = *(const UnicodeString*)t2.pointer; - return a.compare(b); -} - -//---------------------------------------------------------------- -// Constructors &c -//---------------------------------------------------------------- - -/** - * Constructs an empty set. 
- */ -UnicodeSet::UnicodeSet() : - len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0), - bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), - fFlags(0) -{ - UErrorCode status = U_ZERO_ERROR; - allocateStrings(status); - if (U_FAILURE(status)) { - setToBogus(); // If memory allocation failed, set to bogus state. - return; - } - list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); - if(list!=NULL){ - list[0] = UNICODESET_HIGH; - } else { // If memory allocation failed, set to bogus state. - setToBogus(); - return; - } - _dbgct(this); -} - -/** - * Constructs a set containing the given range. If end > - * start then an empty set is created. - * - * @param start first character, inclusive, of range - * @param end last character, inclusive, of range - */ -UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) : - len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0), - bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), - fFlags(0) -{ - UErrorCode status = U_ZERO_ERROR; - allocateStrings(status); - if (U_FAILURE(status)) { - setToBogus(); // If memory allocation failed, set to bogus state. - return; - } - list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); - if(list!=NULL){ - list[0] = UNICODESET_HIGH; - complement(start, end); - } else { // If memory allocation failed, set to bogus state. - setToBogus(); - return; - } - _dbgct(this); -} - -/** - * Constructs a set that is identical to the given UnicodeSet. - */ -UnicodeSet::UnicodeSet(const UnicodeSet& o) : - UnicodeFilter(o), - len(0), capacity(o.isFrozen() ? o.len : o.len + GROW_EXTRA), list(0), - bmpSet(0), - buffer(0), bufferCapacity(0), - patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), - fFlags(0) -{ - UErrorCode status = U_ZERO_ERROR; - allocateStrings(status); - if (U_FAILURE(status)) { - setToBogus(); // If memory allocation failed, set to bogus state. 
- return; - } - list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); - if(list!=NULL){ - *this = o; - } else { // If memory allocation failed, set to bogus state. - setToBogus(); - return; - } - _dbgct(this); -} - -// Copy-construct as thawed. -UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : - UnicodeFilter(o), - len(0), capacity(o.len + GROW_EXTRA), list(0), - bmpSet(0), - buffer(0), bufferCapacity(0), - patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), - fFlags(0) -{ - UErrorCode status = U_ZERO_ERROR; - allocateStrings(status); - if (U_FAILURE(status)) { - setToBogus(); // If memory allocation failed, set to bogus state. - return; - } - list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); - if(list!=NULL){ - // *this = o except for bmpSet and stringSpan - len = o.len; - uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32)); - if (strings != NULL && o.strings != NULL) { - strings->assign(*o.strings, cloneUnicodeString, status); - } else { // Invalid strings. - setToBogus(); - return; - } - if (o.pat) { - setPattern(UnicodeString(o.pat, o.patLen)); - } - } else { // If memory allocation failed, set to bogus state. - setToBogus(); - return; - } - _dbgct(this); -} - -/** - * Destructs the set. - */ -UnicodeSet::~UnicodeSet() { - _dbgdt(this); // first! - uprv_free(list); - delete bmpSet; - if (buffer) { - uprv_free(buffer); - } - delete strings; - delete stringSpan; - releasePattern(); -} - -/** - * Assigns this object to be a copy of another. - */ -UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) { - return copyFrom(o, FALSE); -} - -UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) { - if (this == &o) { - return *this; - } - if (isFrozen()) { - return *this; - } - if (o.isBogus()) { - setToBogus(); - return *this; - } - UErrorCode ec = U_ZERO_ERROR; - ensureCapacity(o.len, ec); - if (U_FAILURE(ec)) { - // ensureCapacity will mark the UnicodeSet as Bogus if OOM failure happens. 
- return *this; - } - len = o.len; - uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32)); - if (o.bmpSet == NULL || asThawed) { - bmpSet = NULL; - } else { - bmpSet = new BMPSet(*o.bmpSet, list, len); - if (bmpSet == NULL) { // Check for memory allocation error. - setToBogus(); - return *this; - } - } - if (strings != NULL && o.strings != NULL) { - strings->assign(*o.strings, cloneUnicodeString, ec); - } else { // Invalid strings. - setToBogus(); - return *this; - } - if (o.stringSpan == NULL || asThawed) { - stringSpan = NULL; - } else { - stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings); - if (stringSpan == NULL) { // Check for memory allocation error. - setToBogus(); - return *this; - } - } - releasePattern(); - if (o.pat) { - setPattern(UnicodeString(o.pat, o.patLen)); - } - return *this; -} - -/** - * Returns a copy of this object. All UnicodeMatcher objects have - * to support cloning in order to allow classes using - * UnicodeMatchers, such as Transliterator, to implement cloning. - */ -UnicodeFunctor* UnicodeSet::clone() const { - return new UnicodeSet(*this); -} - -UnicodeFunctor *UnicodeSet::cloneAsThawed() const { - return new UnicodeSet(*this, TRUE); -} - -/** - * Compares the specified object with this set for equality. Returns - * true if the two sets - * have the same size, and every member of the specified set is - * contained in this set (or equivalently, every member of this set is - * contained in the specified set). - * - * @param o set to be compared for equality with this set. - * @return true if the specified set is equal to this set. - */ -UBool UnicodeSet::operator==(const UnicodeSet& o) const { - if (len != o.len) return FALSE; - for (int32_t i = 0; i < len; ++i) { - if (list[i] != o.list[i]) return FALSE; - } - if (*strings != *o.strings) return FALSE; - return TRUE; -} - -/** - * Returns the hash code value for this set. - * - * @return the hash code value for this set. 
- * @see Object#hashCode() - */ -int32_t UnicodeSet::hashCode(void) const { - uint32_t result = static_cast(len); - for (int32_t i = 0; i < len; ++i) { - result *= 1000003u; - result += list[i]; - } - return static_cast(result); -} - -//---------------------------------------------------------------- -// Public API -//---------------------------------------------------------------- - -/** - * Returns the number of elements in this set (its cardinality), - * Note than the elements of a set may include both individual - * codepoints and strings. - * - * @return the number of elements in this set (its cardinality). - */ -int32_t UnicodeSet::size(void) const { - int32_t n = 0; - int32_t count = getRangeCount(); - for (int32_t i = 0; i < count; ++i) { - n += getRangeEnd(i) - getRangeStart(i) + 1; - } - return n + strings->size(); -} - -/** - * Returns true if this set contains no elements. - * - * @return true if this set contains no elements. - */ -UBool UnicodeSet::isEmpty(void) const { - return len == 1 && strings->size() == 0; -} - -/** - * Returns true if this set contains the given character. - * @param c character to be checked for containment - * @return true if the test condition is met - */ -UBool UnicodeSet::contains(UChar32 c) const { - // Set i to the index of the start item greater than ch - // We know we will terminate without length test! - // LATER: for large sets, add binary search - //int32_t i = -1; - //for (;;) { - // if (c < list[++i]) break; - //} - if (bmpSet != NULL) { - return bmpSet->contains(c); - } - if (stringSpan != NULL) { - return stringSpan->contains(c); - } - if (c >= UNICODESET_HIGH) { // Don't need to check LOW bound - return FALSE; - } - int32_t i = findCodePoint(c); - return (UBool)(i & 1); // return true if odd -} - -/** - * Returns the smallest value i such that c < list[i]. Caller - * must ensure that c is a legal value or this method will enter - * an infinite loop. This method performs a binary search. 
- * @param c a character in the range MIN_VALUE..MAX_VALUE - * inclusive - * @return the smallest integer i in the range 0..len-1, - * inclusive, such that c < list[i] - */ -int32_t UnicodeSet::findCodePoint(UChar32 c) const { - /* Examples: - findCodePoint(c) - set list[] c=0 1 3 4 7 8 - === ============== =========== - [] [110000] 0 0 0 0 0 0 - [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 - [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 - [:Any:] [0, 110000] 1 1 1 1 1 1 - */ - - // Return the smallest i such that c < list[i]. Assume - // list[len - 1] == HIGH and that c is legal (0..HIGH-1). - if (c < list[0]) - return 0; - // High runner test. c is often after the last range, so an - // initial check for this condition pays off. - int32_t lo = 0; - int32_t hi = len - 1; - if (lo >= hi || c >= list[hi-1]) - return hi; - // invariant: c >= list[lo] - // invariant: c < list[hi] - for (;;) { - int32_t i = (lo + hi) >> 1; - if (i == lo) { - break; // Found! - } else if (c < list[i]) { - hi = i; - } else { - lo = i; - } - } - return hi; -} - -/** - * Returns true if this set contains every character - * of the given range. - * @param start first character, inclusive, of the range - * @param end last character, inclusive, of the range - * @return true if the test condition is met - */ -UBool UnicodeSet::contains(UChar32 start, UChar32 end) const { - //int32_t i = -1; - //for (;;) { - // if (start < list[++i]) break; - //} - int32_t i = findCodePoint(start); - return ((i & 1) != 0 && end < list[i]); -} - -/** - * Returns true if this set contains the given - * multicharacter string. 
- * @param s string to be checked for containment - * @return true if this set contains the specified string - */ -UBool UnicodeSet::contains(const UnicodeString& s) const { - if (s.length() == 0) return FALSE; - int32_t cp = getSingleCP(s); - if (cp < 0) { - return strings->contains((void*) &s); - } else { - return contains((UChar32) cp); - } -} - -/** - * Returns true if this set contains all the characters and strings - * of the given set. - * @param c set to be checked for containment - * @return true if the test condition is met - */ -UBool UnicodeSet::containsAll(const UnicodeSet& c) const { - // The specified set is a subset if all of its pairs are contained in - // this set. It's possible to code this more efficiently in terms of - // direct manipulation of the inversion lists if the need arises. - int32_t n = c.getRangeCount(); - for (int i=0; icontainsAll(*c.strings)) return FALSE; - return TRUE; -} - -/** - * Returns true if this set contains all the characters - * of the given string. - * @param s string containing characters to be checked for containment - * @return true if the test condition is met - */ -UBool UnicodeSet::containsAll(const UnicodeString& s) const { - return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED) == - s.length()); -} - -/** - * Returns true if this set contains none of the characters - * of the given range. - * @param start first character, inclusive, of the range - * @param end last character, inclusive, of the range - * @return true if the test condition is met - */ -UBool UnicodeSet::containsNone(UChar32 start, UChar32 end) const { - //int32_t i = -1; - //for (;;) { - // if (start < list[++i]) break; - //} - int32_t i = findCodePoint(start); - return ((i & 1) == 0 && end < list[i]); -} - -/** - * Returns true if this set contains none of the characters and strings - * of the given set. 
- * @param c set to be checked for containment - * @return true if the test condition is met - */ -UBool UnicodeSet::containsNone(const UnicodeSet& c) const { - // The specified set is a subset if all of its pairs are contained in - // this set. It's possible to code this more efficiently in terms of - // direct manipulation of the inversion lists if the need arises. - int32_t n = c.getRangeCount(); - for (int32_t i=0; icontainsNone(*c.strings)) return FALSE; - return TRUE; -} - -/** - * Returns true if this set contains none of the characters - * of the given string. - * @param s string containing characters to be checked for containment - * @return true if the test condition is met - */ -UBool UnicodeSet::containsNone(const UnicodeString& s) const { - return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED) == - s.length()); -} - -/** - * Returns true if this set contains any character whose low byte - * is the given value. This is used by RuleBasedTransliterator for - * indexing. - */ -UBool UnicodeSet::matchesIndexValue(uint8_t v) const { - /* The index value v, in the range [0,255], is contained in this set if - * it is contained in any pair of this set. Pairs either have the high - * bytes equal, or unequal. If the high bytes are equal, then we have - * aaxx..aayy, where aa is the high byte. Then v is contained if xx <= - * v <= yy. If the high bytes are unequal we have aaxx..bbyy, bb>aa. - * Then v is contained if xx <= v || v <= yy. (This is identical to the - * time zone month containment logic.) 
- */ - int32_t i; - int32_t rangeCount=getRangeCount(); - for (i=0; isize() != 0) { - for (i=0; isize(); ++i) { - const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i); - //if (s.length() == 0) { - // // Empty strings match everything - // return TRUE; - //} - // assert(s.length() != 0); // We enforce this elsewhere - UChar32 c = s.char32At(0); - if ((c & 0xFF) == v) { - return TRUE; - } - } - } - return FALSE; -} - -/** - * Implementation of UnicodeMatcher::matches(). Always matches the - * longest possible multichar string. - */ -UMatchDegree UnicodeSet::matches(const Replaceable& text, - int32_t& offset, - int32_t limit, - UBool incremental) { - if (offset == limit) { - // Strings, if any, have length != 0, so we don't worry - // about them here. If we ever allow zero-length strings - // we much check for them here. - if (contains(U_ETHER)) { - return incremental ? U_PARTIAL_MATCH : U_MATCH; - } else { - return U_MISMATCH; - } - } else { - if (strings->size() != 0) { // try strings first - - // might separate forward and backward loops later - // for now they are combined - - // TODO Improve efficiency of this, at least in the forward - // direction, if not in both. In the forward direction we - // can assume the strings are sorted. - - int32_t i; - UBool forward = offset < limit; - - // firstChar is the leftmost char to match in the - // forward direction or the rightmost char to match in - // the reverse direction. - UChar firstChar = text.charAt(offset); - - // If there are multiple strings that can match we - // return the longest match. - int32_t highWaterLength = 0; - - for (i=0; isize(); ++i) { - const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i); - - //if (trial.length() == 0) { - // return U_MATCH; // null-string always matches - //} - // assert(trial.length() != 0); // We ensure this elsewhere - - UChar c = trial.charAt(forward ? 
0 : trial.length() - 1); - - // Strings are sorted, so we can optimize in the - // forward direction. - if (forward && c > firstChar) break; - if (c != firstChar) continue; - - int32_t matchLen = matchRest(text, offset, limit, trial); - - if (incremental) { - int32_t maxLen = forward ? limit-offset : offset-limit; - if (matchLen == maxLen) { - // We have successfully matched but only up to limit. - return U_PARTIAL_MATCH; - } - } - - if (matchLen == trial.length()) { - // We have successfully matched the whole string. - if (matchLen > highWaterLength) { - highWaterLength = matchLen; - } - // In the forward direction we know strings - // are sorted so we can bail early. - if (forward && matchLen < highWaterLength) { - break; - } - continue; - } - } - - // We've checked all strings without a partial match. - // If we have full matches, return the longest one. - if (highWaterLength != 0) { - offset += forward ? highWaterLength : -highWaterLength; - return U_MATCH; - } - } - return UnicodeFilter::matches(text, offset, limit, incremental); - } -} - -/** - * Returns the longest match for s in text at the given position. - * If limit > start then match forward from start+1 to limit - * matching all characters except s.charAt(0). If limit < start, - * go backward starting from start-1 matching all characters - * except s.charAt(s.length()-1). This method assumes that the - * first character, text.charAt(start), matches s, so it does not - * check it. - * @param text the text to match - * @param start the first character to match. In the forward - * direction, text.charAt(start) is matched against s.charAt(0). - * In the reverse direction, it is matched against - * s.charAt(s.length()-1). - * @param limit the limit offset for matching, either last+1 in - * the forward direction, or last-1 in the reverse direction, - * where last is the index of the last character to match. - * @return If part of s matches up to the limit, return |limit - - * start|. 
If all of s matches before reaching the limit, return - * s.length(). If there is a mismatch between s and text, return - * 0 - */ -int32_t UnicodeSet::matchRest(const Replaceable& text, - int32_t start, int32_t limit, - const UnicodeString& s) { - int32_t i; - int32_t maxLen; - int32_t slen = s.length(); - if (start < limit) { - maxLen = limit - start; - if (maxLen > slen) maxLen = slen; - for (i = 1; i < maxLen; ++i) { - if (text.charAt(start + i) != s.charAt(i)) return 0; - } - } else { - maxLen = start - limit; - if (maxLen > slen) maxLen = slen; - --slen; // <=> slen = s.length() - 1; - for (i = 1; i < maxLen; ++i) { - if (text.charAt(start - i) != s.charAt(slen - i)) return 0; - } - } - return maxLen; -} - -/** - * Implement of UnicodeMatcher - */ -void UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const { - toUnionTo.addAll(*this); -} - -/** - * Returns the index of the given character within this set, where - * the set is ordered by ascending code point. If the character - * is not in this set, return -1. The inverse of this method is - * charAt(). - * @return an index from 0..size()-1, or -1 - */ -int32_t UnicodeSet::indexOf(UChar32 c) const { - if (c < MIN_VALUE || c > MAX_VALUE) { - return -1; - } - int32_t i = 0; - int32_t n = 0; - for (;;) { - UChar32 start = list[i++]; - if (c < start) { - return -1; - } - UChar32 limit = list[i++]; - if (c < limit) { - return n + c - start; - } - n += limit - start; - } -} - -/** - * Returns the character at the given index within this set, where - * the set is ordered by ascending code point. If the index is - * out of range, return (UChar32)-1. The inverse of this method is - * indexOf(). - * @param index an index from 0..size()-1 - * @return the character at the given index, or (UChar32)-1. - */ -UChar32 UnicodeSet::charAt(int32_t index) const { - if (index >= 0) { - // len2 is the largest even integer <= len, that is, it is len - // for even values and len-1 for odd values. 
With odd values - // the last entry is UNICODESET_HIGH. - int32_t len2 = len & ~1; - for (int32_t i=0; i < len2;) { - UChar32 start = list[i++]; - int32_t count = list[i++] - start; - if (index < count) { - return (UChar32)(start + index); - } - index -= count; - } - } - return (UChar32)-1; -} - -/** - * Make this object represent the range start - end. - * If end > start then this object is set to an - * an empty range. - * - * @param start first character in the set, inclusive - * @rparam end last character in the set, inclusive - */ -UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) { - clear(); - complement(start, end); - return *this; -} - -/** - * Adds the specified range to this set if it is not already - * present. If this set already contains the specified range, - * the call leaves this set unchanged. If end > start - * then an empty range is added, leaving the set unchanged. - * - * @param start first character, inclusive, of range to be added - * to this set. - * @param end last character, inclusive, of range to be added - * to this set. - */ -UnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) { - if (pinCodePoint(start) < pinCodePoint(end)) { - UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; - add(range, 2, 0); - } else if (start == end) { - add(start); - } - return *this; -} - -// #define DEBUG_US_ADD - -#ifdef DEBUG_US_ADD -#include -void dump(UChar32 c) { - if (c <= 0xFF) { - printf("%c", (char)c); - } else { - printf("U+%04X", c); - } -} -void dump(const UChar32* list, int32_t len) { - printf("["); - for (int32_t i=0; i "); -#endif - - if (c == list[i]-1) { - // c is before start of next range - list[i] = c; - // if we touched the HIGH mark, then add a new one - if (c == (UNICODESET_HIGH - 1)) { - UErrorCode status = U_ZERO_ERROR; - ensureCapacity(len+1, status); - if (U_FAILURE(status)) { - // ensureCapacity will mark the object as Bogus if OOM failure happens. 
- return *this; - } - list[len++] = UNICODESET_HIGH; - } - if (i > 0 && c == list[i-1]) { - // collapse adjacent ranges - - // [..., start_k-1, c, c, limit_k, ..., HIGH] - // ^ - // list[i] - - //for (int32_t k=i-1; k 0 && c == list[i-1]) { - // c is after end of prior range - list[i-1]++; - // no need to check for collapse here - } - - else { - // At this point we know the new char is not adjacent to - // any existing ranges, and it is not 10FFFF. - - - // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] - // ^ - // list[i] - - // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH] - // ^ - // list[i] - - UErrorCode status = U_ZERO_ERROR; - ensureCapacity(len+2, status); - if (U_FAILURE(status)) { - // ensureCapacity will mark the object as Bogus if OOM failure happens. - return *this; - } - - //for (int32_t k=len-1; k>=i; --k) { - // list[k+2] = list[k]; - //} - UChar32* src = list + len; - UChar32* dst = src + 2; - UChar32* srclimit = list + i; - while (src > srclimit) *(--dst) = *(--src); - - list[i] = c; - list[i+1] = c+1; - len += 2; - } - -#ifdef DEBUG_US_ADD - dump(list, len); - printf("\n"); - - for (i=1; i {"ch"} - *
Warning: you cannot add an empty string ("") to a UnicodeSet. - * @param s the source string - * @return the modified set, for chaining - */ -UnicodeSet& UnicodeSet::add(const UnicodeString& s) { - if (s.length() == 0 || isFrozen() || isBogus()) return *this; - int32_t cp = getSingleCP(s); - if (cp < 0) { - if (!strings->contains((void*) &s)) { - _add(s); - releasePattern(); - } - } else { - add((UChar32)cp); - } - return *this; -} - -/** - * Adds the given string, in order, to 'strings'. The given string - * must have been checked by the caller to not be empty and to not - * already be in 'strings'. - */ -void UnicodeSet::_add(const UnicodeString& s) { - if (isFrozen() || isBogus()) { - return; - } - UnicodeString* t = new UnicodeString(s); - if (t == NULL) { // Check for memory allocation error. - setToBogus(); - return; - } - UErrorCode ec = U_ZERO_ERROR; - strings->sortedInsert(t, compareUnicodeString, ec); - if (U_FAILURE(ec)) { - setToBogus(); - delete t; - } -} - -/** - * @return a code point IF the string consists of a single one. - * otherwise returns -1. - * @param string to test - */ -int32_t UnicodeSet::getSingleCP(const UnicodeString& s) { - //if (s.length() < 1) { - // throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet"); - //} - if (s.length() > 2) return -1; - if (s.length() == 1) return s.charAt(0); - - // at this point, len = 2 - UChar32 cp = s.char32At(0); - if (cp > 0xFFFF) { // is surrogate pair - return cp; - } - return -1; -} - -/** - * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} - * If this set already any particular character, it has no effect on that character. 
- * @param the source string - * @return the modified set, for chaining - */ -UnicodeSet& UnicodeSet::addAll(const UnicodeString& s) { - UChar32 cp; - for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) { - cp = s.char32At(i); - add(cp); - } - return *this; -} - -/** - * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} - * If this set already any particular character, it has no effect on that character. - * @param the source string - * @return the modified set, for chaining - */ -UnicodeSet& UnicodeSet::retainAll(const UnicodeString& s) { - UnicodeSet set; - set.addAll(s); - retainAll(set); - return *this; -} - -/** - * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"} - * If this set already any particular character, it has no effect on that character. - * @param the source string - * @return the modified set, for chaining - */ -UnicodeSet& UnicodeSet::complementAll(const UnicodeString& s) { - UnicodeSet set; - set.addAll(s); - complementAll(set); - return *this; -} - -/** - * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"} - * If this set already any particular character, it has no effect on that character. - * @param the source string - * @return the modified set, for chaining - */ -UnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) { - UnicodeSet set; - set.addAll(s); - removeAll(set); - return *this; -} - -UnicodeSet& UnicodeSet::removeAllStrings() { - strings->removeAllElements(); - return *this; -} - - -/** - * Makes a set from a multicharacter string. Thus "ch" => {"ch"} - *
Warning: you cannot add an empty string ("") to a UnicodeSet. - * @param the source string - * @return a newly created set containing the given string - */ -UnicodeSet* U_EXPORT2 UnicodeSet::createFrom(const UnicodeString& s) { - UnicodeSet *set = new UnicodeSet(); - if (set != NULL) { // Check for memory allocation error. - set->add(s); - } - return set; -} - - -/** - * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"} - * @param the source string - * @return a newly created set containing the given characters - */ -UnicodeSet* U_EXPORT2 UnicodeSet::createFromAll(const UnicodeString& s) { - UnicodeSet *set = new UnicodeSet(); - if (set != NULL) { // Check for memory allocation error. - set->addAll(s); - } - return set; -} - -/** - * Retain only the elements in this set that are contained in the - * specified range. If end > start then an empty range is - * retained, leaving the set empty. - * - * @param start first character, inclusive, of range to be retained - * to this set. - * @param end last character, inclusive, of range to be retained - * to this set. - */ -UnicodeSet& UnicodeSet::retain(UChar32 start, UChar32 end) { - if (pinCodePoint(start) <= pinCodePoint(end)) { - UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; - retain(range, 2, 0); - } else { - clear(); - } - return *this; -} - -UnicodeSet& UnicodeSet::retain(UChar32 c) { - return retain(c, c); -} - -/** - * Removes the specified range from this set if it is present. - * The set will not contain the specified range once the call - * returns. If end > start then an empty range is - * removed, leaving the set unchanged. - * - * @param start first character, inclusive, of range to be removed - * from this set. - * @param end last character, inclusive, of range to be removed - * from this set. 
- */ -UnicodeSet& UnicodeSet::remove(UChar32 start, UChar32 end) { - if (pinCodePoint(start) <= pinCodePoint(end)) { - UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; - retain(range, 2, 2); - } - return *this; -} - -/** - * Removes the specified character from this set if it is present. - * The set will not contain the specified range once the call - * returns. - */ -UnicodeSet& UnicodeSet::remove(UChar32 c) { - return remove(c, c); -} - -/** - * Removes the specified string from this set if it is present. - * The set will not contain the specified character once the call - * returns. - * @param the source string - * @return the modified set, for chaining - */ -UnicodeSet& UnicodeSet::remove(const UnicodeString& s) { - if (s.length() == 0 || isFrozen() || isBogus()) return *this; - int32_t cp = getSingleCP(s); - if (cp < 0) { - strings->removeElement((void*) &s); - releasePattern(); - } else { - remove((UChar32)cp, (UChar32)cp); - } - return *this; -} - -/** - * Complements the specified range in this set. Any character in - * the range will be removed if it is in this set, or will be - * added if it is not in this set. If end > start - * then an empty range is xor'ed, leaving the set unchanged. - * - * @param start first character, inclusive, of range to be removed - * from this set. - * @param end last character, inclusive, of range to be removed - * from this set. - */ -UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) { - if (isFrozen() || isBogus()) { - return *this; - } - if (pinCodePoint(start) <= pinCodePoint(end)) { - UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; - exclusiveOr(range, 2, 0); - } - releasePattern(); - return *this; -} - -UnicodeSet& UnicodeSet::complement(UChar32 c) { - return complement(c, c); -} - -/** - * This is equivalent to - * complement(MIN_VALUE, MAX_VALUE). 
- */ -UnicodeSet& UnicodeSet::complement(void) { - if (isFrozen() || isBogus()) { - return *this; - } - UErrorCode status = U_ZERO_ERROR; - if (list[0] == UNICODESET_LOW) { - ensureBufferCapacity(len-1, status); - if (U_FAILURE(status)) { - return *this; - } - uprv_memcpy(buffer, list + 1, (size_t)(len-1)*sizeof(UChar32)); - --len; - } else { - ensureBufferCapacity(len+1, status); - if (U_FAILURE(status)) { - return *this; - } - uprv_memcpy(buffer + 1, list, (size_t)len*sizeof(UChar32)); - buffer[0] = UNICODESET_LOW; - ++len; - } - swapBuffers(); - releasePattern(); - return *this; -} - -/** - * Complement the specified string in this set. - * The set will not contain the specified string once the call - * returns. - *
Warning: you cannot add an empty string ("") to a UnicodeSet. - * @param s the string to complement - * @return this object, for chaining - */ -UnicodeSet& UnicodeSet::complement(const UnicodeString& s) { - if (s.length() == 0 || isFrozen() || isBogus()) return *this; - int32_t cp = getSingleCP(s); - if (cp < 0) { - if (strings->contains((void*) &s)) { - strings->removeElement((void*) &s); - } else { - _add(s); - } - releasePattern(); - } else { - complement((UChar32)cp, (UChar32)cp); - } - return *this; -} - -/** - * Adds all of the elements in the specified set to this set if - * they're not already present. This operation effectively - * modifies this set so that its value is the union of the two - * sets. The behavior of this operation is unspecified if the specified - * collection is modified while the operation is in progress. - * - * @param c set whose elements are to be added to this set. - * @see #add(char, char) - */ -UnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) { - if ( c.len>0 && c.list!=NULL ) { - add(c.list, c.len, 0); - } - - // Add strings in order - if ( c.strings!=NULL ) { - for (int32_t i=0; isize(); ++i) { - const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i); - if (!strings->contains((void*) s)) { - _add(*s); - } - } - } - return *this; -} - -/** - * Retains only the elements in this set that are contained in the - * specified set. In other words, removes from this set all of - * its elements that are not contained in the specified set. This - * operation effectively modifies this set so that its value is - * the intersection of the two sets. - * - * @param c set that defines which elements this set will retain. - */ -UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) { - if (isFrozen() || isBogus()) { - return *this; - } - retain(c.list, c.len, 0); - strings->retainAll(*c.strings); - return *this; -} - -/** - * Removes from this set all of its elements that are contained in the - * specified set. 
This operation effectively modifies this - * set so that its value is the asymmetric set difference of - * the two sets. - * - * @param c set that defines which elements will be removed from - * this set. - */ -UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) { - if (isFrozen() || isBogus()) { - return *this; - } - retain(c.list, c.len, 2); - strings->removeAll(*c.strings); - return *this; -} - -/** - * Complements in this set all elements contained in the specified - * set. Any character in the other set will be removed if it is - * in this set, or will be added if it is not in this set. - * - * @param c set that defines which elements will be xor'ed from - * this set. - */ -UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) { - if (isFrozen() || isBogus()) { - return *this; - } - exclusiveOr(c.list, c.len, 0); - - for (int32_t i=0; isize(); ++i) { - void* e = c.strings->elementAt(i); - if (!strings->removeElement(e)) { - _add(*(const UnicodeString*)e); - } - } - return *this; -} - -/** - * Removes all of the elements from this set. This set will be - * empty after this call returns. - */ -UnicodeSet& UnicodeSet::clear(void) { - if (isFrozen()) { - return *this; - } - if (list != NULL) { - list[0] = UNICODESET_HIGH; - } - len = 1; - releasePattern(); - if (strings != NULL) { - strings->removeAllElements(); - } - if (list != NULL && strings != NULL) { - // Remove bogus - fFlags = 0; - } - return *this; -} - -/** - * Iteration method that returns the number of ranges contained in - * this set. - * @see #getRangeStart - * @see #getRangeEnd - */ -int32_t UnicodeSet::getRangeCount() const { - return len/2; -} - -/** - * Iteration method that returns the first character in the - * specified range of this set. - * @see #getRangeCount - * @see #getRangeEnd - */ -UChar32 UnicodeSet::getRangeStart(int32_t index) const { - return list[index*2]; -} - -/** - * Iteration method that returns the last character in the - * specified range of this set. 
- * @see #getRangeStart - * @see #getRangeEnd - */ -UChar32 UnicodeSet::getRangeEnd(int32_t index) const { - return list[index*2 + 1] - 1; -} - -int32_t UnicodeSet::getStringCount() const { - return strings->size(); -} - -const UnicodeString* UnicodeSet::getString(int32_t index) const { - return (const UnicodeString*) strings->elementAt(index); -} - -/** - * Reallocate this objects internal structures to take up the least - * possible space, without changing this object's value. - */ -UnicodeSet& UnicodeSet::compact() { - if (isFrozen() || isBogus()) { - return *this; - } - // Delete buffer first to defragment memory less. - if (buffer != NULL) { - uprv_free(buffer); - buffer = NULL; - } - if (len < capacity) { - // Make the capacity equal to len or 1. - // We don't want to realloc of 0 size. - int32_t newCapacity = len + (len == 0); - UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * newCapacity); - if (temp) { - list = temp; - capacity = newCapacity; - } - // else what the heck happened?! We allocated less memory! - // Oh well. We'll keep our original array. - } - return *this; -} - -#ifdef DEBUG_SERIALIZE -#include -#endif - -/** - * Deserialize constructor. - */ -UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization, UErrorCode &ec) - : len(1), capacity(1+START_EXTRA), list(0), bmpSet(0), buffer(0), - bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), - fFlags(0) { - - if(U_FAILURE(ec)) { - setToBogus(); - return; - } - - if( (serialization != kSerialized) - || (data==NULL) - || (dataLen < 1)) { - ec = U_ILLEGAL_ARGUMENT_ERROR; - setToBogus(); - return; - } - - allocateStrings(ec); - if (U_FAILURE(ec)) { - setToBogus(); - return; - } - - // bmp? - int32_t headerSize = ((data[0]&0x8000)) ?2:1; - int32_t bmpLength = (headerSize==1)?data[0]:data[1]; - - len = (((data[0]&0x7FFF)-bmpLength)/2)+bmpLength; -#ifdef DEBUG_SERIALIZE - printf("dataLen %d headerSize %d bmpLen %d len %d. 
data[0]=%X/%X/%X/%X\n", dataLen,headerSize,bmpLength,len, data[0],data[1],data[2],data[3]); -#endif - capacity = len+1; - list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); - if(!list || U_FAILURE(ec)) { - setToBogus(); - return; - } - // copy bmp - int32_t i; - for(i = 0; i< bmpLength;i++) { - list[i] = data[i+headerSize]; -#ifdef DEBUG_SERIALIZE - printf("<<16@%d[%d] %X\n", i+headerSize, i, list[i]); -#endif - } - // copy smp - for(i=bmpLength;i0 && dest==NULL)) { - ec=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* count necessary 16-bit units */ - length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH - // assert(length>=0); - if (length==0) { - /* empty set */ - if (destCapacity>0) { - *dest=0; - } else { - ec=U_BUFFER_OVERFLOW_ERROR; - } - return 1; - } - /* now length>0 */ - - if (this->list[length-1]<=0xffff) { - /* all BMP */ - bmpLength=length; - } else if (this->list[0]>=0x10000) { - /* all supplementary */ - bmpLength=0; - length*=2; - } else { - /* some BMP, some supplementary */ - for (bmpLength=0; bmpLengthlist[bmpLength]<=0xffff; ++bmpLength) {} - length=bmpLength+2*(length-bmpLength); - } -#ifdef DEBUG_SERIALIZE - printf(">> bmpLength%d length%d len%d\n", bmpLength, length, len); -#endif - /* length: number of 16-bit array units */ - if (length>0x7fff) { - /* there are only 15 bits for the length in the first serialized word */ - ec=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - /* - * total serialized length: - * number of 16-bit array units (length) + - * 1 length unit (always) + - * 1 bmpLength unit (if there are supplementary values) - */ - destLength=length+((length>bmpLength)?2:1); - if (destLength<=destCapacity) { - const UChar32 *p; - int32_t i; - -#ifdef DEBUG_SERIALIZE - printf("writeHdr\n"); -#endif - *dest=(uint16_t)length; - if (length>bmpLength) { - *dest|=0x8000; - *++dest=(uint16_t)bmpLength; - } - ++dest; - - /* write the BMP part of the array */ - p=this->list; - for (i=0; i>16); - *dest++=(uint16_t)*p++; - } - 
} else { - ec=U_BUFFER_OVERFLOW_ERROR; - } - return destLength; -} - -//---------------------------------------------------------------- -// Implementation: Utility methods -//---------------------------------------------------------------- - -/** - * Allocate our strings vector and return TRUE if successful. - */ -UBool UnicodeSet::allocateStrings(UErrorCode &status) { - if (U_FAILURE(status)) { - return FALSE; - } - strings = new UVector(uprv_deleteUObject, - uhash_compareUnicodeString, 1, status); - if (strings == NULL) { // Check for memory allocation error. - status = U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - if (U_FAILURE(status)) { - delete strings; - strings = NULL; - return FALSE; - } - return TRUE; -} - -void UnicodeSet::ensureCapacity(int32_t newLen, UErrorCode& ec) { - if (newLen <= capacity) { - return; - } - UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * (newLen + GROW_EXTRA)); - if (temp == NULL) { - ec = U_MEMORY_ALLOCATION_ERROR; - setToBogus(); // set the object to bogus state if an OOM failure occurred. - return; - } - list = temp; - capacity = newLen + GROW_EXTRA; - // else we keep the original contents on the memory failure. -} - -void UnicodeSet::ensureBufferCapacity(int32_t newLen, UErrorCode& ec) { - if (buffer != NULL && newLen <= bufferCapacity) - return; - UChar32* temp = (UChar32*) uprv_realloc(buffer, sizeof(UChar32) * (newLen + GROW_EXTRA)); - if (temp == NULL) { - ec = U_MEMORY_ALLOCATION_ERROR; - setToBogus(); - return; - } - buffer = temp; - bufferCapacity = newLen + GROW_EXTRA; - // else we keep the original contents on the memory failure. -} - -/** - * Swap list and buffer. - */ -void UnicodeSet::swapBuffers(void) { - // swap list and buffer - UChar32* temp = list; - list = buffer; - buffer = temp; - - int32_t c = capacity; - capacity = bufferCapacity; - bufferCapacity = c; -} - -void UnicodeSet::setToBogus() { - clear(); // Remove everything in the set. 
- fFlags = kIsBogus; -} - -//---------------------------------------------------------------- -// Implementation: Fundamental operators -//---------------------------------------------------------------- - -static inline UChar32 max(UChar32 a, UChar32 b) { - return (a > b) ? a : b; -} - -// polarity = 0, 3 is normal: x xor y -// polarity = 1, 2: x xor ~y == x === y - -void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) { - if (isFrozen() || isBogus()) { - return; - } - UErrorCode status = U_ZERO_ERROR; - ensureBufferCapacity(len + otherLen, status); - if (U_FAILURE(status)) { - return; - } - - int32_t i = 0, j = 0, k = 0; - UChar32 a = list[i++]; - UChar32 b; - if (polarity == 1 || polarity == 2) { - b = UNICODESET_LOW; - if (other[j] == UNICODESET_LOW) { // skip base if already LOW - ++j; - b = other[j]; - } - } else { - b = other[j++]; - } - // simplest of all the routines - // sort the values, discarding identicals! - for (;;) { - if (a < b) { - buffer[k++] = a; - a = list[i++]; - } else if (b < a) { - buffer[k++] = b; - b = other[j++]; - } else if (a != UNICODESET_HIGH) { // at this point, a == b - // discard both values! - a = list[i++]; - b = other[j++]; - } else { // DONE! - buffer[k++] = UNICODESET_HIGH; - len = k; - break; - } - } - swapBuffers(); - releasePattern(); -} - -// polarity = 0 is normal: x union y -// polarity = 2: x union ~y -// polarity = 1: ~x union y -// polarity = 3: ~x union ~y - -void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) { - if (isFrozen() || isBogus() || other==NULL) { - return; - } - UErrorCode status = U_ZERO_ERROR; - ensureBufferCapacity(len + otherLen, status); - if (U_FAILURE(status)) { - return; - } - - int32_t i = 0, j = 0, k = 0; - UChar32 a = list[i++]; - UChar32 b = other[j++]; - // change from xor is that we have to check overlapping pairs - // polarity bit 1 means a is second, bit 2 means b is. 
- for (;;) { - switch (polarity) { - case 0: // both first; take lower if unequal - if (a < b) { // take a - // Back up over overlapping ranges in buffer[] - if (k > 0 && a <= buffer[k-1]) { - // Pick latter end value in buffer[] vs. list[] - a = max(list[i], buffer[--k]); - } else { - // No overlap - buffer[k++] = a; - a = list[i]; - } - i++; // Common if/else code factored out - polarity ^= 1; - } else if (b < a) { // take b - if (k > 0 && b <= buffer[k-1]) { - b = max(other[j], buffer[--k]); - } else { - buffer[k++] = b; - b = other[j]; - } - j++; - polarity ^= 2; - } else { // a == b, take a, drop b - if (a == UNICODESET_HIGH) goto loop_end; - // This is symmetrical; it doesn't matter if - // we backtrack with a or b. - liu - if (k > 0 && a <= buffer[k-1]) { - a = max(list[i], buffer[--k]); - } else { - // No overlap - buffer[k++] = a; - a = list[i]; - } - i++; - polarity ^= 1; - b = other[j++]; - polarity ^= 2; - } - break; - case 3: // both second; take higher if unequal, and drop other - if (b <= a) { // take a - if (a == UNICODESET_HIGH) goto loop_end; - buffer[k++] = a; - } else { // take b - if (b == UNICODESET_HIGH) goto loop_end; - buffer[k++] = b; - } - a = list[i++]; - polarity ^= 1; // factored common code - b = other[j++]; - polarity ^= 2; - break; - case 1: // a second, b first; if b < a, overlap - if (a < b) { // no overlap, take a - buffer[k++] = a; a = list[i++]; polarity ^= 1; - } else if (b < a) { // OVERLAP, drop b - b = other[j++]; - polarity ^= 2; - } else { // a == b, drop both! - if (a == UNICODESET_HIGH) goto loop_end; - a = list[i++]; - polarity ^= 1; - b = other[j++]; - polarity ^= 2; - } - break; - case 2: // a first, b second; if a < b, overlap - if (b < a) { // no overlap, take b - buffer[k++] = b; - b = other[j++]; - polarity ^= 2; - } else if (a < b) { // OVERLAP, drop a - a = list[i++]; - polarity ^= 1; - } else { // a == b, drop both! 
- if (a == UNICODESET_HIGH) goto loop_end; - a = list[i++]; - polarity ^= 1; - b = other[j++]; - polarity ^= 2; - } - break; - } - } - loop_end: - buffer[k++] = UNICODESET_HIGH; // terminate - len = k; - swapBuffers(); - releasePattern(); -} - -// polarity = 0 is normal: x intersect y -// polarity = 2: x intersect ~y == set-minus -// polarity = 1: ~x intersect y -// polarity = 3: ~x intersect ~y - -void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) { - if (isFrozen() || isBogus()) { - return; - } - UErrorCode status = U_ZERO_ERROR; - ensureBufferCapacity(len + otherLen, status); - if (U_FAILURE(status)) { - return; - } - - int32_t i = 0, j = 0, k = 0; - UChar32 a = list[i++]; - UChar32 b = other[j++]; - // change from xor is that we have to check overlapping pairs - // polarity bit 1 means a is second, bit 2 means b is. - for (;;) { - switch (polarity) { - case 0: // both first; drop the smaller - if (a < b) { // drop a - a = list[i++]; - polarity ^= 1; - } else if (b < a) { // drop b - b = other[j++]; - polarity ^= 2; - } else { // a == b, take one, drop other - if (a == UNICODESET_HIGH) goto loop_end; - buffer[k++] = a; - a = list[i++]; - polarity ^= 1; - b = other[j++]; - polarity ^= 2; - } - break; - case 3: // both second; take lower if unequal - if (a < b) { // take a - buffer[k++] = a; - a = list[i++]; - polarity ^= 1; - } else if (b < a) { // take b - buffer[k++] = b; - b = other[j++]; - polarity ^= 2; - } else { // a == b, take one, drop other - if (a == UNICODESET_HIGH) goto loop_end; - buffer[k++] = a; - a = list[i++]; - polarity ^= 1; - b = other[j++]; - polarity ^= 2; - } - break; - case 1: // a second, b first; - if (a < b) { // NO OVERLAP, drop a - a = list[i++]; - polarity ^= 1; - } else if (b < a) { // OVERLAP, take b - buffer[k++] = b; - b = other[j++]; - polarity ^= 2; - } else { // a == b, drop both! 
- if (a == UNICODESET_HIGH) goto loop_end; - a = list[i++]; - polarity ^= 1; - b = other[j++]; - polarity ^= 2; - } - break; - case 2: // a first, b second; if a < b, overlap - if (b < a) { // no overlap, drop b - b = other[j++]; - polarity ^= 2; - } else if (a < b) { // OVERLAP, take a - buffer[k++] = a; - a = list[i++]; - polarity ^= 1; - } else { // a == b, drop both! - if (a == UNICODESET_HIGH) goto loop_end; - a = list[i++]; - polarity ^= 1; - b = other[j++]; - polarity ^= 2; - } - break; - } - } - loop_end: - buffer[k++] = UNICODESET_HIGH; // terminate - len = k; - swapBuffers(); - releasePattern(); -} - -/** - * Append the toPattern() representation of a - * string to the given StringBuffer. - */ -void UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool -escapeUnprintable) { - UChar32 cp; - for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) { - _appendToPat(buf, cp = s.char32At(i), escapeUnprintable); - } -} - -/** - * Append the toPattern() representation of a - * character to the given StringBuffer. - */ -void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool -escapeUnprintable) { - if (escapeUnprintable && ICU_Utility::isUnprintable(c)) { - // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything - // unprintable - if (ICU_Utility::escapeUnprintable(buf, c)) { - return; - } - } - // Okay to let ':' pass through - switch (c) { - case SET_OPEN: - case SET_CLOSE: - case HYPHEN: - case COMPLEMENT: - case INTERSECTION: - case BACKSLASH: - case OPEN_BRACE: - case CLOSE_BRACE: - case COLON: - case SymbolTable::SYMBOL_REF: - buf.append(BACKSLASH); - break; - default: - // Escape whitespace - if (PatternProps::isWhiteSpace(c)) { - buf.append(BACKSLASH); - } - break; - } - buf.append(c); -} - -/** - * Append a string representation of this set to result. This will be - * a cleaned version of the string passed to applyPattern(), if there - * is one. Otherwise it will be generated. 
- */ -UnicodeString& UnicodeSet::_toPattern(UnicodeString& result, - UBool escapeUnprintable) const -{ - if (pat != NULL) { - int32_t i; - int32_t backslashCount = 0; - for (i=0; i 1 && - getRangeStart(0) == MIN_VALUE && - getRangeEnd(count-1) == MAX_VALUE) { - - // Emit the inverse - result.append(COMPLEMENT); - - for (int32_t i = 1; i < count; ++i) { - UChar32 start = getRangeEnd(i-1)+1; - UChar32 end = getRangeStart(i)-1; - _appendToPat(result, start, escapeUnprintable); - if (start != end) { - if ((start+1) != end) { - result.append(HYPHEN); - } - _appendToPat(result, end, escapeUnprintable); - } - } - } - - // Default; emit the ranges as pairs - else { - for (int32_t i = 0; i < count; ++i) { - UChar32 start = getRangeStart(i); - UChar32 end = getRangeEnd(i); - _appendToPat(result, start, escapeUnprintable); - if (start != end) { - if ((start+1) != end) { - result.append(HYPHEN); - } - _appendToPat(result, end, escapeUnprintable); - } - } - } - - for (int32_t i = 0; isize(); ++i) { - result.append(OPEN_BRACE); - _appendToPat(result, - *(const UnicodeString*) strings->elementAt(i), - escapeUnprintable); - result.append(CLOSE_BRACE); - } - return result.append(SET_CLOSE); -} - -/** -* Release existing cached pattern -*/ -void UnicodeSet::releasePattern() { - if (pat) { - uprv_free(pat); - pat = NULL; - patLen = 0; - } -} - -/** -* Set the new pattern to cache. -*/ -void UnicodeSet::setPattern(const UnicodeString& newPat) { - releasePattern(); - int32_t newPatLen = newPat.length(); - pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar)); - if (pat) { - patLen = newPatLen; - newPat.extractBetween(0, patLen, pat); - pat[patLen] = 0; - } - // else we don't care if malloc failed. This was just a nice cache. - // We can regenerate an equivalent pattern later when requested. -} - -UnicodeFunctor *UnicodeSet::freeze() { - if(!isFrozen() && !isBogus()) { - // Do most of what compact() does before freezing because - // compact() will not work when the set is frozen. 
- // Small modification: Don't shrink if the savings would be tiny (<=GROW_EXTRA). - - // Delete buffer first to defragment memory less. - if (buffer != NULL) { - uprv_free(buffer); - buffer = NULL; - } - if (capacity > (len + GROW_EXTRA)) { - // Make the capacity equal to len or 1. - // We don't want to realloc of 0 size. - capacity = len + (len == 0); - list = (UChar32*) uprv_realloc(list, sizeof(UChar32) * capacity); - if (list == NULL) { // Check for memory allocation error. - setToBogus(); - return this; - } - } - - // Optimize contains() and span() and similar functions. - if (!strings->isEmpty()) { - stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL); - if (stringSpan != NULL && !stringSpan->needsStringSpanUTF16()) { - // All strings are irrelevant for span() etc. because - // all of each string's code points are contained in this set. - // Do not check needsStringSpanUTF8() because UTF-8 has at most as - // many relevant strings as UTF-16. - // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().) - delete stringSpan; - stringSpan = NULL; - } - } - if (stringSpan == NULL) { - // No span-relevant strings: Optimize for code point spans. - bmpSet=new BMPSet(list, len); - if (bmpSet == NULL) { // Check for memory allocation error. - setToBogus(); - } - } - } - return this; -} - -int32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { - if(length>0 && bmpSet!=NULL) { - return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s); - } - if(length<0) { - length=u_strlen(s); - } - if(length==0) { - return 0; - } - if(stringSpan!=NULL) { - return stringSpan->span(s, length, spanCondition); - } else if(!strings->isEmpty()) { - uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 
- UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED : - UnicodeSetStringSpan::FWD_UTF16_CONTAINED; - UnicodeSetStringSpan strSpan(*this, *strings, which); - if(strSpan.needsStringSpanUTF16()) { - return strSpan.span(s, length, spanCondition); - } - } - - if(spanCondition!=USET_SPAN_NOT_CONTAINED) { - spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. - } - - UChar32 c; - int32_t start=0, prev=0; - do { - U16_NEXT(s, start, length, c); - if(spanCondition!=contains(c)) { - break; - } - } while((prev=start)0 && bmpSet!=NULL) { - return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s); - } - if(length<0) { - length=u_strlen(s); - } - if(length==0) { - return 0; - } - if(stringSpan!=NULL) { - return stringSpan->spanBack(s, length, spanCondition); - } else if(!strings->isEmpty()) { - uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? - UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED : - UnicodeSetStringSpan::BACK_UTF16_CONTAINED; - UnicodeSetStringSpan strSpan(*this, *strings, which); - if(strSpan.needsStringSpanUTF16()) { - return strSpan.spanBack(s, length, spanCondition); - } - } - - if(spanCondition!=USET_SPAN_NOT_CONTAINED) { - spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. - } - - UChar32 c; - int32_t prev=length; - do { - U16_PREV(s, 0, length, c); - if(spanCondition!=contains(c)) { - break; - } - } while((prev=length)>0); - return prev; -} - -int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const { - if(length>0 && bmpSet!=NULL) { - const uint8_t *s0=(const uint8_t *)s; - return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0); - } - if(length<0) { - length=(int32_t)uprv_strlen(s); - } - if(length==0) { - return 0; - } - if(stringSpan!=NULL) { - return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition); - } else if(!strings->isEmpty()) { - uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 
- UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED : - UnicodeSetStringSpan::FWD_UTF8_CONTAINED; - UnicodeSetStringSpan strSpan(*this, *strings, which); - if(strSpan.needsStringSpanUTF8()) { - return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition); - } - } - - if(spanCondition!=USET_SPAN_NOT_CONTAINED) { - spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. - } - - UChar32 c; - int32_t start=0, prev=0; - do { - U8_NEXT_OR_FFFD(s, start, length, c); - if(spanCondition!=contains(c)) { - break; - } - } while((prev=start)0 && bmpSet!=NULL) { - const uint8_t *s0=(const uint8_t *)s; - return bmpSet->spanBackUTF8(s0, length, spanCondition); - } - if(length<0) { - length=(int32_t)uprv_strlen(s); - } - if(length==0) { - return 0; - } - if(stringSpan!=NULL) { - return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition); - } else if(!strings->isEmpty()) { - uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? - UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED : - UnicodeSetStringSpan::BACK_UTF8_CONTAINED; - UnicodeSetStringSpan strSpan(*this, *strings, which); - if(strSpan.needsStringSpanUTF8()) { - return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition); - } - } - - if(spanCondition!=USET_SPAN_NOT_CONTAINED) { - spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. - } - - UChar32 c; - int32_t prev=length; - do { - U8_PREV_OR_FFFD(s, 0, length, c); - if(spanCondition!=contains(c)) { - break; - } - } while((prev=length)>0); - return prev; -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/uniset_closure.cpp b/deps/node/deps/icu-small/source/common/uniset_closure.cpp deleted file mode 100644 index 97c7bc9d..00000000 --- a/deps/node/deps/icu-small/source/common/uniset_closure.cpp +++ /dev/null @@ -1,280 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uniset_closure.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011may30 -* created by: Markus W. Scherer -* -* UnicodeSet::closeOver() and related methods moved here from uniset_props.cpp -* to simplify dependencies. -* In particular, this depends on the BreakIterator, but the BreakIterator -* code also builds UnicodeSets from patterns and needs uniset_props. -*/ - -#include "unicode/brkiter.h" -#include "unicode/locid.h" -#include "unicode/parsepos.h" -#include "unicode/uniset.h" -#include "cmemory.h" -#include "ruleiter.h" -#include "ucase.h" -#include "util.h" -#include "uvector.h" - -// initial storage. Must be >= 0 -// *** same as in uniset.cpp ! 
*** -#define START_EXTRA 16 - -U_NAMESPACE_BEGIN - -// TODO memory debugging provided inside uniset.cpp -// could be made available here but probably obsolete with use of modern -// memory leak checker tools -#define _dbgct(me) - -//---------------------------------------------------------------- -// Constructors &c -//---------------------------------------------------------------- - -UnicodeSet::UnicodeSet(const UnicodeString& pattern, - uint32_t options, - const SymbolTable* symbols, - UErrorCode& status) : - len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), - bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), - fFlags(0) -{ - if(U_SUCCESS(status)){ - list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); - /* test for NULL */ - if(list == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - }else{ - allocateStrings(status); - applyPattern(pattern, options, symbols, status); - } - } - _dbgct(this); -} - -UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, - uint32_t options, - const SymbolTable* symbols, - UErrorCode& status) : - len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), - bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), - fFlags(0) -{ - if(U_SUCCESS(status)){ - list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); - /* test for NULL */ - if(list == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - }else{ - allocateStrings(status); - applyPattern(pattern, pos, options, symbols, status); - } - } - _dbgct(this); -} - -//---------------------------------------------------------------- -// Public API -//---------------------------------------------------------------- - -UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, - uint32_t options, - const SymbolTable* symbols, - UErrorCode& status) { - ParsePosition pos(0); - applyPattern(pattern, pos, options, symbols, status); - if (U_FAILURE(status)) return *this; - - int32_t i = pos.getIndex(); - - if 
(options & USET_IGNORE_SPACE) { - // Skip over trailing whitespace - ICU_Utility::skipWhitespace(pattern, i, TRUE); - } - - if (i != pattern.length()) { - status = U_ILLEGAL_ARGUMENT_ERROR; - } - return *this; -} - -UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, - ParsePosition& pos, - uint32_t options, - const SymbolTable* symbols, - UErrorCode& status) { - if (U_FAILURE(status)) { - return *this; - } - if (isFrozen()) { - status = U_NO_WRITE_PERMISSION; - return *this; - } - // Need to build the pattern in a temporary string because - // _applyPattern calls add() etc., which set pat to empty. - UnicodeString rebuiltPat; - RuleCharacterIterator chars(pattern, symbols, pos); - applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status); - if (U_FAILURE(status)) return *this; - if (chars.inVariable()) { - // syntaxError(chars, "Extra chars in variable value"); - status = U_MALFORMED_SET; - return *this; - } - setPattern(rebuiltPat); - return *this; -} - -// USetAdder implementation -// Does not use uset.h to reduce code dependencies -static void U_CALLCONV -_set_add(USet *set, UChar32 c) { - ((UnicodeSet *)set)->add(c); -} - -static void U_CALLCONV -_set_addRange(USet *set, UChar32 start, UChar32 end) { - ((UnicodeSet *)set)->add(start, end); -} - -static void U_CALLCONV -_set_addString(USet *set, const UChar *str, int32_t length) { - ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); -} - -//---------------------------------------------------------------- -// Case folding API -//---------------------------------------------------------------- - -// add the result of a full case mapping to the set -// use str as a temporary string to avoid constructing one -static inline void -addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) { - if(result >= 0) { - if(result > UCASE_MAX_STRING_LENGTH) { - // add a single-code point case mapping - set.add(result); - } else { - // 
add a string case mapping from full with length result - str.setTo((UBool)FALSE, full, result); - set.add(str); - } - } - // result < 0: the code point mapped to itself, no need to add it - // see ucase.h -} - -UnicodeSet& UnicodeSet::closeOver(int32_t attribute) { - if (isFrozen() || isBogus()) { - return *this; - } - if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) { - { - UnicodeSet foldSet(*this); - UnicodeString str; - USetAdder sa = { - foldSet.toUSet(), - _set_add, - _set_addRange, - _set_addString, - NULL, // don't need remove() - NULL // don't need removeRange() - }; - - // start with input set to guarantee inclusion - // USET_CASE: remove strings because the strings will actually be reduced (folded); - // therefore, start with no strings and add only those needed - if (attribute & USET_CASE_INSENSITIVE) { - foldSet.strings->removeAllElements(); - } - - int32_t n = getRangeCount(); - UChar32 result; - const UChar *full; - - for (int32_t i=0; isize() > 0) { - if (attribute & USET_CASE_INSENSITIVE) { - for (int32_t j=0; jsize(); ++j) { - str = *(const UnicodeString *) strings->elementAt(j); - str.foldCase(); - if(!ucase_addStringCaseClosure(str.getBuffer(), str.length(), &sa)) { - foldSet.add(str); // does not map to code points: add the folded string itself - } - } - } else { - Locale root(""); -#if !UCONFIG_NO_BREAK_ITERATION - UErrorCode status = U_ZERO_ERROR; - BreakIterator *bi = BreakIterator::createWordInstance(root, status); - if (U_SUCCESS(status)) { -#endif - const UnicodeString *pStr; - - for (int32_t j=0; jsize(); ++j) { - pStr = (const UnicodeString *) strings->elementAt(j); - (str = *pStr).toLower(root); - foldSet.add(str); -#if !UCONFIG_NO_BREAK_ITERATION - (str = *pStr).toTitle(bi, root); - foldSet.add(str); -#endif - (str = *pStr).toUpper(root); - foldSet.add(str); - (str = *pStr).foldCase(); - foldSet.add(str); - } -#if !UCONFIG_NO_BREAK_ITERATION - } - delete bi; -#endif - } - } - *this = foldSet; - } - } - return *this; 
-} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/uniset_props.cpp b/deps/node/deps/icu-small/source/common/uniset_props.cpp deleted file mode 100644 index 1312de20..00000000 --- a/deps/node/deps/icu-small/source/common/uniset_props.cpp +++ /dev/null @@ -1,1214 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uniset_props.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004aug25 -* created by: Markus W. Scherer -* -* Character property dependent functions moved here from uniset.cpp -*/ - -#include "unicode/utypes.h" -#include "unicode/uniset.h" -#include "unicode/parsepos.h" -#include "unicode/uchar.h" -#include "unicode/uscript.h" -#include "unicode/symtable.h" -#include "unicode/uset.h" -#include "unicode/locid.h" -#include "unicode/brkiter.h" -#include "uset_imp.h" -#include "ruleiter.h" -#include "cmemory.h" -#include "ucln_cmn.h" -#include "util.h" -#include "uvector.h" -#include "uprops.h" -#include "propname.h" -#include "normalizer2impl.h" -#include "uinvchar.h" -#include "uprops.h" -#include "charstr.h" -#include "cstring.h" -#include "mutex.h" -#include "umutex.h" -#include "uassert.h" -#include "hash.h" - -U_NAMESPACE_USE - -// initial storage. Must be >= 0 -// *** same as in uniset.cpp ! *** -#define START_EXTRA 16 - -// Define UChar constants using hex for EBCDIC compatibility -// Used #define to reduce private static exports and memory access time. 
-#define SET_OPEN ((UChar)0x005B) /*[*/ -#define SET_CLOSE ((UChar)0x005D) /*]*/ -#define HYPHEN ((UChar)0x002D) /*-*/ -#define COMPLEMENT ((UChar)0x005E) /*^*/ -#define COLON ((UChar)0x003A) /*:*/ -#define BACKSLASH ((UChar)0x005C) /*\*/ -#define INTERSECTION ((UChar)0x0026) /*&*/ -#define UPPER_U ((UChar)0x0055) /*U*/ -#define LOWER_U ((UChar)0x0075) /*u*/ -#define OPEN_BRACE ((UChar)123) /*{*/ -#define CLOSE_BRACE ((UChar)125) /*}*/ -#define UPPER_P ((UChar)0x0050) /*P*/ -#define LOWER_P ((UChar)0x0070) /*p*/ -#define UPPER_N ((UChar)78) /*N*/ -#define EQUALS ((UChar)0x003D) /*=*/ - -//static const UChar POSIX_OPEN[] = { SET_OPEN,COLON,0 }; // "[:" -static const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 }; // ":]" -//static const UChar PERL_OPEN[] = { BACKSLASH,LOWER_P,0 }; // "\\p" -//static const UChar PERL_CLOSE[] = { CLOSE_BRACE,0 }; // "}" -//static const UChar NAME_OPEN[] = { BACKSLASH,UPPER_N,0 }; // "\\N" -static const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/ - -// Special property set IDs -static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF] -static const char ASCII[] = "ASCII"; // [\u0000-\u007F] -static const char ASSIGNED[] = "Assigned"; // [:^Cn:] - -// Unicode name property alias -#define NAME_PROP "na" -#define NAME_PROP_LENGTH 2 - -/** - * Delimiter string used in patterns to close a category reference: - * ":]". Example: "[:Lu:]". 
- */ -//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */ - -// Cached sets ------------------------------------------------------------- *** - -U_CDECL_BEGIN -static UBool U_CALLCONV uset_cleanup(); - -static UnicodeSet *uni32Singleton; -static icu::UInitOnce uni32InitOnce = U_INITONCE_INITIALIZER; - -/** - * Cleanup function for UnicodeSet - */ -static UBool U_CALLCONV uset_cleanup(void) { - delete uni32Singleton; - uni32Singleton = NULL; - uni32InitOnce.reset(); - return TRUE; -} - -U_CDECL_END - -U_NAMESPACE_BEGIN - -namespace { - -// Cache some sets for other services -------------------------------------- *** -void U_CALLCONV createUni32Set(UErrorCode &errorCode) { - U_ASSERT(uni32Singleton == NULL); - uni32Singleton = new UnicodeSet(UNICODE_STRING_SIMPLE("[:age=3.2:]"), errorCode); - if(uni32Singleton==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } else { - uni32Singleton->freeze(); - } - ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup); -} - - -U_CFUNC UnicodeSet * -uniset_getUnicode32Instance(UErrorCode &errorCode) { - umtx_initOnce(uni32InitOnce, &createUni32Set, errorCode); - return uni32Singleton; -} - -// helper functions for matching of pattern syntax pieces ------------------ *** -// these functions are parallel to the PERL_OPEN etc. 
strings above - -// using these functions is not only faster than UnicodeString::compare() and -// caseCompare(), but they also make UnicodeSet work for simple patterns when -// no Unicode properties data is available - when caseCompare() fails - -static inline UBool -isPerlOpen(const UnicodeString &pattern, int32_t pos) { - UChar c; - return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P); -} - -/*static inline UBool -isPerlClose(const UnicodeString &pattern, int32_t pos) { - return pattern.charAt(pos)==CLOSE_BRACE; -}*/ - -static inline UBool -isNameOpen(const UnicodeString &pattern, int32_t pos) { - return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N; -} - -static inline UBool -isPOSIXOpen(const UnicodeString &pattern, int32_t pos) { - return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON; -} - -/*static inline UBool -isPOSIXClose(const UnicodeString &pattern, int32_t pos) { - return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE; -}*/ - -// TODO memory debugging provided inside uniset.cpp -// could be made available here but probably obsolete with use of modern -// memory leak checker tools -#define _dbgct(me) - -} // namespace - -//---------------------------------------------------------------- -// Constructors &c -//---------------------------------------------------------------- - -/** - * Constructs a set from the given pattern, optionally ignoring - * white space. See the class description for the syntax of the - * pattern language. 
- * @param pattern a string specifying what characters are in the set - */ -UnicodeSet::UnicodeSet(const UnicodeString& pattern, - UErrorCode& status) : - len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), - bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), - fFlags(0) -{ - if(U_SUCCESS(status)){ - list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); - /* test for NULL */ - if(list == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - }else{ - allocateStrings(status); - applyPattern(pattern, status); - } - } - _dbgct(this); -} - -//---------------------------------------------------------------- -// Public API -//---------------------------------------------------------------- - -UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, - UErrorCode& status) { - // Equivalent to - // return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status); - // but without dependency on closeOver(). - ParsePosition pos(0); - applyPatternIgnoreSpace(pattern, pos, NULL, status); - if (U_FAILURE(status)) return *this; - - int32_t i = pos.getIndex(); - // Skip over trailing whitespace - ICU_Utility::skipWhitespace(pattern, i, TRUE); - if (i != pattern.length()) { - status = U_ILLEGAL_ARGUMENT_ERROR; - } - return *this; -} - -void -UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern, - ParsePosition& pos, - const SymbolTable* symbols, - UErrorCode& status) { - if (U_FAILURE(status)) { - return; - } - if (isFrozen()) { - status = U_NO_WRITE_PERMISSION; - return; - } - // Need to build the pattern in a temporary string because - // _applyPattern calls add() etc., which set pat to empty. 
- UnicodeString rebuiltPat; - RuleCharacterIterator chars(pattern, symbols, pos); - applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status); - if (U_FAILURE(status)) return; - if (chars.inVariable()) { - // syntaxError(chars, "Extra chars in variable value"); - status = U_MALFORMED_SET; - return; - } - setPattern(rebuiltPat); -} - -/** - * Return true if the given position, in the given pattern, appears - * to be the start of a UnicodeSet pattern. - */ -UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) { - return ((pos+1) < pattern.length() && - pattern.charAt(pos) == (UChar)91/*[*/) || - resemblesPropertyPattern(pattern, pos); -} - -//---------------------------------------------------------------- -// Implementation: Pattern parsing -//---------------------------------------------------------------- - -namespace { - -/** - * A small all-inline class to manage a UnicodeSet pointer. Add - * operator->() etc. as needed. - */ -class UnicodeSetPointer { - UnicodeSet* p; -public: - inline UnicodeSetPointer() : p(0) {} - inline ~UnicodeSetPointer() { delete p; } - inline UnicodeSet* pointer() { return p; } - inline UBool allocate() { - if (p == 0) { - p = new UnicodeSet(); - } - return p != 0; - } -}; - -constexpr int32_t MAX_DEPTH = 100; - -} // namespace - -/** - * Parse the pattern from the given RuleCharacterIterator. The - * iterator is advanced over the parsed pattern. - * @param chars iterator over the pattern characters. Upon return - * it will be advanced to the first character after the parsed - * pattern, or the end of the iteration if all characters are - * parsed. - * @param symbols symbol table to use to parse and dereference - * variables, or null if none. - * @param rebuiltPat the pattern that was parsed, rebuilt or - * copied from the input pattern, as appropriate. - * @param options a bit mask of zero or more of the following: - * IGNORE_SPACE, CASE. 
- */ -void UnicodeSet::applyPattern(RuleCharacterIterator& chars, - const SymbolTable* symbols, - UnicodeString& rebuiltPat, - uint32_t options, - UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), - int32_t depth, - UErrorCode& ec) { - if (U_FAILURE(ec)) return; - if (depth > MAX_DEPTH) { - ec = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - // Syntax characters: [ ] ^ - & { } - - // Recognized special forms for chars, sets: c-c s-s s&s - - int32_t opts = RuleCharacterIterator::PARSE_VARIABLES | - RuleCharacterIterator::PARSE_ESCAPES; - if ((options & USET_IGNORE_SPACE) != 0) { - opts |= RuleCharacterIterator::SKIP_WHITESPACE; - } - - UnicodeString patLocal, buf; - UBool usePat = FALSE; - UnicodeSetPointer scratch; - RuleCharacterIterator::Pos backup; - - // mode: 0=before [, 1=between [...], 2=after ] - // lastItem: 0=none, 1=char, 2=set - int8_t lastItem = 0, mode = 0; - UChar32 lastChar = 0; - UChar op = 0; - - UBool invert = FALSE; - - clear(); - - while (mode != 2 && !chars.atEnd()) { - U_ASSERT((lastItem == 0 && op == 0) || - (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) || - (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ || - op == INTERSECTION /*'&'*/))); - - UChar32 c = 0; - UBool literal = FALSE; - UnicodeSet* nested = 0; // alias - do not delete - - // -------- Check for property pattern - - // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed - int8_t setMode = 0; - if (resemblesPropertyPattern(chars, opts)) { - setMode = 2; - } - - // -------- Parse '[' of opening delimiter OR nested set. - // If there is a nested set, use `setMode' to define how - // the set should be parsed. If the '[' is part of the - // opening delimiter for this pattern, parse special - // strings "[", "[^", "[-", and "[^-". Check for stand-in - // characters representing a nested set in the symbol - // table. 
- - else { - // Prepare to backup if necessary - chars.getPos(backup); - c = chars.next(opts, literal, ec); - if (U_FAILURE(ec)) return; - - if (c == 0x5B /*'['*/ && !literal) { - if (mode == 1) { - chars.setPos(backup); // backup - setMode = 1; - } else { - // Handle opening '[' delimiter - mode = 1; - patLocal.append((UChar) 0x5B /*'['*/); - chars.getPos(backup); // prepare to backup - c = chars.next(opts, literal, ec); - if (U_FAILURE(ec)) return; - if (c == 0x5E /*'^'*/ && !literal) { - invert = TRUE; - patLocal.append((UChar) 0x5E /*'^'*/); - chars.getPos(backup); // prepare to backup - c = chars.next(opts, literal, ec); - if (U_FAILURE(ec)) return; - } - // Fall through to handle special leading '-'; - // otherwise restart loop for nested [], \p{}, etc. - if (c == HYPHEN /*'-'*/) { - literal = TRUE; - // Fall through to handle literal '-' below - } else { - chars.setPos(backup); // backup - continue; - } - } - } else if (symbols != 0) { - const UnicodeFunctor *m = symbols->lookupMatcher(c); - if (m != 0) { - const UnicodeSet *ms = dynamic_cast(m); - if (ms == NULL) { - ec = U_MALFORMED_SET; - return; - } - // casting away const, but `nested' won't be modified - // (important not to modify stored set) - nested = const_cast(ms); - setMode = 3; - } - } - } - - // -------- Handle a nested set. This either is inline in - // the pattern or represented by a stand-in that has - // previously been parsed and was looked up in the symbol - // table. 
- - if (setMode != 0) { - if (lastItem == 1) { - if (op != 0) { - // syntaxError(chars, "Char expected after operator"); - ec = U_MALFORMED_SET; - return; - } - add(lastChar, lastChar); - _appendToPat(patLocal, lastChar, FALSE); - lastItem = 0; - op = 0; - } - - if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) { - patLocal.append(op); - } - - if (nested == 0) { - // lazy allocation - if (!scratch.allocate()) { - ec = U_MEMORY_ALLOCATION_ERROR; - return; - } - nested = scratch.pointer(); - } - switch (setMode) { - case 1: - nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec); - break; - case 2: - chars.skipIgnored(opts); - nested->applyPropertyPattern(chars, patLocal, ec); - if (U_FAILURE(ec)) return; - break; - case 3: // `nested' already parsed - nested->_toPattern(patLocal, FALSE); - break; - } - - usePat = TRUE; - - if (mode == 0) { - // Entire pattern is a category; leave parse loop - *this = *nested; - mode = 2; - break; - } - - switch (op) { - case HYPHEN: /*'-'*/ - removeAll(*nested); - break; - case INTERSECTION: /*'&'*/ - retainAll(*nested); - break; - case 0: - addAll(*nested); - break; - } - - op = 0; - lastItem = 2; - - continue; - } - - if (mode == 0) { - // syntaxError(chars, "Missing '['"); - ec = U_MALFORMED_SET; - return; - } - - // -------- Parse special (syntax) characters. If the - // current character is not special, or if it is escaped, - // then fall through and handle it below. 
- - if (!literal) { - switch (c) { - case 0x5D /*']'*/: - if (lastItem == 1) { - add(lastChar, lastChar); - _appendToPat(patLocal, lastChar, FALSE); - } - // Treat final trailing '-' as a literal - if (op == HYPHEN /*'-'*/) { - add(op, op); - patLocal.append(op); - } else if (op == INTERSECTION /*'&'*/) { - // syntaxError(chars, "Trailing '&'"); - ec = U_MALFORMED_SET; - return; - } - patLocal.append((UChar) 0x5D /*']'*/); - mode = 2; - continue; - case HYPHEN /*'-'*/: - if (op == 0) { - if (lastItem != 0) { - op = (UChar) c; - continue; - } else { - // Treat final trailing '-' as a literal - add(c, c); - c = chars.next(opts, literal, ec); - if (U_FAILURE(ec)) return; - if (c == 0x5D /*']'*/ && !literal) { - patLocal.append(HYPHEN_RIGHT_BRACE, 2); - mode = 2; - continue; - } - } - } - // syntaxError(chars, "'-' not after char or set"); - ec = U_MALFORMED_SET; - return; - case INTERSECTION /*'&'*/: - if (lastItem == 2 && op == 0) { - op = (UChar) c; - continue; - } - // syntaxError(chars, "'&' not after set"); - ec = U_MALFORMED_SET; - return; - case 0x5E /*'^'*/: - // syntaxError(chars, "'^' not after '['"); - ec = U_MALFORMED_SET; - return; - case 0x7B /*'{'*/: - if (op != 0) { - // syntaxError(chars, "Missing operand after operator"); - ec = U_MALFORMED_SET; - return; - } - if (lastItem == 1) { - add(lastChar, lastChar); - _appendToPat(patLocal, lastChar, FALSE); - } - lastItem = 0; - buf.truncate(0); - { - UBool ok = FALSE; - while (!chars.atEnd()) { - c = chars.next(opts, literal, ec); - if (U_FAILURE(ec)) return; - if (c == 0x7D /*'}'*/ && !literal) { - ok = TRUE; - break; - } - buf.append(c); - } - if (buf.length() < 1 || !ok) { - // syntaxError(chars, "Invalid multicharacter string"); - ec = U_MALFORMED_SET; - return; - } - } - // We have new string. 
Add it to set and continue; - // we don't need to drop through to the further - // processing - add(buf); - patLocal.append((UChar) 0x7B /*'{'*/); - _appendToPat(patLocal, buf, FALSE); - patLocal.append((UChar) 0x7D /*'}'*/); - continue; - case SymbolTable::SYMBOL_REF: - // symbols nosymbols - // [a-$] error error (ambiguous) - // [a$] anchor anchor - // [a-$x] var "x"* literal '$' - // [a-$.] error literal '$' - // *We won't get here in the case of var "x" - { - chars.getPos(backup); - c = chars.next(opts, literal, ec); - if (U_FAILURE(ec)) return; - UBool anchor = (c == 0x5D /*']'*/ && !literal); - if (symbols == 0 && !anchor) { - c = SymbolTable::SYMBOL_REF; - chars.setPos(backup); - break; // literal '$' - } - if (anchor && op == 0) { - if (lastItem == 1) { - add(lastChar, lastChar); - _appendToPat(patLocal, lastChar, FALSE); - } - add(U_ETHER); - usePat = TRUE; - patLocal.append((UChar) SymbolTable::SYMBOL_REF); - patLocal.append((UChar) 0x5D /*']'*/); - mode = 2; - continue; - } - // syntaxError(chars, "Unquoted '$'"); - ec = U_MALFORMED_SET; - return; - } - default: - break; - } - } - - // -------- Parse literal characters. This includes both - // escaped chars ("\u4E01") and non-syntax characters - // ("a"). - - switch (lastItem) { - case 0: - lastItem = 1; - lastChar = c; - break; - case 1: - if (op == HYPHEN /*'-'*/) { - if (lastChar >= c) { - // Don't allow redundant (a-a) or empty (b-a) ranges; - // these are most likely typos. 
- // syntaxError(chars, "Invalid range"); - ec = U_MALFORMED_SET; - return; - } - add(lastChar, c); - _appendToPat(patLocal, lastChar, FALSE); - patLocal.append(op); - _appendToPat(patLocal, c, FALSE); - lastItem = 0; - op = 0; - } else { - add(lastChar, lastChar); - _appendToPat(patLocal, lastChar, FALSE); - lastChar = c; - } - break; - case 2: - if (op != 0) { - // syntaxError(chars, "Set expected after operator"); - ec = U_MALFORMED_SET; - return; - } - lastChar = c; - lastItem = 1; - break; - } - } - - if (mode != 2) { - // syntaxError(chars, "Missing ']'"); - ec = U_MALFORMED_SET; - return; - } - - chars.skipIgnored(opts); - - /** - * Handle global flags (invert, case insensitivity). If this - * pattern should be compiled case-insensitive, then we need - * to close over case BEFORE COMPLEMENTING. This makes - * patterns like /[^abc]/i work. - */ - if ((options & USET_CASE_INSENSITIVE) != 0) { - (this->*caseClosure)(USET_CASE_INSENSITIVE); - } - else if ((options & USET_ADD_CASE_MAPPINGS) != 0) { - (this->*caseClosure)(USET_ADD_CASE_MAPPINGS); - } - if (invert) { - complement(); - } - - // Use the rebuilt pattern (patLocal) only if necessary. Prefer the - // generated pattern. - if (usePat) { - rebuiltPat.append(patLocal); - } else { - _generatePattern(rebuiltPat, FALSE); - } - if (isBogus() && U_SUCCESS(ec)) { - // We likely ran out of memory. AHHH! 
- ec = U_MEMORY_ALLOCATION_ERROR; - } -} - -//---------------------------------------------------------------- -// Property set implementation -//---------------------------------------------------------------- - -namespace { - -static UBool numericValueFilter(UChar32 ch, void* context) { - return u_getNumericValue(ch) == *(double*)context; -} - -static UBool versionFilter(UChar32 ch, void* context) { - static const UVersionInfo none = { 0, 0, 0, 0 }; - UVersionInfo v; - u_charAge(ch, v); - UVersionInfo* version = (UVersionInfo*)context; - return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0; -} - -static UBool scriptExtensionsFilter(UChar32 ch, void* context) { - return uscript_hasScript(ch, *(UScriptCode*)context); -} - -} // namespace - -/** - * Generic filter-based scanning code for UCD property UnicodeSets. - */ -void UnicodeSet::applyFilter(UnicodeSet::Filter filter, - void* context, - const UnicodeSet* inclusions, - UErrorCode &status) { - if (U_FAILURE(status)) return; - - // Logically, walk through all Unicode characters, noting the start - // and end of each range for which filter.contain(c) is - // true. Add each range to a set. - // - // To improve performance, use an inclusions set which - // encodes information about character ranges that are known - // to have identical properties. - // inclusions contains the first characters of - // same-value ranges for the given property. 
- - clear(); - - UChar32 startHasProperty = -1; - int32_t limitRange = inclusions->getRangeCount(); - - for (int j=0; jgetRangeStart(j); - UChar32 end = inclusions->getRangeEnd(j); - - // for all the code points in the range, process - for (UChar32 ch = start; ch <= end; ++ch) { - // only add to this UnicodeSet on inflection points -- - // where the hasProperty value changes to false - if ((*filter)(ch, context)) { - if (startHasProperty < 0) { - startHasProperty = ch; - } - } else if (startHasProperty >= 0) { - add(startHasProperty, ch-1); - startHasProperty = -1; - } - } - } - if (startHasProperty >= 0) { - add((UChar32)startHasProperty, (UChar32)0x10FFFF); - } - if (isBogus() && U_SUCCESS(status)) { - // We likely ran out of memory. AHHH! - status = U_MEMORY_ALLOCATION_ERROR; - } -} - -namespace { - -/** Maps map values to 1 if the mask contains their value'th bit, all others to 0. */ -uint32_t U_CALLCONV generalCategoryMaskFilter(const void *context, uint32_t value) { - uint32_t mask = *(const uint32_t *)context; - value = U_MASK(value) & mask; - if (value != 0) { value = 1; } - return value; -} - -/** Maps one map value to 1, all others to 0. */ -uint32_t U_CALLCONV intValueFilter(const void *context, uint32_t value) { - uint32_t v = *(const uint32_t *)context; - return value == v ? 
1 : 0; -} - -} // namespace - -void UnicodeSet::applyIntPropertyValue(const UCPMap *map, - UCPMapValueFilter *filter, const void *context, - UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return; } - clear(); - UChar32 start = 0, end; - uint32_t value; - while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0, - filter, context, &value)) >= 0) { - if (value != 0) { - add(start, end); - } - start = end + 1; - } - if (isBogus()) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } -} - -namespace { - -static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { - /* Note: we use ' ' in compiler code page */ - int32_t j = 0; - char ch; - --dstCapacity; /* make room for term. zero */ - while ((ch = *src++) != 0) { - if (ch == ' ' && (j==0 || (j>0 && dst[j-1]==' '))) { - continue; - } - if (j >= dstCapacity) return FALSE; - dst[j++] = ch; - } - if (j > 0 && dst[j-1] == ' ') --j; - dst[j] = 0; - return TRUE; -} - -} // namespace - -//---------------------------------------------------------------- -// Property set API -//---------------------------------------------------------------- - -#define FAIL(ec) {ec=U_ILLEGAL_ARGUMENT_ERROR; return *this;} - -UnicodeSet& -UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) { - if (U_FAILURE(ec)) { return *this; } - // All of the following check isFrozen() before modifying this set. 
- if (prop == UCHAR_GENERAL_CATEGORY_MASK) { - const UCPMap *map = u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY, &ec); - applyIntPropertyValue(map, generalCategoryMaskFilter, &value, ec); - } else if (prop == UCHAR_SCRIPT_EXTENSIONS) { - const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec); - UScriptCode script = (UScriptCode)value; - applyFilter(scriptExtensionsFilter, &script, inclusions, ec); - } else if (0 <= prop && prop < UCHAR_BINARY_LIMIT) { - if (value == 0 || value == 1) { - const USet *set = u_getBinaryPropertySet(prop, &ec); - if (U_FAILURE(ec)) { return *this; } - copyFrom(*UnicodeSet::fromUSet(set), TRUE); - if (value == 0) { - complement(); - } - } else { - clear(); - } - } else if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) { - const UCPMap *map = u_getIntPropertyMap(prop, &ec); - applyIntPropertyValue(map, intValueFilter, &value, ec); - } else { - // This code used to always call getInclusions(property source) - // which sets an error for an unsupported property. - ec = U_ILLEGAL_ARGUMENT_ERROR; - // Otherwise we would just clear() this set because - // getIntPropertyValue(c, prop) returns 0 for all code points. - } - return *this; -} - -UnicodeSet& -UnicodeSet::applyPropertyAlias(const UnicodeString& prop, - const UnicodeString& value, - UErrorCode& ec) { - if (U_FAILURE(ec) || isFrozen()) return *this; - - // prop and value used to be converted to char * using the default - // converter instead of the invariant conversion. - // This should not be necessary because all Unicode property and value - // names use only invariant characters. - // If there are any variant characters, then we won't find them anyway. - // Checking first avoids assertion failures in the conversion. 
- if( !uprv_isInvariantUString(prop.getBuffer(), prop.length()) || - !uprv_isInvariantUString(value.getBuffer(), value.length()) - ) { - FAIL(ec); - } - CharString pname, vname; - pname.appendInvariantChars(prop, ec); - vname.appendInvariantChars(value, ec); - if (U_FAILURE(ec)) return *this; - - UProperty p; - int32_t v; - UBool invert = FALSE; - - if (value.length() > 0) { - p = u_getPropertyEnum(pname.data()); - if (p == UCHAR_INVALID_CODE) FAIL(ec); - - // Treat gc as gcm - if (p == UCHAR_GENERAL_CATEGORY) { - p = UCHAR_GENERAL_CATEGORY_MASK; - } - - if ((p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) || - (p >= UCHAR_INT_START && p < UCHAR_INT_LIMIT) || - (p >= UCHAR_MASK_START && p < UCHAR_MASK_LIMIT)) { - v = u_getPropertyValueEnum(p, vname.data()); - if (v == UCHAR_INVALID_CODE) { - // Handle numeric CCC - if (p == UCHAR_CANONICAL_COMBINING_CLASS || - p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS || - p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) { - char* end; - double val = uprv_strtod(vname.data(), &end); - // Anything between 0 and 255 is valid even if unused. - // Cast double->int only after range check. - // We catch NaN here because comparing it with both 0 and 255 will be false - // (as are all comparisons with NaN). - if (*end != 0 || !(0 <= val && val <= 255) || - (v = (int32_t)val) != val) { - // non-integral value or outside 0..255, or trailing junk - FAIL(ec); - } - } else { - FAIL(ec); - } - } - } - - else { - - switch (p) { - case UCHAR_NUMERIC_VALUE: - { - char* end; - double val = uprv_strtod(vname.data(), &end); - if (*end != 0) { - FAIL(ec); - } - applyFilter(numericValueFilter, &val, - CharacterProperties::getInclusionsForProperty(p, ec), ec); - return *this; - } - case UCHAR_NAME: - { - // Must munge name, since u_charFromName() does not do - // 'loose' matching. 
- char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength - if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec); - UChar32 ch = u_charFromName(U_EXTENDED_CHAR_NAME, buf, &ec); - if (U_SUCCESS(ec)) { - clear(); - add(ch); - return *this; - } else { - FAIL(ec); - } - } - case UCHAR_UNICODE_1_NAME: - // ICU 49 deprecates the Unicode_1_Name property APIs. - FAIL(ec); - case UCHAR_AGE: - { - // Must munge name, since u_versionFromString() does not do - // 'loose' matching. - char buf[128]; - if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec); - UVersionInfo version; - u_versionFromString(version, buf); - applyFilter(versionFilter, &version, - CharacterProperties::getInclusionsForProperty(p, ec), ec); - return *this; - } - case UCHAR_SCRIPT_EXTENSIONS: - v = u_getPropertyValueEnum(UCHAR_SCRIPT, vname.data()); - if (v == UCHAR_INVALID_CODE) { - FAIL(ec); - } - // fall through to calling applyIntPropertyValue() - break; - default: - // p is a non-binary, non-enumerated property that we - // don't support (yet). - FAIL(ec); - } - } - } - - else { - // value is empty. Interpret as General Category, Script, or - // Binary property. 
- p = UCHAR_GENERAL_CATEGORY_MASK; - v = u_getPropertyValueEnum(p, pname.data()); - if (v == UCHAR_INVALID_CODE) { - p = UCHAR_SCRIPT; - v = u_getPropertyValueEnum(p, pname.data()); - if (v == UCHAR_INVALID_CODE) { - p = u_getPropertyEnum(pname.data()); - if (p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) { - v = 1; - } else if (0 == uprv_comparePropertyNames(ANY, pname.data())) { - set(MIN_VALUE, MAX_VALUE); - return *this; - } else if (0 == uprv_comparePropertyNames(ASCII, pname.data())) { - set(0, 0x7F); - return *this; - } else if (0 == uprv_comparePropertyNames(ASSIGNED, pname.data())) { - // [:Assigned:]=[:^Cn:] - p = UCHAR_GENERAL_CATEGORY_MASK; - v = U_GC_CN_MASK; - invert = TRUE; - } else { - FAIL(ec); - } - } - } - } - - applyIntPropertyValue(p, v, ec); - if(invert) { - complement(); - } - - if (isBogus() && U_SUCCESS(ec)) { - // We likely ran out of memory. AHHH! - ec = U_MEMORY_ALLOCATION_ERROR; - } - return *this; -} - -//---------------------------------------------------------------- -// Property set patterns -//---------------------------------------------------------------- - -/** - * Return true if the given position, in the given pattern, appears - * to be the start of a property set pattern. - */ -UBool UnicodeSet::resemblesPropertyPattern(const UnicodeString& pattern, - int32_t pos) { - // Patterns are at least 5 characters long - if ((pos+5) > pattern.length()) { - return FALSE; - } - - // Look for an opening [:, [:^, \p, or \P - return isPOSIXOpen(pattern, pos) || isPerlOpen(pattern, pos) || isNameOpen(pattern, pos); -} - -/** - * Return true if the given iterator appears to point at a - * property pattern. Regardless of the result, return with the - * iterator unchanged. - * @param chars iterator over the pattern characters. Upon return - * it will be unchanged. 
- * @param iterOpts RuleCharacterIterator options - */ -UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars, - int32_t iterOpts) { - // NOTE: literal will always be FALSE, because we don't parse escapes. - UBool result = FALSE, literal; - UErrorCode ec = U_ZERO_ERROR; - iterOpts &= ~RuleCharacterIterator::PARSE_ESCAPES; - RuleCharacterIterator::Pos pos; - chars.getPos(pos); - UChar32 c = chars.next(iterOpts, literal, ec); - if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) { - UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE, - literal, ec); - result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) : - (d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/); - } - chars.setPos(pos); - return result && U_SUCCESS(ec); -} - -/** - * Parse the given property pattern at the given parse position. - */ -UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern, - ParsePosition& ppos, - UErrorCode &ec) { - int32_t pos = ppos.getIndex(); - - UBool posix = FALSE; // true for [:pat:], false for \p{pat} \P{pat} \N{pat} - UBool isName = FALSE; // true for \N{pat}, o/w false - UBool invert = FALSE; - - if (U_FAILURE(ec)) return *this; - - // Minimum length is 5 characters, e.g. 
\p{L} - if ((pos+5) > pattern.length()) { - FAIL(ec); - } - - // On entry, ppos should point to one of the following locations: - // Look for an opening [:, [:^, \p, or \P - if (isPOSIXOpen(pattern, pos)) { - posix = TRUE; - pos += 2; - pos = ICU_Utility::skipWhitespace(pattern, pos); - if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) { - ++pos; - invert = TRUE; - } - } else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) { - UChar c = pattern.charAt(pos+1); - invert = (c == UPPER_P); - isName = (c == UPPER_N); - pos += 2; - pos = ICU_Utility::skipWhitespace(pattern, pos); - if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) { - // Syntax error; "\p" or "\P" not followed by "{" - FAIL(ec); - } - } else { - // Open delimiter not seen - FAIL(ec); - } - - // Look for the matching close delimiter, either :] or } - int32_t close; - if (posix) { - close = pattern.indexOf(POSIX_CLOSE, 2, pos); - } else { - close = pattern.indexOf(CLOSE_BRACE, pos); - } - if (close < 0) { - // Syntax error; close delimiter missing - FAIL(ec); - } - - // Look for an '=' sign. If this is present, we will parse a - // medium \p{gc=Cf} or long \p{GeneralCategory=Format} - // pattern. - int32_t equals = pattern.indexOf(EQUALS, pos); - UnicodeString propName, valueName; - if (equals >= 0 && equals < close && !isName) { - // Equals seen; parse medium/long pattern - pattern.extractBetween(pos, equals, propName); - pattern.extractBetween(equals+1, close, valueName); - } - - else { - // Handle case where no '=' is seen, and \N{} - pattern.extractBetween(pos, close, propName); - - // Handle \N{name} - if (isName) { - // This is a little inefficient since it means we have to - // parse NAME_PROP back to UCHAR_NAME even though we already - // know it's UCHAR_NAME. If we refactor the API to - // support args of (UProperty, char*) then we can remove - // NAME_PROP and make this a little more efficient. 
- valueName = propName; - propName = UnicodeString(NAME_PROP, NAME_PROP_LENGTH, US_INV); - } - } - - applyPropertyAlias(propName, valueName, ec); - - if (U_SUCCESS(ec)) { - if (invert) { - complement(); - } - - // Move to the limit position after the close delimiter if the - // parse succeeded. - ppos.setIndex(close + (posix ? 2 : 1)); - } - - return *this; -} - -/** - * Parse a property pattern. - * @param chars iterator over the pattern characters. Upon return - * it will be advanced to the first character after the parsed - * pattern, or the end of the iteration if all characters are - * parsed. - * @param rebuiltPat the pattern that was parsed, rebuilt or - * copied from the input pattern, as appropriate. - */ -void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars, - UnicodeString& rebuiltPat, - UErrorCode& ec) { - if (U_FAILURE(ec)) return; - UnicodeString pattern; - chars.lookahead(pattern); - ParsePosition pos(0); - applyPropertyPattern(pattern, pos, ec); - if (U_FAILURE(ec)) return; - if (pos.getIndex() == 0) { - // syntaxError(chars, "Invalid property pattern"); - ec = U_MALFORMED_SET; - return; - } - chars.jumpahead(pos.getIndex()); - rebuiltPat.append(pattern, 0, pos.getIndex()); -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/unisetspan.cpp b/deps/node/deps/icu-small/source/common/unisetspan.cpp deleted file mode 100644 index 0a889347..00000000 --- a/deps/node/deps/icu-small/source/common/unisetspan.cpp +++ /dev/null @@ -1,1509 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2007-2012, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -****************************************************************************** -* file name: unisetspan.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2007mar01 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/uniset.h" -#include "unicode/ustring.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "uvector.h" -#include "unisetspan.h" - -U_NAMESPACE_BEGIN - -/* - * List of offsets from the current position from where to try matching - * a code point or a string. - * Store offsets rather than indexes to simplify the code and use the same list - * for both increments (in span()) and decrements (in spanBack()). - * - * Assumption: The maximum offset is limited, and the offsets that are stored - * at any one time are relatively dense, that is, there are normally no gaps of - * hundreds or thousands of offset values. - * - * The implementation uses a circular buffer of byte flags, - * each indicating whether the corresponding offset is in the list. - * This avoids inserting into a sorted list of offsets (or absolute indexes) and - * physically moving part of the list. - * - * Note: In principle, the caller should setMaxLength() to the maximum of the - * max string length and U16_LENGTH/U8_LENGTH to account for - * "long" single code points. - * However, this implementation uses at least a staticList with more than - * U8_LENGTH entries anyway. - * - * Note: If maxLength were guaranteed to be no more than 32 or 64, - * the list could be stored as bit flags in a single integer. - * Rather than handling a circular buffer with a start list index, - * the integer would simply be shifted when lower offsets are removed. - * UnicodeSet does not have a limit on the lengths of strings. - */ -class OffsetList { // Only ever stack-allocated, does not need to inherit UMemory. 
-public: - OffsetList() : list(staticList), capacity(0), length(0), start(0) {} - - ~OffsetList() { - if(list!=staticList) { - uprv_free(list); - } - } - - // Call exactly once if the list is to be used. - void setMaxLength(int32_t maxLength) { - if(maxLength<=(int32_t)sizeof(staticList)) { - capacity=(int32_t)sizeof(staticList); - } else { - UBool *l=(UBool *)uprv_malloc(maxLength); - if(l!=NULL) { - list=l; - capacity=maxLength; - } - } - uprv_memset(list, 0, capacity); - } - - void clear() { - uprv_memset(list, 0, capacity); - start=length=0; - } - - UBool isEmpty() const { - return (UBool)(length==0); - } - - // Reduce all stored offsets by delta, used when the current position - // moves by delta. - // There must not be any offsets lower than delta. - // If there is an offset equal to delta, it is removed. - // delta=[1..maxLength] - void shift(int32_t delta) { - int32_t i=start+delta; - if(i>=capacity) { - i-=capacity; - } - if(list[i]) { - list[i]=FALSE; - --length; - } - start=i; - } - - // Add an offset. The list must not contain it yet. - // offset=[1..maxLength] - void addOffset(int32_t offset) { - int32_t i=start+offset; - if(i>=capacity) { - i-=capacity; - } - list[i]=TRUE; - ++length; - } - - // offset=[1..maxLength] - UBool containsOffset(int32_t offset) const { - int32_t i=start+offset; - if(i>=capacity) { - i-=capacity; - } - return list[i]; - } - - // Find the lowest stored offset from a non-empty list, remove it, - // and reduce all other offsets by this minimum. - // Returns [1..maxLength]. - int32_t popMinimum() { - // Look for the next offset in list[start+1..capacity-1]. 
- int32_t i=start, result; - while(++imaxLength16) { - maxLength16=length16; - } - if((which&UTF8) && (thisRelevant || (which&CONTAINED))) { - int32_t length8=getUTF8Length(s16, length16); - utf8Length+=length8; - if(length8>maxLength8) { - maxLength8=length8; - } - } - } - if(!someRelevant) { - maxLength16=maxLength8=0; - return; - } - - // Freeze after checking for the need to use strings at all because freezing - // a set takes some time and memory which are wasted if there are no relevant strings. - if(all) { - spanSet.freeze(); - } - - uint8_t *spanBackLengths; - uint8_t *spanUTF8Lengths; - uint8_t *spanBackUTF8Lengths; - - // Allocate a block of meta data. - int32_t allocSize; - if(all) { - // UTF-8 lengths, 4 sets of span lengths, UTF-8 strings. - allocSize=stringsLength*(4+1+1+1+1)+utf8Length; - } else { - allocSize=stringsLength; // One set of span lengths. - if(which&UTF8) { - // UTF-8 lengths and UTF-8 strings. - allocSize+=stringsLength*4+utf8Length; - } - } - if(allocSize<=(int32_t)sizeof(staticLengths)) { - utf8Lengths=staticLengths; - } else { - utf8Lengths=(int32_t *)uprv_malloc(allocSize); - if(utf8Lengths==NULL) { - maxLength16=maxLength8=0; // Prevent usage by making needsStringSpanUTF16/8() return FALSE. - return; // Out of memory. - } - } - - if(all) { - // Store span lengths for all span() variants. - spanLengths=(uint8_t *)(utf8Lengths+stringsLength); - spanBackLengths=spanLengths+stringsLength; - spanUTF8Lengths=spanBackLengths+stringsLength; - spanBackUTF8Lengths=spanUTF8Lengths+stringsLength; - utf8=spanBackUTF8Lengths+stringsLength; - } else { - // Store span lengths for only one span() variant. - if(which&UTF8) { - spanLengths=(uint8_t *)(utf8Lengths+stringsLength); - utf8=spanLengths+stringsLength; - } else { - spanLengths=(uint8_t *)utf8Lengths; - } - spanBackLengths=spanUTF8Lengths=spanBackUTF8Lengths=spanLengths; - } - - // Set the meta data and pSpanNotSet and write the UTF-8 strings. 
- int32_t utf8Count=0; // Count UTF-8 bytes written so far. - - for(i=0; ifreeze(); - } -} - -// Copy constructor. Assumes which==ALL for a frozen set. -UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSetStringSpan &otherStringSpan, - const UVector &newParentSetStrings) - : spanSet(otherStringSpan.spanSet), pSpanNotSet(NULL), strings(newParentSetStrings), - utf8Lengths(NULL), spanLengths(NULL), utf8(NULL), - utf8Length(otherStringSpan.utf8Length), - maxLength16(otherStringSpan.maxLength16), maxLength8(otherStringSpan.maxLength8), - all(TRUE) { - if(otherStringSpan.pSpanNotSet==&otherStringSpan.spanSet) { - pSpanNotSet=&spanSet; - } else { - pSpanNotSet=(UnicodeSet *)otherStringSpan.pSpanNotSet->clone(); - } - - // Allocate a block of meta data. - // UTF-8 lengths, 4 sets of span lengths, UTF-8 strings. - int32_t stringsLength=strings.size(); - int32_t allocSize=stringsLength*(4+1+1+1+1)+utf8Length; - if(allocSize<=(int32_t)sizeof(staticLengths)) { - utf8Lengths=staticLengths; - } else { - utf8Lengths=(int32_t *)uprv_malloc(allocSize); - if(utf8Lengths==NULL) { - maxLength16=maxLength8=0; // Prevent usage by making needsStringSpanUTF16/8() return FALSE. - return; // Out of memory. - } - } - - spanLengths=(uint8_t *)(utf8Lengths+stringsLength); - utf8=spanLengths+stringsLength*4; - uprv_memcpy(utf8Lengths, otherStringSpan.utf8Lengths, allocSize); -} - -UnicodeSetStringSpan::~UnicodeSetStringSpan() { - if(pSpanNotSet!=NULL && pSpanNotSet!=&spanSet) { - delete pSpanNotSet; - } - if(utf8Lengths!=NULL && utf8Lengths!=staticLengths) { - uprv_free(utf8Lengths); - } -} - -void UnicodeSetStringSpan::addToSpanNotSet(UChar32 c) { - if(pSpanNotSet==NULL || pSpanNotSet==&spanSet) { - if(spanSet.contains(c)) { - return; // Nothing to do. - } - UnicodeSet *newSet=(UnicodeSet *)spanSet.cloneAsThawed(); - if(newSet==NULL) { - return; // Out of memory. - } else { - pSpanNotSet=newSet; - } - } - pSpanNotSet->add(c); -} - -// Compare strings without any argument checks. 
Requires length>0. -static inline UBool -matches16(const UChar *s, const UChar *t, int32_t length) { - do { - if(*s++!=*t++) { - return FALSE; - } - } while(--length>0); - return TRUE; -} - -static inline UBool -matches8(const uint8_t *s, const uint8_t *t, int32_t length) { - do { - if(*s++!=*t++) { - return FALSE; - } - } while(--length>0); - return TRUE; -} - -// Compare 16-bit Unicode strings (which may be malformed UTF-16) -// at code point boundaries. -// That is, each edge of a match must not be in the middle of a surrogate pair. -static inline UBool -matches16CPB(const UChar *s, int32_t start, int32_t limit, const UChar *t, int32_t length) { - s+=start; - limit-=start; - return matches16(s, t, length) && - !(0=0xd800 && c<=0xdbff && length>=2 && U16_IS_TRAIL(c2=s[1])) { - return set.contains(U16_GET_SUPPLEMENTARY(c, c2)) ? 2 : -2; - } - return set.contains(c) ? 1 : -1; -} - -static inline int32_t -spanOneBack(const UnicodeSet &set, const UChar *s, int32_t length) { - UChar c=s[length-1], c2; - if(c>=0xdc00 && c<=0xdfff && length>=2 && U16_IS_LEAD(c2=s[length-2])) { - return set.contains(U16_GET_SUPPLEMENTARY(c2, c)) ? 2 : -2; - } - return set.contains(c) ? 1 : -1; -} - -static inline int32_t -spanOneUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) { - UChar32 c=*s; - if(U8_IS_SINGLE(c)) { - return set.contains(c) ? 1 : -1; - } - // Take advantage of non-ASCII fastpaths in U8_NEXT_OR_FFFD(). - int32_t i=0; - U8_NEXT_OR_FFFD(s, i, length, c); - return set.contains(c) ? i : -i; -} - -static inline int32_t -spanOneBackUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) { - UChar32 c=s[length-1]; - if(U8_IS_SINGLE(c)) { - return set.contains(c) ? 1 : -1; - } - int32_t i=length-1; - c=utf8_prevCharSafeBody(s, 0, &i, c, -3); - length-=i; - return set.contains(c) ? 
length : -length; -} - -/* - * Note: In span() when spanLength==0 (after a string match, or at the beginning - * after an empty code point span) and in spanNot() and spanNotUTF8(), - * string matching could use a binary search - * because all string matches are done from the same start index. - * - * For UTF-8, this would require a comparison function that returns UTF-16 order. - * - * This optimization should not be necessary for normal UnicodeSets because - * most sets have no strings, and most sets with strings have - * very few very short strings. - * For cases with many strings, it might be better to use a different API - * and implementation with a DFA (state machine). - */ - -/* - * Algorithm for span(USET_SPAN_CONTAINED) - * - * Theoretical algorithm: - * - Iterate through the string, and at each code point boundary: - * + If the code point there is in the set, then remember to continue after it. - * + If a set string matches at the current position, then remember to continue after it. - * + Either recursively span for each code point or string match, - * or recursively span for all but the shortest one and - * iteratively continue the span with the shortest local match. - * + Remember the longest recursive span (the farthest end point). - * + If there is no match at the current position, neither for the code point there - * nor for any set string, then stop and return the longest recursive span length. - * - * Optimized implementation: - * - * (We assume that most sets will have very few very short strings. - * A span using a string-less set is extremely fast.) - * - * Create and cache a spanSet which contains all of the single code points - * of the original set but none of its strings. - * - * - Start with spanLength=spanSet.span(USET_SPAN_CONTAINED). - * - Loop: - * + Try to match each set string at the end of the spanLength. 
- * ~ Set strings that start with set-contained code points must be matched - * with a partial overlap because the recursive algorithm would have tried - * to match them at every position. - * ~ Set strings that entirely consist of set-contained code points - * are irrelevant for span(USET_SPAN_CONTAINED) because the - * recursive algorithm would continue after them anyway - * and find the longest recursive match from their end. - * ~ Rather than recursing, note each end point of a set string match. - * + If no set string matched after spanSet.span(), then return - * with where the spanSet.span() ended. - * + If at least one set string matched after spanSet.span(), then - * pop the shortest string match end point and continue - * the loop, trying to match all set strings from there. - * + If at least one more set string matched after a previous string match, - * then test if the code point after the previous string match is also - * contained in the set. - * Continue the loop with the shortest end point of either this code point - * or a matching set string. - * + If no more set string matched after a previous string match, - * then try another spanLength=spanSet.span(USET_SPAN_CONTAINED). - * Stop if spanLength==0, otherwise continue the loop. - * - * By noting each end point of a set string match, - * the function visits each string position at most once and finishes - * in linear time. - * - * The recursive algorithm may visit the same string position many times - * if multiple paths lead to it and finishes in exponential time. - */ - -/* - * Algorithm for span(USET_SPAN_SIMPLE) - * - * Theoretical algorithm: - * - Iterate through the string, and at each code point boundary: - * + If the code point there is in the set, then remember to continue after it. - * + If a set string matches at the current position, then remember to continue after it. - * + Continue from the farthest match position and ignore all others. 
- * + If there is no match at the current position, - * then stop and return the current position. - * - * Optimized implementation: - * - * (Same assumption and spanSet as above.) - * - * - Start with spanLength=spanSet.span(USET_SPAN_CONTAINED). - * - Loop: - * + Try to match each set string at the end of the spanLength. - * ~ Set strings that start with set-contained code points must be matched - * with a partial overlap because the standard algorithm would have tried - * to match them earlier. - * ~ Set strings that entirely consist of set-contained code points - * must be matched with a full overlap because the longest-match algorithm - * would hide set string matches that end earlier. - * Such set strings need not be matched earlier inside the code point span - * because the standard algorithm would then have continued after - * the set string match anyway. - * ~ Remember the longest set string match (farthest end point) from the earliest - * starting point. - * + If no set string matched after spanSet.span(), then return - * with where the spanSet.span() ended. - * + If at least one set string matched, then continue the loop after the - * longest match from the earliest position. - * + If no more set string matched after a previous string match, - * then try another spanLength=spanSet.span(USET_SPAN_CONTAINED). - * Stop if spanLength==0, otherwise continue the loop. - */ - -int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { - if(spanCondition==USET_SPAN_NOT_CONTAINED) { - return spanNot(s, length); - } - int32_t spanLength=spanSet.span(s, length, USET_SPAN_CONTAINED); - if(spanLength==length) { - return length; - } - - // Consider strings; they may overlap with the span. - OffsetList offsets; - if(spanCondition==USET_SPAN_CONTAINED) { - // Use offset list to try all possibilities. 
- offsets.setMaxLength(maxLength16); - } - int32_t pos=spanLength, rest=length-pos; - int32_t i, stringsLength=strings.size(); - for(;;) { - if(spanCondition==USET_SPAN_CONTAINED) { - for(i=0; i=LONG_SPAN) { - overlap=length16; - // While contained: No point matching fully inside the code point span. - U16_BACK_1(s16, 0, overlap); // Length of the string minus the last code point. - } - if(overlap>spanLength) { - overlap=spanLength; - } - int32_t inc=length16-overlap; // Keep overlap+inc==length16. - for(;;) { - if(inc>rest) { - break; - } - // Try to match if the increment is not listed already. - if(!offsets.containsOffset(inc) && matches16CPB(s, pos-overlap, length, s16, length16)) { - if(inc==rest) { - return length; // Reached the end of the string. - } - offsets.addOffset(inc); - } - if(overlap==0) { - break; - } - --overlap; - ++inc; - } - } - } else /* USET_SPAN_SIMPLE */ { - int32_t maxInc=0, maxOverlap=0; - for(i=0; i=LONG_SPAN) { - overlap=length16; - // Longest match: Need to match fully inside the code point span - // to find the match from the earliest start. - } - if(overlap>spanLength) { - overlap=spanLength; - } - int32_t inc=length16-overlap; // Keep overlap+inc==length16. - for(;;) { - if(inc>rest || overlapmaxOverlap || /* redundant overlap==maxOverlap && */ inc>maxInc) && - matches16CPB(s, pos-overlap, length, s16, length16) - ) { - maxInc=inc; // Longest match from earliest start. - maxOverlap=overlap; - break; - } - --overlap; - ++inc; - } - } - - if(maxInc!=0 || maxOverlap!=0) { - // Longest-match algorithm, and there was a string match. - // Simply continue after it. - pos+=maxInc; - rest-=maxInc; - if(rest==0) { - return length; // Reached the end of the string. - } - spanLength=0; // Match strings from after a string match. - continue; - } - } - // Finished trying to match all strings at pos. - - if(spanLength!=0 || pos==0) { - // The position is after an unlimited code point span (spanLength!=0), - // not after a string match. 
- // The only position where spanLength==0 after a span is pos==0. - // Otherwise, an unlimited code point span is only tried again when no - // strings match, and if such a non-initial span fails we stop. - if(offsets.isEmpty()) { - return pos; // No strings matched after a span. - } - // Match strings from after the next string match. - } else { - // The position is after a string match (or a single code point). - if(offsets.isEmpty()) { - // No more strings matched after a previous string match. - // Try another code point span from after the last string match. - spanLength=spanSet.span(s+pos, rest, USET_SPAN_CONTAINED); - if( spanLength==rest || // Reached the end of the string, or - spanLength==0 // neither strings nor span progressed. - ) { - return pos+spanLength; - } - pos+=spanLength; - rest-=spanLength; - continue; // spanLength>0: Match strings from after a span. - } else { - // Try to match only one code point from after a string match if some - // string matched beyond it, so that we try all possible positions - // and don't overshoot. - spanLength=spanOne(spanSet, s+pos, rest); - if(spanLength>0) { - if(spanLength==rest) { - return length; // Reached the end of the string. - } - // Match strings after this code point. - // There cannot be any increments below it because UnicodeSet strings - // contain multiple code points. - pos+=spanLength; - rest-=spanLength; - offsets.shift(spanLength); - spanLength=0; - continue; // Match strings from after a single code point. - } - // Match strings from after the next string match. - } - } - int32_t minOffset=offsets.popMinimum(); - pos+=minOffset; - rest-=minOffset; - spanLength=0; // Match strings from after a string match. 
- } -} - -int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { - if(spanCondition==USET_SPAN_NOT_CONTAINED) { - return spanNotBack(s, length); - } - int32_t pos=spanSet.spanBack(s, length, USET_SPAN_CONTAINED); - if(pos==0) { - return 0; - } - int32_t spanLength=length-pos; - - // Consider strings; they may overlap with the span. - OffsetList offsets; - if(spanCondition==USET_SPAN_CONTAINED) { - // Use offset list to try all possibilities. - offsets.setMaxLength(maxLength16); - } - int32_t i, stringsLength=strings.size(); - uint8_t *spanBackLengths=spanLengths; - if(all) { - spanBackLengths+=stringsLength; - } - for(;;) { - if(spanCondition==USET_SPAN_CONTAINED) { - for(i=0; i=LONG_SPAN) { - overlap=length16; - // While contained: No point matching fully inside the code point span. - int32_t len1=0; - U16_FWD_1(s16, len1, overlap); - overlap-=len1; // Length of the string minus the first code point. - } - if(overlap>spanLength) { - overlap=spanLength; - } - int32_t dec=length16-overlap; // Keep dec+overlap==length16. - for(;;) { - if(dec>pos) { - break; - } - // Try to match if the decrement is not listed already. - if(!offsets.containsOffset(dec) && matches16CPB(s, pos-dec, length, s16, length16)) { - if(dec==pos) { - return 0; // Reached the start of the string. - } - offsets.addOffset(dec); - } - if(overlap==0) { - break; - } - --overlap; - ++dec; - } - } - } else /* USET_SPAN_SIMPLE */ { - int32_t maxDec=0, maxOverlap=0; - for(i=0; i=LONG_SPAN) { - overlap=length16; - // Longest match: Need to match fully inside the code point span - // to find the match from the latest end. - } - if(overlap>spanLength) { - overlap=spanLength; - } - int32_t dec=length16-overlap; // Keep dec+overlap==length16. - for(;;) { - if(dec>pos || overlapmaxOverlap || /* redundant overlap==maxOverlap && */ dec>maxDec) && - matches16CPB(s, pos-dec, length, s16, length16) - ) { - maxDec=dec; // Longest match from latest end. 
- maxOverlap=overlap; - break; - } - --overlap; - ++dec; - } - } - - if(maxDec!=0 || maxOverlap!=0) { - // Longest-match algorithm, and there was a string match. - // Simply continue before it. - pos-=maxDec; - if(pos==0) { - return 0; // Reached the start of the string. - } - spanLength=0; // Match strings from before a string match. - continue; - } - } - // Finished trying to match all strings at pos. - - if(spanLength!=0 || pos==length) { - // The position is before an unlimited code point span (spanLength!=0), - // not before a string match. - // The only position where spanLength==0 before a span is pos==length. - // Otherwise, an unlimited code point span is only tried again when no - // strings match, and if such a non-initial span fails we stop. - if(offsets.isEmpty()) { - return pos; // No strings matched before a span. - } - // Match strings from before the next string match. - } else { - // The position is before a string match (or a single code point). - if(offsets.isEmpty()) { - // No more strings matched before a previous string match. - // Try another code point span from before the last string match. - int32_t oldPos=pos; - pos=spanSet.spanBack(s, oldPos, USET_SPAN_CONTAINED); - spanLength=oldPos-pos; - if( pos==0 || // Reached the start of the string, or - spanLength==0 // neither strings nor span progressed. - ) { - return pos; - } - continue; // spanLength>0: Match strings from before a span. - } else { - // Try to match only one code point from before a string match if some - // string matched beyond it, so that we try all possible positions - // and don't overshoot. - spanLength=spanOneBack(spanSet, s, pos); - if(spanLength>0) { - if(spanLength==pos) { - return 0; // Reached the start of the string. - } - // Match strings before this code point. - // There cannot be any decrements below it because UnicodeSet strings - // contain multiple code points. 
- pos-=spanLength; - offsets.shift(spanLength); - spanLength=0; - continue; // Match strings from before a single code point. - } - // Match strings from before the next string match. - } - } - pos-=offsets.popMinimum(); - spanLength=0; // Match strings from before a string match. - } -} - -int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const { - if(spanCondition==USET_SPAN_NOT_CONTAINED) { - return spanNotUTF8(s, length); - } - int32_t spanLength=spanSet.spanUTF8((const char *)s, length, USET_SPAN_CONTAINED); - if(spanLength==length) { - return length; - } - - // Consider strings; they may overlap with the span. - OffsetList offsets; - if(spanCondition==USET_SPAN_CONTAINED) { - // Use offset list to try all possibilities. - offsets.setMaxLength(maxLength8); - } - int32_t pos=spanLength, rest=length-pos; - int32_t i, stringsLength=strings.size(); - uint8_t *spanUTF8Lengths=spanLengths; - if(all) { - spanUTF8Lengths+=2*stringsLength; - } - for(;;) { - const uint8_t *s8=utf8; - int32_t length8; - if(spanCondition==USET_SPAN_CONTAINED) { - for(i=0; i=LONG_SPAN) { - overlap=length8; - // While contained: No point matching fully inside the code point span. - U8_BACK_1(s8, 0, overlap); // Length of the string minus the last code point. - } - if(overlap>spanLength) { - overlap=spanLength; - } - int32_t inc=length8-overlap; // Keep overlap+inc==length8. - for(;;) { - if(inc>rest) { - break; - } - // Try to match if the increment is not listed already. - // Match at code point boundaries. (The UTF-8 strings were converted - // from UTF-16 and are guaranteed to be well-formed.) - if(!U8_IS_TRAIL(s[pos-overlap]) && - !offsets.containsOffset(inc) && - matches8(s+pos-overlap, s8, length8)) { - if(inc==rest) { - return length; // Reached the end of the string. 
- } - offsets.addOffset(inc); - } - if(overlap==0) { - break; - } - --overlap; - ++inc; - } - s8+=length8; - } - } else /* USET_SPAN_SIMPLE */ { - int32_t maxInc=0, maxOverlap=0; - for(i=0; i=LONG_SPAN) { - overlap=length8; - // Longest match: Need to match fully inside the code point span - // to find the match from the earliest start. - } - if(overlap>spanLength) { - overlap=spanLength; - } - int32_t inc=length8-overlap; // Keep overlap+inc==length8. - for(;;) { - if(inc>rest || overlapmaxOverlap || - /* redundant overlap==maxOverlap && */ inc>maxInc) && - matches8(s+pos-overlap, s8, length8)) { - maxInc=inc; // Longest match from earliest start. - maxOverlap=overlap; - break; - } - --overlap; - ++inc; - } - s8+=length8; - } - - if(maxInc!=0 || maxOverlap!=0) { - // Longest-match algorithm, and there was a string match. - // Simply continue after it. - pos+=maxInc; - rest-=maxInc; - if(rest==0) { - return length; // Reached the end of the string. - } - spanLength=0; // Match strings from after a string match. - continue; - } - } - // Finished trying to match all strings at pos. - - if(spanLength!=0 || pos==0) { - // The position is after an unlimited code point span (spanLength!=0), - // not after a string match. - // The only position where spanLength==0 after a span is pos==0. - // Otherwise, an unlimited code point span is only tried again when no - // strings match, and if such a non-initial span fails we stop. - if(offsets.isEmpty()) { - return pos; // No strings matched after a span. - } - // Match strings from after the next string match. - } else { - // The position is after a string match (or a single code point). - if(offsets.isEmpty()) { - // No more strings matched after a previous string match. - // Try another code point span from after the last string match. 
- spanLength=spanSet.spanUTF8((const char *)s+pos, rest, USET_SPAN_CONTAINED); - if( spanLength==rest || // Reached the end of the string, or - spanLength==0 // neither strings nor span progressed. - ) { - return pos+spanLength; - } - pos+=spanLength; - rest-=spanLength; - continue; // spanLength>0: Match strings from after a span. - } else { - // Try to match only one code point from after a string match if some - // string matched beyond it, so that we try all possible positions - // and don't overshoot. - spanLength=spanOneUTF8(spanSet, s+pos, rest); - if(spanLength>0) { - if(spanLength==rest) { - return length; // Reached the end of the string. - } - // Match strings after this code point. - // There cannot be any increments below it because UnicodeSet strings - // contain multiple code points. - pos+=spanLength; - rest-=spanLength; - offsets.shift(spanLength); - spanLength=0; - continue; // Match strings from after a single code point. - } - // Match strings from after the next string match. - } - } - int32_t minOffset=offsets.popMinimum(); - pos+=minOffset; - rest-=minOffset; - spanLength=0; // Match strings from after a string match. - } -} - -int32_t UnicodeSetStringSpan::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const { - if(spanCondition==USET_SPAN_NOT_CONTAINED) { - return spanNotBackUTF8(s, length); - } - int32_t pos=spanSet.spanBackUTF8((const char *)s, length, USET_SPAN_CONTAINED); - if(pos==0) { - return 0; - } - int32_t spanLength=length-pos; - - // Consider strings; they may overlap with the span. - OffsetList offsets; - if(spanCondition==USET_SPAN_CONTAINED) { - // Use offset list to try all possibilities. 
- offsets.setMaxLength(maxLength8); - } - int32_t i, stringsLength=strings.size(); - uint8_t *spanBackUTF8Lengths=spanLengths; - if(all) { - spanBackUTF8Lengths+=3*stringsLength; - } - for(;;) { - const uint8_t *s8=utf8; - int32_t length8; - if(spanCondition==USET_SPAN_CONTAINED) { - for(i=0; i=LONG_SPAN) { - overlap=length8; - // While contained: No point matching fully inside the code point span. - int32_t len1=0; - U8_FWD_1(s8, len1, overlap); - overlap-=len1; // Length of the string minus the first code point. - } - if(overlap>spanLength) { - overlap=spanLength; - } - int32_t dec=length8-overlap; // Keep dec+overlap==length8. - for(;;) { - if(dec>pos) { - break; - } - // Try to match if the decrement is not listed already. - // Match at code point boundaries. (The UTF-8 strings were converted - // from UTF-16 and are guaranteed to be well-formed.) - if( !U8_IS_TRAIL(s[pos-dec]) && - !offsets.containsOffset(dec) && - matches8(s+pos-dec, s8, length8) - ) { - if(dec==pos) { - return 0; // Reached the start of the string. - } - offsets.addOffset(dec); - } - if(overlap==0) { - break; - } - --overlap; - ++dec; - } - s8+=length8; - } - } else /* USET_SPAN_SIMPLE */ { - int32_t maxDec=0, maxOverlap=0; - for(i=0; i=LONG_SPAN) { - overlap=length8; - // Longest match: Need to match fully inside the code point span - // to find the match from the latest end. - } - if(overlap>spanLength) { - overlap=spanLength; - } - int32_t dec=length8-overlap; // Keep dec+overlap==length8. - for(;;) { - if(dec>pos || overlapmaxOverlap || /* redundant overlap==maxOverlap && */ dec>maxDec) && - matches8(s+pos-dec, s8, length8) - ) { - maxDec=dec; // Longest match from latest end. - maxOverlap=overlap; - break; - } - --overlap; - ++dec; - } - s8+=length8; - } - - if(maxDec!=0 || maxOverlap!=0) { - // Longest-match algorithm, and there was a string match. - // Simply continue before it. - pos-=maxDec; - if(pos==0) { - return 0; // Reached the start of the string. 
- } - spanLength=0; // Match strings from before a string match. - continue; - } - } - // Finished trying to match all strings at pos. - - if(spanLength!=0 || pos==length) { - // The position is before an unlimited code point span (spanLength!=0), - // not before a string match. - // The only position where spanLength==0 before a span is pos==length. - // Otherwise, an unlimited code point span is only tried again when no - // strings match, and if such a non-initial span fails we stop. - if(offsets.isEmpty()) { - return pos; // No strings matched before a span. - } - // Match strings from before the next string match. - } else { - // The position is before a string match (or a single code point). - if(offsets.isEmpty()) { - // No more strings matched before a previous string match. - // Try another code point span from before the last string match. - int32_t oldPos=pos; - pos=spanSet.spanBackUTF8((const char *)s, oldPos, USET_SPAN_CONTAINED); - spanLength=oldPos-pos; - if( pos==0 || // Reached the start of the string, or - spanLength==0 // neither strings nor span progressed. - ) { - return pos; - } - continue; // spanLength>0: Match strings from before a span. - } else { - // Try to match only one code point from before a string match if some - // string matched beyond it, so that we try all possible positions - // and don't overshoot. - spanLength=spanOneBackUTF8(spanSet, s, pos); - if(spanLength>0) { - if(spanLength==pos) { - return 0; // Reached the start of the string. - } - // Match strings before this code point. - // There cannot be any decrements below it because UnicodeSet strings - // contain multiple code points. - pos-=spanLength; - offsets.shift(spanLength); - spanLength=0; - continue; // Match strings from before a single code point. - } - // Match strings from before the next string match. - } - } - pos-=offsets.popMinimum(); - spanLength=0; // Match strings from before a string match. 
- } -} - -/* - * Algorithm for spanNot()==span(USET_SPAN_NOT_CONTAINED) - * - * Theoretical algorithm: - * - Iterate through the string, and at each code point boundary: - * + If the code point there is in the set, then return with the current position. - * + If a set string matches at the current position, then return with the current position. - * - * Optimized implementation: - * - * (Same assumption as for span() above.) - * - * Create and cache a spanNotSet which contains all of the single code points - * of the original set but none of its strings. - * For each set string add its initial code point to the spanNotSet. - * (Also add its final code point for spanNotBack().) - * - * - Loop: - * + Do spanLength=spanNotSet.span(USET_SPAN_NOT_CONTAINED). - * + If the current code point is in the original set, then - * return the current position. - * + If any set string matches at the current position, then - * return the current position. - * + If there is no match at the current position, neither for the code point there - * nor for any set string, then skip this code point and continue the loop. - * This happens for set-string-initial code points that were added to spanNotSet - * when there is not actually a match for such a set string. - */ - -int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const { - int32_t pos=0, rest=length; - int32_t i, stringsLength=strings.size(); - do { - // Span until we find a code point from the set, - // or a code point that starts or ends some string. - i=pSpanNotSet->span(s+pos, rest, USET_SPAN_NOT_CONTAINED); - if(i==rest) { - return length; // Reached the end of the string. - } - pos+=i; - rest-=i; - - // Check whether the current code point is in the original set, - // without the string starts and ends. - int32_t cpLength=spanOne(spanSet, s+pos, rest); - if(cpLength>0) { - return pos; // There is a set element at pos. - } - - // Try to match the strings at pos. 
- for(i=0; ispanBack(s, pos, USET_SPAN_NOT_CONTAINED); - if(pos==0) { - return 0; // Reached the start of the string. - } - - // Check whether the current code point is in the original set, - // without the string starts and ends. - int32_t cpLength=spanOneBack(spanSet, s, pos); - if(cpLength>0) { - return pos; // There is a set element at pos. - } - - // Try to match the strings at pos. - for(i=0; ispanUTF8((const char *)s+pos, rest, USET_SPAN_NOT_CONTAINED); - if(i==rest) { - return length; // Reached the end of the string. - } - pos+=i; - rest-=i; - - // Check whether the current code point is in the original set, - // without the string starts and ends. - int32_t cpLength=spanOneUTF8(spanSet, s+pos, rest); - if(cpLength>0) { - return pos; // There is a set element at pos. - } - - // Try to match the strings at pos. - const uint8_t *s8=utf8; - int32_t length8; - for(i=0; ispanBackUTF8((const char *)s, pos, USET_SPAN_NOT_CONTAINED); - if(pos==0) { - return 0; // Reached the start of the string. - } - - // Check whether the current code point is in the original set, - // without the string starts and ends. - int32_t cpLength=spanOneBackUTF8(spanSet, s, pos); - if(cpLength>0) { - return pos; // There is a set element at pos. - } - - // Try to match the strings at pos. - const uint8_t *s8=utf8; - int32_t length8; - for(i=0; i=0xfe. - LONG_SPAN=0xfe, - // All code points in the string are contained in the parent set. - ALL_CP_CONTAINED=0xff - }; - - // Add a starting or ending string character to the spanNotSet - // so that a character span ends before any string. - void addToSpanNotSet(UChar32 c); - - int32_t spanNot(const UChar *s, int32_t length) const; - int32_t spanNotBack(const UChar *s, int32_t length) const; - int32_t spanNotUTF8(const uint8_t *s, int32_t length) const; - int32_t spanNotBackUTF8(const uint8_t *s, int32_t length) const; - - // Set for span(). Same as parent but without strings. - UnicodeSet spanSet; - - // Set for span(not contained). 
- // Same as spanSet, plus characters that start or end strings. - UnicodeSet *pSpanNotSet; - - // The strings of the parent set. - const UVector &strings; - - // Pointer to the UTF-8 string lengths. - // Also pointer to further allocated storage for meta data and - // UTF-8 string contents as necessary. - int32_t *utf8Lengths; - - // Pointer to the part of the (utf8Lengths) memory block that stores - // the lengths of span(), spanBack() etc. for each string. - uint8_t *spanLengths; - - // Pointer to the part of the (utf8Lengths) memory block that stores - // the UTF-8 versions of the parent set's strings. - uint8_t *utf8; - - // Number of bytes for all UTF-8 versions of strings together. - int32_t utf8Length; - - // Maximum lengths of relevant strings. - int32_t maxLength16; - int32_t maxLength8; - - // Set up for all variants of span()? - UBool all; - - // Memory for small numbers and lengths of strings. - // For example, for 8 strings: - // 8 UTF-8 lengths, 8*4 bytes span lengths, 8*2 3-byte UTF-8 characters - // = 112 bytes = int32_t[28]. - int32_t staticLengths[32]; -}; - -UBool UnicodeSetStringSpan::needsStringSpanUTF16() { - return (UBool)(maxLength16!=0); -} - -UBool UnicodeSetStringSpan::needsStringSpanUTF8() { - return (UBool)(maxLength8!=0); -} - -UBool UnicodeSetStringSpan::contains(UChar32 c) const { - return spanSet.contains(c); -} - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unistr.cpp b/deps/node/deps/icu-small/source/common/unistr.cpp deleted file mode 100644 index ff85734d..00000000 --- a/deps/node/deps/icu-small/source/common/unistr.cpp +++ /dev/null @@ -1,1979 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1999-2016, International Business Machines Corporation and -* others. All Rights Reserved. 
-****************************************************************************** -* -* File unistr.cpp -* -* Modification History: -* -* Date Name Description -* 09/25/98 stephen Creation. -* 04/20/99 stephen Overhauled per 4/16 code review. -* 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX -* 11/18/99 aliu Added handleReplaceBetween() to make inherit from -* Replaceable. -* 06/25/01 grhoten Removed the dependency on iostream -****************************************************************************** -*/ - -#include "unicode/utypes.h" -#include "unicode/appendable.h" -#include "unicode/putil.h" -#include "cstring.h" -#include "cmemory.h" -#include "unicode/ustring.h" -#include "unicode/unistr.h" -#include "unicode/utf.h" -#include "unicode/utf16.h" -#include "uelement.h" -#include "ustr_imp.h" -#include "umutex.h" -#include "uassert.h" - -#if 0 - -#include -using namespace std; - -//DEBUGGING -void -print(const UnicodeString& s, - const char *name) -{ - UChar c; - cout << name << ":|"; - for(int i = 0; i < s.length(); ++i) { - c = s[i]; - if(c>= 0x007E || c < 0x0020) - cout << "[0x" << hex << s[i] << "]"; - else - cout << (char) s[i]; - } - cout << '|' << endl; -} - -void -print(const UChar *s, - int32_t len, - const char *name) -{ - UChar c; - cout << name << ":|"; - for(int i = 0; i < len; ++i) { - c = s[i]; - if(c>= 0x007E || c < 0x0020) - cout << "[0x" << hex << s[i] << "]"; - else - cout << (char) s[i]; - } - cout << '|' << endl; -} -// END DEBUGGING -#endif - -// Local function definitions for now - -// need to copy areas that may overlap -static -inline void -us_arrayCopy(const UChar *src, int32_t srcStart, - UChar *dst, int32_t dstStart, int32_t count) -{ - if(count>0) { - uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src)); - } -} - -// u_unescapeAt() callback to get a UChar from a UnicodeString -U_CDECL_BEGIN -static UChar U_CALLCONV -UnicodeString_charAt(int32_t offset, void *context) { - return ((icu::UnicodeString*) 
context)->charAt(offset); -} -U_CDECL_END - -U_NAMESPACE_BEGIN - -/* The Replaceable virtual destructor can't be defined in the header - due to how AIX works with multiple definitions of virtual functions. -*/ -Replaceable::~Replaceable() {} - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString) - -UnicodeString U_EXPORT2 -operator+ (const UnicodeString &s1, const UnicodeString &s2) { - return - UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0). - append(s1). - append(s2); -} - -//======================================== -// Reference Counting functions, put at top of file so that optimizing compilers -// have a chance to automatically inline. -//======================================== - -void -UnicodeString::addRef() { - umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1); -} - -int32_t -UnicodeString::removeRef() { - return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1); -} - -int32_t -UnicodeString::refCount() const { - return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1)); -} - -void -UnicodeString::releaseArray() { - if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) { - uprv_free((int32_t *)fUnion.fFields.fArray - 1); - } -} - - - -//======================================== -// Constructors -//======================================== - -// The default constructor is inline in unistr.h. 
- -UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) { - fUnion.fFields.fLengthAndFlags = 0; - if(count <= 0 || (uint32_t)c > 0x10ffff) { - // just allocate and do not do anything else - allocate(capacity); - } else if(c <= 0xffff) { - int32_t length = count; - if(capacity < length) { - capacity = length; - } - if(allocate(capacity)) { - UChar *array = getArrayStart(); - UChar unit = (UChar)c; - for(int32_t i = 0; i < length; ++i) { - array[i] = unit; - } - setLength(length); - } - } else { // supplementary code point, write surrogate pairs - if(count > (INT32_MAX / 2)) { - // We would get more than 2G UChars. - allocate(capacity); - return; - } - int32_t length = count * 2; - if(capacity < length) { - capacity = length; - } - if(allocate(capacity)) { - UChar *array = getArrayStart(); - UChar lead = U16_LEAD(c); - UChar trail = U16_TRAIL(c); - for(int32_t i = 0; i < length; i += 2) { - array[i] = lead; - array[i + 1] = trail; - } - setLength(length); - } - } -} - -UnicodeString::UnicodeString(UChar ch) { - fUnion.fFields.fLengthAndFlags = kLength1 | kShortString; - fUnion.fStackFields.fBuffer[0] = ch; -} - -UnicodeString::UnicodeString(UChar32 ch) { - fUnion.fFields.fLengthAndFlags = kShortString; - int32_t i = 0; - UBool isError = FALSE; - U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError); - // We test isError so that the compiler does not complain that we don't. - // If isError then i==0 which is what we want anyway. 
- if(!isError) { - setShortLength(i); - } -} - -UnicodeString::UnicodeString(const UChar *text) { - fUnion.fFields.fLengthAndFlags = kShortString; - doAppend(text, 0, -1); -} - -UnicodeString::UnicodeString(const UChar *text, - int32_t textLength) { - fUnion.fFields.fLengthAndFlags = kShortString; - doAppend(text, 0, textLength); -} - -UnicodeString::UnicodeString(UBool isTerminated, - ConstChar16Ptr textPtr, - int32_t textLength) { - fUnion.fFields.fLengthAndFlags = kReadonlyAlias; - const UChar *text = textPtr; - if(text == NULL) { - // treat as an empty string, do not alias - setToEmpty(); - } else if(textLength < -1 || - (textLength == -1 && !isTerminated) || - (textLength >= 0 && isTerminated && text[textLength] != 0) - ) { - setToBogus(); - } else { - if(textLength == -1) { - // text is terminated, or else it would have failed the above test - textLength = u_strlen(text); - } - setArray(const_cast(text), textLength, - isTerminated ? textLength + 1 : textLength); - } -} - -UnicodeString::UnicodeString(UChar *buff, - int32_t buffLength, - int32_t buffCapacity) { - fUnion.fFields.fLengthAndFlags = kWritableAlias; - if(buff == NULL) { - // treat as an empty string, do not alias - setToEmpty(); - } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { - setToBogus(); - } else { - if(buffLength == -1) { - // fLength = u_strlen(buff); but do not look beyond buffCapacity - const UChar *p = buff, *limit = buff + buffCapacity; - while(p != limit && *p != 0) { - ++p; - } - buffLength = (int32_t)(p - buff); - } - setArray(buff, buffLength, buffCapacity); - } -} - -UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) { - fUnion.fFields.fLengthAndFlags = kShortString; - if(src==NULL) { - // treat as an empty string - } else { - if(length<0) { - length=(int32_t)uprv_strlen(src); - } - if(cloneArrayIfNeeded(length, length, FALSE)) { - u_charsToUChars(src, getArrayStart(), length); - setLength(length); - } else { - setToBogus(); - } 
- } -} - -#if U_CHARSET_IS_UTF8 - -UnicodeString::UnicodeString(const char *codepageData) { - fUnion.fFields.fLengthAndFlags = kShortString; - if(codepageData != 0) { - setToUTF8(codepageData); - } -} - -UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) { - fUnion.fFields.fLengthAndFlags = kShortString; - // if there's nothing to convert, do nothing - if(codepageData == 0 || dataLength == 0 || dataLength < -1) { - return; - } - if(dataLength == -1) { - dataLength = (int32_t)uprv_strlen(codepageData); - } - setToUTF8(StringPiece(codepageData, dataLength)); -} - -// else see unistr_cnv.cpp -#endif - -UnicodeString::UnicodeString(const UnicodeString& that) { - fUnion.fFields.fLengthAndFlags = kShortString; - copyFrom(that); -} - -UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT { - fUnion.fFields.fLengthAndFlags = kShortString; - moveFrom(src); -} - -UnicodeString::UnicodeString(const UnicodeString& that, - int32_t srcStart) { - fUnion.fFields.fLengthAndFlags = kShortString; - setTo(that, srcStart); -} - -UnicodeString::UnicodeString(const UnicodeString& that, - int32_t srcStart, - int32_t srcLength) { - fUnion.fFields.fLengthAndFlags = kShortString; - setTo(that, srcStart, srcLength); -} - -// Replaceable base class clone() default implementation, does not clone -Replaceable * -Replaceable::clone() const { - return NULL; -} - -// UnicodeString overrides clone() with a real implementation -Replaceable * -UnicodeString::clone() const { - return new UnicodeString(*this); -} - -//======================================== -// array allocation -//======================================== - -namespace { - -const int32_t kGrowSize = 128; - -// The number of bytes for one int32_t reference counter and capacity UChars -// must fit into a 32-bit size_t (at least when on a 32-bit platform). -// We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(), -// and round up to a multiple of 16 bytes. 
-// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5. -// (With more complicated checks we could go up to 0x7ffffffd without rounding up, -// but that does not seem worth it.) -const int32_t kMaxCapacity = 0x7ffffff5; - -int32_t getGrowCapacity(int32_t newLength) { - int32_t growSize = (newLength >> 2) + kGrowSize; - if(growSize <= (kMaxCapacity - newLength)) { - return newLength + growSize; - } else { - return kMaxCapacity; - } -} - -} // namespace - -UBool -UnicodeString::allocate(int32_t capacity) { - if(capacity <= US_STACKBUF_SIZE) { - fUnion.fFields.fLengthAndFlags = kShortString; - return TRUE; - } - if(capacity <= kMaxCapacity) { - ++capacity; // for the NUL - // Switch to size_t which is unsigned so that we can allocate up to 4GB. - // Reference counter + UChars. - size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR; - // Round up to a multiple of 16. - numBytes = (numBytes + 15) & ~15; - int32_t *array = (int32_t *) uprv_malloc(numBytes); - if(array != NULL) { - // set initial refCount and point behind the refCount - *array++ = 1; - numBytes -= sizeof(int32_t); - - // have fArray point to the first UChar - fUnion.fFields.fArray = (UChar *)array; - fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR); - fUnion.fFields.fLengthAndFlags = kLongString; - return TRUE; - } - } - fUnion.fFields.fLengthAndFlags = kIsBogus; - fUnion.fFields.fArray = 0; - fUnion.fFields.fCapacity = 0; - return FALSE; -} - -//======================================== -// Destructor -//======================================== - -#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS -static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1 -static u_atomic_int32_t beyondCount(0); - -U_CAPI void unistr_printLengths() { - int32_t i; - for(i = 0; i <= 59; ++i) { - printf("%2d, %9d\n", i, (int32_t)finalLengthCounts[i]); - } - int32_t beyond = beyondCount; - for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) { - beyond 
+= finalLengthCounts[i]; - } - printf(">59, %9d\n", beyond); -} -#endif - -UnicodeString::~UnicodeString() -{ -#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS - // Count lengths of strings at the end of their lifetime. - // Useful for discussion of a desirable stack buffer size. - // Count the contents length, not the optional NUL terminator nor further capacity. - // Ignore open-buffer strings and strings which alias external storage. - if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) { - if(hasShortLength()) { - umtx_atomic_inc(finalLengthCounts + getShortLength()); - } else { - umtx_atomic_inc(&beyondCount); - } - } -#endif - - releaseArray(); -} - -//======================================== -// Factory methods -//======================================== - -UnicodeString UnicodeString::fromUTF8(StringPiece utf8) { - UnicodeString result; - result.setToUTF8(utf8); - return result; -} - -UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) { - UnicodeString result; - int32_t capacity; - // Most UTF-32 strings will be BMP-only and result in a same-length - // UTF-16 string. We overestimate the capacity just slightly, - // just in case there are a few supplementary characters. - if(length <= US_STACKBUF_SIZE) { - capacity = US_STACKBUF_SIZE; - } else { - capacity = length + (length >> 4) + 4; - } - do { - UChar *utf16 = result.getBuffer(capacity); - int32_t length16; - UErrorCode errorCode = U_ZERO_ERROR; - u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16, - utf32, length, - 0xfffd, // Substitution character. - NULL, // Don't care about number of substitutions. - &errorCode); - result.releaseBuffer(length16); - if(errorCode == U_BUFFER_OVERFLOW_ERROR) { - capacity = length16 + 1; // +1 for the terminating NUL. 
- continue; - } else if(U_FAILURE(errorCode)) { - result.setToBogus(); - } - break; - } while(TRUE); - return result; -} - -//======================================== -// Assignment -//======================================== - -UnicodeString & -UnicodeString::operator=(const UnicodeString &src) { - return copyFrom(src); -} - -UnicodeString & -UnicodeString::fastCopyFrom(const UnicodeString &src) { - return copyFrom(src, TRUE); -} - -UnicodeString & -UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { - // if assigning to ourselves, do nothing - if(this == &src) { - return *this; - } - - // is the right side bogus? - if(src.isBogus()) { - setToBogus(); - return *this; - } - - // delete the current contents - releaseArray(); - - if(src.isEmpty()) { - // empty string - use the stack buffer - setToEmpty(); - return *this; - } - - // fLength>0 and not an "open" src.getBuffer(minCapacity) - fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags; - switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) { - case kShortString: - // short string using the stack buffer, do the same - uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer, - getShortLength() * U_SIZEOF_UCHAR); - break; - case kLongString: - // src uses a refCounted string buffer, use that buffer with refCount - // src is const, use a cast - we don't actually change it - ((UnicodeString &)src).addRef(); - // copy all fields, share the reference-counted buffer - fUnion.fFields.fArray = src.fUnion.fFields.fArray; - fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; - if(!hasShortLength()) { - fUnion.fFields.fLength = src.fUnion.fFields.fLength; - } - break; - case kReadonlyAlias: - if(fastCopy) { - // src is a readonly alias, do the same - // -> maintain the readonly alias as such - fUnion.fFields.fArray = src.fUnion.fFields.fArray; - fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; - if(!hasShortLength()) { - fUnion.fFields.fLength = 
src.fUnion.fFields.fLength; - } - break; - } - // else if(!fastCopy) fall through to case kWritableAlias - // -> allocate a new buffer and copy the contents - U_FALLTHROUGH; - case kWritableAlias: { - // src is a writable alias; we make a copy of that instead - int32_t srcLength = src.length(); - if(allocate(srcLength)) { - u_memcpy(getArrayStart(), src.getArrayStart(), srcLength); - setLength(srcLength); - break; - } - // if there is not enough memory, then fall through to setting to bogus - U_FALLTHROUGH; - } - default: - // if src is bogus, set ourselves to bogus - // do not call setToBogus() here because fArray and flags are not consistent here - fUnion.fFields.fLengthAndFlags = kIsBogus; - fUnion.fFields.fArray = 0; - fUnion.fFields.fCapacity = 0; - break; - } - - return *this; -} - -UnicodeString &UnicodeString::moveFrom(UnicodeString &src) U_NOEXCEPT { - // No explicit check for self move assignment, consistent with standard library. - // Self move assignment causes no crash nor leak but might make the object bogus. - releaseArray(); - copyFieldsFrom(src, TRUE); - return *this; -} - -// Same as moveFrom() except without memory management. -void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT { - int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags; - if(lengthAndFlags & kUsingStackBuffer) { - // Short string using the stack buffer, copy the contents. - // Check for self assignment to prevent "overlap in memcpy" warnings, - // although it should be harmless to copy a buffer to itself exactly. - if(this != &src) { - uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer, - getShortLength() * U_SIZEOF_UCHAR); - } - } else { - // In all other cases, copy all fields. 
- fUnion.fFields.fArray = src.fUnion.fFields.fArray; - fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; - if(!hasShortLength()) { - fUnion.fFields.fLength = src.fUnion.fFields.fLength; - } - if(setSrcToBogus) { - // Set src to bogus without releasing any memory. - src.fUnion.fFields.fLengthAndFlags = kIsBogus; - src.fUnion.fFields.fArray = NULL; - src.fUnion.fFields.fCapacity = 0; - } - } -} - -void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT { - UnicodeString temp; // Empty short string: Known not to need releaseArray(). - // Copy fields without resetting source values in between. - temp.copyFieldsFrom(*this, FALSE); - this->copyFieldsFrom(other, FALSE); - other.copyFieldsFrom(temp, FALSE); - // Set temp to an empty string so that other's memory is not released twice. - temp.fUnion.fFields.fLengthAndFlags = kShortString; -} - -//======================================== -// Miscellaneous operations -//======================================== - -UnicodeString UnicodeString::unescape() const { - UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity - if (result.isBogus()) { - return result; - } - const UChar *array = getBuffer(); - int32_t len = length(); - int32_t prev = 0; - for (int32_t i=0;;) { - if (i == len) { - result.append(array, prev, len - prev); - break; - } - if (array[i++] == 0x5C /*'\\'*/) { - result.append(array, prev, (i - 1) - prev); - UChar32 c = unescapeAt(i); // advances i - if (c < 0) { - result.remove(); // return empty string - break; // invalid escape sequence - } - result.append(c); - prev = i; - } - } - return result; -} - -UChar32 UnicodeString::unescapeAt(int32_t &offset) const { - return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this); -} - -//======================================== -// Read-only implementation -//======================================== -UBool -UnicodeString::doEquals(const UnicodeString &text, int32_t len) const { - // Requires: this & text not bogus and 
have same lengths. - // Byte-wise comparison works for equality regardless of endianness. - return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0; -} - -int8_t -UnicodeString::doCompare( int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) const -{ - // compare illegal string values - if(isBogus()) { - return -1; - } - - // pin indices to legal values - pinIndices(start, length); - - if(srcChars == NULL) { - // treat const UChar *srcChars==NULL as an empty string - return length == 0 ? 0 : 1; - } - - // get the correct pointer - const UChar *chars = getArrayStart(); - - chars += start; - srcChars += srcStart; - - int32_t minLength; - int8_t lengthResult; - - // get the srcLength if necessary - if(srcLength < 0) { - srcLength = u_strlen(srcChars + srcStart); - } - - // are we comparing different lengths? - if(length != srcLength) { - if(length < srcLength) { - minLength = length; - lengthResult = -1; - } else { - minLength = srcLength; - lengthResult = 1; - } - } else { - minLength = length; - lengthResult = 0; - } - - /* - * note that uprv_memcmp() returns an int but we return an int8_t; - * we need to take care not to truncate the result - - * one way to do this is to right-shift the value to - * move the sign bit into the lower 8 bits and making sure that this - * does not become 0 itself - */ - - if(minLength > 0 && chars != srcChars) { - int32_t result; - -# if U_IS_BIG_ENDIAN - // big-endian: byte comparison works - result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar)); - if(result != 0) { - return (int8_t)(result >> 15 | 1); - } -# else - // little-endian: compare UChar units - do { - result = ((int32_t)*(chars++) - (int32_t)*(srcChars++)); - if(result != 0) { - return (int8_t)(result >> 15 | 1); - } - } while(--minLength > 0); -# endif - } - return lengthResult; -} - -/* String compare in code point order - doCompare() compares in code unit order. 
*/ -int8_t -UnicodeString::doCompareCodePointOrder(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) const -{ - // compare illegal string values - // treat const UChar *srcChars==NULL as an empty string - if(isBogus()) { - return -1; - } - - // pin indices to legal values - pinIndices(start, length); - - if(srcChars == NULL) { - srcStart = srcLength = 0; - } - - int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE); - /* translate the 32-bit result into an 8-bit one */ - if(diff!=0) { - return (int8_t)(diff >> 15 | 1); - } else { - return 0; - } -} - -int32_t -UnicodeString::getLength() const { - return length(); -} - -UChar -UnicodeString::getCharAt(int32_t offset) const { - return charAt(offset); -} - -UChar32 -UnicodeString::getChar32At(int32_t offset) const { - return char32At(offset); -} - -UChar32 -UnicodeString::char32At(int32_t offset) const -{ - int32_t len = length(); - if((uint32_t)offset < (uint32_t)len) { - const UChar *array = getArrayStart(); - UChar32 c; - U16_GET(array, 0, offset, len, c); - return c; - } else { - return kInvalidUChar; - } -} - -int32_t -UnicodeString::getChar32Start(int32_t offset) const { - if((uint32_t)offset < (uint32_t)length()) { - const UChar *array = getArrayStart(); - U16_SET_CP_START(array, 0, offset); - return offset; - } else { - return 0; - } -} - -int32_t -UnicodeString::getChar32Limit(int32_t offset) const { - int32_t len = length(); - if((uint32_t)offset < (uint32_t)len) { - const UChar *array = getArrayStart(); - U16_SET_CP_LIMIT(array, 0, offset, len); - return offset; - } else { - return len; - } -} - -int32_t -UnicodeString::countChar32(int32_t start, int32_t length) const { - pinIndices(start, length); - // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL - return u_countChar32(getArrayStart()+start, length); -} - -UBool 
-UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const { - pinIndices(start, length); - // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL - return u_strHasMoreChar32Than(getArrayStart()+start, length, number); -} - -int32_t -UnicodeString::moveIndex32(int32_t index, int32_t delta) const { - // pin index - int32_t len = length(); - if(index<0) { - index=0; - } else if(index>len) { - index=len; - } - - const UChar *array = getArrayStart(); - if(delta>0) { - U16_FWD_N(array, index, len, delta); - } else { - U16_BACK_N(array, 0, index, -delta); - } - - return index; -} - -void -UnicodeString::doExtract(int32_t start, - int32_t length, - UChar *dst, - int32_t dstStart) const -{ - // pin indices to legal values - pinIndices(start, length); - - // do not copy anything if we alias dst itself - const UChar *array = getArrayStart(); - if(array + start != dst + dstStart) { - us_arrayCopy(array, start, dst, dstStart, length); - } -} - -int32_t -UnicodeString::extract(Char16Ptr dest, int32_t destCapacity, - UErrorCode &errorCode) const { - int32_t len = length(); - if(U_SUCCESS(errorCode)) { - if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - } else { - const UChar *array = getArrayStart(); - if(len>0 && len<=destCapacity && array!=dest) { - u_memcpy(dest, array, len); - } - return u_terminateUChars(dest, destCapacity, len, &errorCode); - } - } - - return len; -} - -int32_t -UnicodeString::extract(int32_t start, - int32_t length, - char *target, - int32_t targetCapacity, - enum EInvariant) const -{ - // if the arguments are illegal, then do nothing - if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) { - return 0; - } - - // pin the indices to legal values - pinIndices(start, length); - - if(length <= targetCapacity) { - u_UCharsToChars(getArrayStart() + start, target, length); - } - UErrorCode status = U_ZERO_ERROR; - return 
u_terminateChars(target, targetCapacity, length, &status); -} - -UnicodeString -UnicodeString::tempSubString(int32_t start, int32_t len) const { - pinIndices(start, len); - const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer - if(array==NULL) { - array=fUnion.fStackFields.fBuffer; // anything not NULL because that would make an empty string - len=-2; // bogus result string - } - return UnicodeString(FALSE, array + start, len); -} - -int32_t -UnicodeString::toUTF8(int32_t start, int32_t len, - char *target, int32_t capacity) const { - pinIndices(start, len); - int32_t length8; - UErrorCode errorCode = U_ZERO_ERROR; - u_strToUTF8WithSub(target, capacity, &length8, - getBuffer() + start, len, - 0xFFFD, // Standard substitution character. - NULL, // Don't care about number of substitutions. - &errorCode); - return length8; -} - -#if U_CHARSET_IS_UTF8 - -int32_t -UnicodeString::extract(int32_t start, int32_t len, - char *target, uint32_t dstSize) const { - // if the arguments are illegal, then do nothing - if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) { - return 0; - } - return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff); -} - -// else see unistr_cnv.cpp -#endif - -void -UnicodeString::extractBetween(int32_t start, - int32_t limit, - UnicodeString& target) const { - pinIndex(start); - pinIndex(limit); - doExtract(start, limit - start, target); -} - -// When converting from UTF-16 to UTF-8, the result will have at most 3 times -// as many bytes as the source has UChars. -// The "worst cases" are writing systems like Indic, Thai and CJK with -// 3:1 bytes:UChars. -void -UnicodeString::toUTF8(ByteSink &sink) const { - int32_t length16 = length(); - if(length16 != 0) { - char stackBuffer[1024]; - int32_t capacity = (int32_t)sizeof(stackBuffer); - UBool utf8IsOwned = FALSE; - char *utf8 = sink.GetAppendBuffer(length16 < capacity ? 
length16 : capacity, - 3*length16, - stackBuffer, capacity, - &capacity); - int32_t length8 = 0; - UErrorCode errorCode = U_ZERO_ERROR; - u_strToUTF8WithSub(utf8, capacity, &length8, - getBuffer(), length16, - 0xFFFD, // Standard substitution character. - NULL, // Don't care about number of substitutions. - &errorCode); - if(errorCode == U_BUFFER_OVERFLOW_ERROR) { - utf8 = (char *)uprv_malloc(length8); - if(utf8 != NULL) { - utf8IsOwned = TRUE; - errorCode = U_ZERO_ERROR; - u_strToUTF8WithSub(utf8, length8, &length8, - getBuffer(), length16, - 0xFFFD, // Standard substitution character. - NULL, // Don't care about number of substitutions. - &errorCode); - } else { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - } - if(U_SUCCESS(errorCode)) { - sink.Append(utf8, length8); - sink.Flush(); - } - if(utf8IsOwned) { - uprv_free(utf8); - } - } -} - -int32_t -UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const { - int32_t length32=0; - if(U_SUCCESS(errorCode)) { - // getBuffer() and u_strToUTF32WithSub() check for illegal arguments. - u_strToUTF32WithSub(utf32, capacity, &length32, - getBuffer(), length(), - 0xfffd, // Substitution character. - NULL, // Don't care about number of substitutions. 
- &errorCode); - } - return length32; -} - -int32_t -UnicodeString::indexOf(const UChar *srcChars, - int32_t srcStart, - int32_t srcLength, - int32_t start, - int32_t length) const -{ - if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { - return -1; - } - - // UnicodeString does not find empty substrings - if(srcLength < 0 && srcChars[srcStart] == 0) { - return -1; - } - - // get the indices within bounds - pinIndices(start, length); - - // find the first occurrence of the substring - const UChar *array = getArrayStart(); - const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength); - if(match == NULL) { - return -1; - } else { - return (int32_t)(match - array); - } -} - -int32_t -UnicodeString::doIndexOf(UChar c, - int32_t start, - int32_t length) const -{ - // pin indices - pinIndices(start, length); - - // find the first occurrence of c - const UChar *array = getArrayStart(); - const UChar *match = u_memchr(array + start, c, length); - if(match == NULL) { - return -1; - } else { - return (int32_t)(match - array); - } -} - -int32_t -UnicodeString::doIndexOf(UChar32 c, - int32_t start, - int32_t length) const { - // pin indices - pinIndices(start, length); - - // find the first occurrence of c - const UChar *array = getArrayStart(); - const UChar *match = u_memchr32(array + start, c, length); - if(match == NULL) { - return -1; - } else { - return (int32_t)(match - array); - } -} - -int32_t -UnicodeString::lastIndexOf(const UChar *srcChars, - int32_t srcStart, - int32_t srcLength, - int32_t start, - int32_t length) const -{ - if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { - return -1; - } - - // UnicodeString does not find empty substrings - if(srcLength < 0 && srcChars[srcStart] == 0) { - return -1; - } - - // get the indices within bounds - pinIndices(start, length); - - // find the last occurrence of the substring - const UChar *array = getArrayStart(); - const UChar *match = u_strFindLast(array 
+ start, length, srcChars + srcStart, srcLength); - if(match == NULL) { - return -1; - } else { - return (int32_t)(match - array); - } -} - -int32_t -UnicodeString::doLastIndexOf(UChar c, - int32_t start, - int32_t length) const -{ - if(isBogus()) { - return -1; - } - - // pin indices - pinIndices(start, length); - - // find the last occurrence of c - const UChar *array = getArrayStart(); - const UChar *match = u_memrchr(array + start, c, length); - if(match == NULL) { - return -1; - } else { - return (int32_t)(match - array); - } -} - -int32_t -UnicodeString::doLastIndexOf(UChar32 c, - int32_t start, - int32_t length) const { - // pin indices - pinIndices(start, length); - - // find the last occurrence of c - const UChar *array = getArrayStart(); - const UChar *match = u_memrchr32(array + start, c, length); - if(match == NULL) { - return -1; - } else { - return (int32_t)(match - array); - } -} - -//======================================== -// Write implementation -//======================================== - -UnicodeString& -UnicodeString::findAndReplace(int32_t start, - int32_t length, - const UnicodeString& oldText, - int32_t oldStart, - int32_t oldLength, - const UnicodeString& newText, - int32_t newStart, - int32_t newLength) -{ - if(isBogus() || oldText.isBogus() || newText.isBogus()) { - return *this; - } - - pinIndices(start, length); - oldText.pinIndices(oldStart, oldLength); - newText.pinIndices(newStart, newLength); - - if(oldLength == 0) { - return *this; - } - - while(length > 0 && length >= oldLength) { - int32_t pos = indexOf(oldText, oldStart, oldLength, start, length); - if(pos < 0) { - // no more oldText's here: done - break; - } else { - // we found oldText, replace it by newText and go beyond it - replace(pos, oldLength, newText, newStart, newLength); - length -= pos + oldLength - start; - start = pos + newLength; - } - } - - return *this; -} - - -void -UnicodeString::setToBogus() -{ - releaseArray(); - - fUnion.fFields.fLengthAndFlags = 
kIsBogus; - fUnion.fFields.fArray = 0; - fUnion.fFields.fCapacity = 0; -} - -// turn a bogus string into an empty one -void -UnicodeString::unBogus() { - if(fUnion.fFields.fLengthAndFlags & kIsBogus) { - setToEmpty(); - } -} - -const char16_t * -UnicodeString::getTerminatedBuffer() { - if(!isWritable()) { - return nullptr; - } - UChar *array = getArrayStart(); - int32_t len = length(); - if(len < getCapacity()) { - if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) { - // If len= 0 && isTerminated && text[textLength] != 0) - ) { - setToBogus(); - return *this; - } - - releaseArray(); - - if(textLength == -1) { - // text is terminated, or else it would have failed the above test - textLength = u_strlen(text); - } - fUnion.fFields.fLengthAndFlags = kReadonlyAlias; - setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); - return *this; -} - -// setTo() analogous to the writable-aliasing constructor with the same signature -UnicodeString & -UnicodeString::setTo(UChar *buffer, - int32_t buffLength, - int32_t buffCapacity) { - if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) { - // do not modify a string that has an "open" getBuffer(minCapacity) - return *this; - } - - if(buffer == NULL) { - // treat as an empty string, do not alias - releaseArray(); - setToEmpty(); - return *this; - } - - if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { - setToBogus(); - return *this; - } else if(buffLength == -1) { - // buffLength = u_strlen(buff); but do not look beyond buffCapacity - const UChar *p = buffer, *limit = buffer + buffCapacity; - while(p != limit && *p != 0) { - ++p; - } - buffLength = (int32_t)(p - buffer); - } - - releaseArray(); - - fUnion.fFields.fLengthAndFlags = kWritableAlias; - setArray(buffer, buffLength, buffCapacity); - return *this; -} - -UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) { - unBogus(); - int32_t length = utf8.length(); - int32_t capacity; - // The UTF-16 string will be at most as 
long as the UTF-8 string. - if(length <= US_STACKBUF_SIZE) { - capacity = US_STACKBUF_SIZE; - } else { - capacity = length + 1; // +1 for the terminating NUL. - } - UChar *utf16 = getBuffer(capacity); - int32_t length16; - UErrorCode errorCode = U_ZERO_ERROR; - u_strFromUTF8WithSub(utf16, getCapacity(), &length16, - utf8.data(), length, - 0xfffd, // Substitution character. - NULL, // Don't care about number of substitutions. - &errorCode); - releaseBuffer(length16); - if(U_FAILURE(errorCode)) { - setToBogus(); - } - return *this; -} - -UnicodeString& -UnicodeString::setCharAt(int32_t offset, - UChar c) -{ - int32_t len = length(); - if(cloneArrayIfNeeded() && len > 0) { - if(offset < 0) { - offset = 0; - } else if(offset >= len) { - offset = len - 1; - } - - getArrayStart()[offset] = c; - } - return *this; -} - -UnicodeString& -UnicodeString::replace(int32_t start, - int32_t _length, - UChar32 srcChar) { - UChar buffer[U16_MAX_LENGTH]; - int32_t count = 0; - UBool isError = FALSE; - U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError); - // We test isError so that the compiler does not complain that we don't. - // If isError (srcChar is not a valid code point) then count==0 which means - // we remove the source segment rather than replacing it with srcChar. - return doReplace(start, _length, buffer, 0, isError ? 0 : count); -} - -UnicodeString& -UnicodeString::append(UChar32 srcChar) { - UChar buffer[U16_MAX_LENGTH]; - int32_t _length = 0; - UBool isError = FALSE; - U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError); - // We test isError so that the compiler does not complain that we don't. - // If isError then _length==0 which turns the doAppend() into a no-op anyway. - return isError ? 
*this : doAppend(buffer, 0, _length); -} - -UnicodeString& -UnicodeString::doReplace( int32_t start, - int32_t length, - const UnicodeString& src, - int32_t srcStart, - int32_t srcLength) -{ - // pin the indices to legal values - src.pinIndices(srcStart, srcLength); - - // get the characters from src - // and replace the range in ourselves with them - return doReplace(start, length, src.getArrayStart(), srcStart, srcLength); -} - -UnicodeString& -UnicodeString::doReplace(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) -{ - if(!isWritable()) { - return *this; - } - - int32_t oldLength = this->length(); - - // optimize (read-only alias).remove(0, start) and .remove(start, end) - if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) { - if(start == 0) { - // remove prefix by adjusting the array pointer - pinIndex(length); - fUnion.fFields.fArray += length; - fUnion.fFields.fCapacity -= length; - setLength(oldLength - length); - return *this; - } else { - pinIndex(start); - if(length >= (oldLength - start)) { - // remove suffix by reducing the length (like truncate()) - setLength(start); - fUnion.fFields.fCapacity = start; // not NUL-terminated any more - return *this; - } - } - } - - if(start == oldLength) { - return doAppend(srcChars, srcStart, srcLength); - } - - if(srcChars == 0) { - srcLength = 0; - } else { - // Perform all remaining operations relative to srcChars + srcStart. - // From this point forward, do not use srcStart. - srcChars += srcStart; - if (srcLength < 0) { - // get the srcLength if necessary - srcLength = u_strlen(srcChars); - } - } - - // pin the indices to legal values - pinIndices(start, length); - - // Calculate the size of the string after the replace. - // Avoid int32_t overflow. 
- int32_t newLength = oldLength - length; - if(srcLength > (INT32_MAX - newLength)) { - setToBogus(); - return *this; - } - newLength += srcLength; - - // Check for insertion into ourself - const UChar *oldArray = getArrayStart(); - if (isBufferWritable() && - oldArray < srcChars + srcLength && - srcChars < oldArray + oldLength) { - // Copy into a new UnicodeString and start over - UnicodeString copy(srcChars, srcLength); - if (copy.isBogus()) { - setToBogus(); - return *this; - } - return doReplace(start, length, copy.getArrayStart(), 0, srcLength); - } - - // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents; - // therefore we need to keep the current fArray - UChar oldStackBuffer[US_STACKBUF_SIZE]; - if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) { - // copy the stack buffer contents because it will be overwritten with - // fUnion.fFields values - u_memcpy(oldStackBuffer, oldArray, oldLength); - oldArray = oldStackBuffer; - } - - // clone our array and allocate a bigger array if needed - int32_t *bufferToDelete = 0; - if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength), - FALSE, &bufferToDelete) - ) { - return *this; - } - - // now do the replace - - UChar *newArray = getArrayStart(); - if(newArray != oldArray) { - // if fArray changed, then we need to copy everything except what will change - us_arrayCopy(oldArray, 0, newArray, 0, start); - us_arrayCopy(oldArray, start + length, - newArray, start + srcLength, - oldLength - (start + length)); - } else if(length != srcLength) { - // fArray did not change; copy only the portion that isn't changing, leaving a hole - us_arrayCopy(oldArray, start + length, - newArray, start + srcLength, - oldLength - (start + length)); - } - - // now fill in the hole with the new string - us_arrayCopy(srcChars, 0, newArray, start, srcLength); - - setLength(newLength); - - // delayed delete in case srcChars == fArray when we started, and - 
// to keep oldArray alive for the above operations - if (bufferToDelete) { - uprv_free(bufferToDelete); - } - - return *this; -} - -// Versions of doReplace() only for append() variants. -// doReplace() and doAppend() optimize for different cases. - -UnicodeString& -UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) { - if(srcLength == 0) { - return *this; - } - - // pin the indices to legal values - src.pinIndices(srcStart, srcLength); - return doAppend(src.getArrayStart(), srcStart, srcLength); -} - -UnicodeString& -UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) { - if(!isWritable() || srcLength == 0 || srcChars == NULL) { - return *this; - } - - // Perform all remaining operations relative to srcChars + srcStart. - // From this point forward, do not use srcStart. - srcChars += srcStart; - - if(srcLength < 0) { - // get the srcLength if necessary - if((srcLength = u_strlen(srcChars)) == 0) { - return *this; - } - } - - int32_t oldLength = length(); - int32_t newLength = oldLength + srcLength; - - // Check for append onto ourself - const UChar* oldArray = getArrayStart(); - if (isBufferWritable() && - oldArray < srcChars + srcLength && - srcChars < oldArray + oldLength) { - // Copy into a new UnicodeString and start over - UnicodeString copy(srcChars, srcLength); - if (copy.isBogus()) { - setToBogus(); - return *this; - } - return doAppend(copy.getArrayStart(), 0, srcLength); - } - - // optimize append() onto a large-enough, owned string - if((newLength <= getCapacity() && isBufferWritable()) || - cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) { - UChar *newArray = getArrayStart(); - // Do not copy characters when - // UChar *buffer=str.getAppendBuffer(...); - // is followed by - // str.append(buffer, length); - // or - // str.appendString(buffer, length) - // or similar. 
- if(srcChars != newArray + oldLength) { - us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength); - } - setLength(newLength); - } - return *this; -} - -/** - * Replaceable API - */ -void -UnicodeString::handleReplaceBetween(int32_t start, - int32_t limit, - const UnicodeString& text) { - replaceBetween(start, limit, text); -} - -/** - * Replaceable API - */ -void -UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) { - if (limit <= start) { - return; // Nothing to do; avoid bogus malloc call - } - UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) ); - // Check to make sure text is not null. - if (text != NULL) { - extractBetween(start, limit, text, 0); - insert(dest, text, 0, limit - start); - uprv_free(text); - } -} - -/** - * Replaceable API - * - * NOTE: This is for the Replaceable class. There is no rep.cpp, - * so we implement this function here. - */ -UBool Replaceable::hasMetaData() const { - return TRUE; -} - -/** - * Replaceable API - */ -UBool UnicodeString::hasMetaData() const { - return FALSE; -} - -UnicodeString& -UnicodeString::doReverse(int32_t start, int32_t length) { - if(length <= 1 || !cloneArrayIfNeeded()) { - return *this; - } - - // pin the indices to legal values - pinIndices(start, length); - if(length <= 1) { // pinIndices() might have shrunk the length - return *this; - } - - UChar *left = getArrayStart() + start; - UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2) - UChar swap; - UBool hasSupplementary = FALSE; - - // Before the loop we know left=2. - do { - hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left); - hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right); - *right-- = swap; - } while(left < right); - // Make sure to test the middle code unit of an odd-length string. - // Redundant if the length is even. 
- hasSupplementary |= (UBool)U16_IS_LEAD(*left); - - /* if there are supplementary code points in the reversed range, then re-swap their surrogates */ - if(hasSupplementary) { - UChar swap2; - - left = getArrayStart() + start; - right = left + length - 1; // -1 so that we can look at *(left+1) if left= targetLength || !cloneArrayIfNeeded(targetLength)) { - return FALSE; - } else { - // move contents up by padding width - UChar *array = getArrayStart(); - int32_t start = targetLength - oldLength; - us_arrayCopy(array, 0, array, start, oldLength); - - // fill in padding character - while(--start >= 0) { - array[start] = padChar; - } - setLength(targetLength); - return TRUE; - } -} - -UBool -UnicodeString::padTrailing(int32_t targetLength, - UChar padChar) -{ - int32_t oldLength = length(); - if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { - return FALSE; - } else { - // fill in padding character - UChar *array = getArrayStart(); - int32_t length = targetLength; - while(--length >= oldLength) { - array[length] = padChar; - } - setLength(targetLength); - return TRUE; - } -} - -//======================================== -// Hashing -//======================================== -int32_t -UnicodeString::doHashCode() const -{ - /* Delegate hash computation to uhash. This makes UnicodeString - * hashing consistent with UChar* hashing. 
*/ - int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length()); - if (hashCode == kInvalidHashCode) { - hashCode = kEmptyHashCode; - } - return hashCode; -} - -//======================================== -// External Buffer -//======================================== - -char16_t * -UnicodeString::getBuffer(int32_t minCapacity) { - if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { - fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer; - setZeroLength(); - return getArrayStart(); - } else { - return nullptr; - } -} - -void -UnicodeString::releaseBuffer(int32_t newLength) { - if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) { - // set the new fLength - int32_t capacity=getCapacity(); - if(newLength==-1) { - // the new length is the string length, capped by fCapacity - const UChar *array=getArrayStart(), *p=array, *limit=array+capacity; - while(pcapacity) { - newLength=capacity; - } - setLength(newLength); - fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer; - } -} - -//======================================== -// Miscellaneous -//======================================== -UBool -UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, - int32_t growCapacity, - UBool doCopyArray, - int32_t **pBufferToDelete, - UBool forceClone) { - // default parameters need to be static, therefore - // the defaults are -1 to have convenience defaults - if(newCapacity == -1) { - newCapacity = getCapacity(); - } - - // while a getBuffer(minCapacity) is "open", - // prevent any modifications of the string by returning FALSE here - // if the string is bogus, then only an assignment or similar can revive it - if(!isWritable()) { - return FALSE; - } - - /* - * We need to make a copy of the array if - * the buffer is read-only, or - * the buffer is refCounted (shared), and refCount>1, or - * the buffer is too small. - * Return FALSE if memory could not be allocated. 
- */ - if(forceClone || - fUnion.fFields.fLengthAndFlags & kBufferIsReadonly || - (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) || - newCapacity > getCapacity() - ) { - // check growCapacity for default value and use of the stack buffer - if(growCapacity < 0) { - growCapacity = newCapacity; - } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { - growCapacity = US_STACKBUF_SIZE; - } - - // save old values - UChar oldStackBuffer[US_STACKBUF_SIZE]; - UChar *oldArray; - int32_t oldLength = length(); - int16_t flags = fUnion.fFields.fLengthAndFlags; - - if(flags&kUsingStackBuffer) { - U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */ - if(doCopyArray && growCapacity > US_STACKBUF_SIZE) { - // copy the stack buffer contents because it will be overwritten with - // fUnion.fFields values - us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength); - oldArray = oldStackBuffer; - } else { - oldArray = NULL; // no need to copy from the stack buffer to itself - } - } else { - oldArray = fUnion.fFields.fArray; - U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */ - } - - // allocate a new array - if(allocate(growCapacity) || - (newCapacity < growCapacity && allocate(newCapacity)) - ) { - if(doCopyArray) { - // copy the contents - // do not copy more than what fits - it may be smaller than before - int32_t minLength = oldLength; - newCapacity = getCapacity(); - if(newCapacity < minLength) { - minLength = newCapacity; - } - if(oldArray != NULL) { - us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength); - } - setLength(minLength); - } else { - setZeroLength(); - } - - // release the old array - if(flags & kRefCounted) { - // the array is refCounted; decrement and release if 0 - u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1); - if(umtx_atomic_dec(pRefCount) == 0) { - if(pBufferToDelete == 0) { - // Note: 
cast to (void *) is needed with MSVC, where u_atomic_int32_t - // is defined as volatile. (Volatile has useful non-standard behavior - // with this compiler.) - uprv_free((void *)pRefCount); - } else { - // the caller requested to delete it himself - *pBufferToDelete = (int32_t *)pRefCount; - } - } - } - } else { - // not enough memory for growCapacity and not even for the smaller newCapacity - // reset the old values for setToBogus() to release the array - if(!(flags&kUsingStackBuffer)) { - fUnion.fFields.fArray = oldArray; - } - fUnion.fFields.fLengthAndFlags = flags; - setToBogus(); - return FALSE; - } - } - return TRUE; -} - -// UnicodeStringAppendable ------------------------------------------------- *** - -UnicodeStringAppendable::~UnicodeStringAppendable() {} - -UBool -UnicodeStringAppendable::appendCodeUnit(UChar c) { - return str.doAppend(&c, 0, 1).isWritable(); -} - -UBool -UnicodeStringAppendable::appendCodePoint(UChar32 c) { - UChar buffer[U16_MAX_LENGTH]; - int32_t cLength = 0; - UBool isError = FALSE; - U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError); - return !isError && str.doAppend(buffer, 0, cLength).isWritable(); -} - -UBool -UnicodeStringAppendable::appendString(const UChar *s, int32_t length) { - return str.doAppend(s, 0, length).isWritable(); -} - -UBool -UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) { - return str.cloneArrayIfNeeded(str.length() + appendCapacity); -} - -UChar * -UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity, - int32_t desiredCapacityHint, - UChar *scratch, int32_t scratchCapacity, - int32_t *resultCapacity) { - if(minCapacity < 1 || scratchCapacity < minCapacity) { - *resultCapacity = 0; - return NULL; - } - int32_t oldLength = str.length(); - if(minCapacity <= (kMaxCapacity - oldLength) && - desiredCapacityHint <= (kMaxCapacity - oldLength) && - str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) { - *resultCapacity = str.getCapacity() - oldLength; 
- return str.getArrayStart() + oldLength; - } - *resultCapacity = scratchCapacity; - return scratch; -} - -U_NAMESPACE_END - -U_NAMESPACE_USE - -U_CAPI int32_t U_EXPORT2 -uhash_hashUnicodeString(const UElement key) { - const UnicodeString *str = (const UnicodeString*) key.pointer; - return (str == NULL) ? 0 : str->hashCode(); -} - -// Moved here from uhash_us.cpp so that using a UVector of UnicodeString* -// does not depend on hashtable code. -U_CAPI UBool U_EXPORT2 -uhash_compareUnicodeString(const UElement key1, const UElement key2) { - const UnicodeString *str1 = (const UnicodeString*) key1.pointer; - const UnicodeString *str2 = (const UnicodeString*) key2.pointer; - if (str1 == str2) { - return TRUE; - } - if (str1 == NULL || str2 == NULL) { - return FALSE; - } - return *str1 == *str2; -} - -#ifdef U_STATIC_IMPLEMENTATION -/* -This should never be called. It is defined here to make sure that the -virtual vector deleting destructor is defined within unistr.cpp. -The vector deleting destructor is already a part of UObject, -but defining it here makes sure that it is included with this object file. -This makes sure that static library dependencies are kept to a minimum. -*/ -static void uprv_UnicodeStringDummy(void) { - delete [] (new UnicodeString[2]); -} -#endif diff --git a/deps/node/deps/icu-small/source/common/unistr_case.cpp b/deps/node/deps/icu-small/source/common/unistr_case.cpp deleted file mode 100644 index 2138d60c..00000000 --- a/deps/node/deps/icu-small/source/common/unistr_case.cpp +++ /dev/null @@ -1,250 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2014, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -******************************************************************************* -* file name: unistr_case.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:2 -* -* created on: 2004aug19 -* created by: Markus W. Scherer -* -* Case-mapping functions moved here from unistr.cpp -*/ - -#include "unicode/utypes.h" -#include "unicode/brkiter.h" -#include "unicode/casemap.h" -#include "unicode/edits.h" -#include "unicode/putil.h" -#include "cstring.h" -#include "cmemory.h" -#include "unicode/ustring.h" -#include "unicode/unistr.h" -#include "unicode/uchar.h" -#include "uassert.h" -#include "ucasemap_imp.h" -#include "uelement.h" - -U_NAMESPACE_BEGIN - -//======================================== -// Read-only implementation -//======================================== - -int8_t -UnicodeString::doCaseCompare(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const -{ - // compare illegal string values - // treat const UChar *srcChars==NULL as an empty string - if(isBogus()) { - return -1; - } - - // pin indices to legal values - pinIndices(start, length); - - if(srcChars == NULL) { - srcStart = srcLength = 0; - } - - // get the correct pointer - const UChar *chars = getArrayStart(); - - chars += start; - if(srcStart!=0) { - srcChars += srcStart; - } - - if(chars != srcChars) { - UErrorCode errorCode=U_ZERO_ERROR; - int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, - options|U_COMPARE_IGNORE_CASE, &errorCode); - if(result!=0) { - return (int8_t)(result >> 24 | 1); - } - } else { - // get the srcLength if necessary - if(srcLength < 0) { - srcLength = u_strlen(srcChars + srcStart); - } - if(length != srcLength) { - return (int8_t)((length - srcLength) >> 24 | 1); - } - } - return 0; -} - -//======================================== -// Write implementation -//======================================== - -UnicodeString & -UnicodeString::caseMap(int32_t caseLocale, uint32_t options, 
UCASEMAP_BREAK_ITERATOR_PARAM - UStringCaseMapper *stringCaseMapper) { - if(isEmpty() || !isWritable()) { - // nothing to do - return *this; - } - - UChar oldBuffer[2 * US_STACKBUF_SIZE]; - UChar *oldArray; - int32_t oldLength = length(); - int32_t newLength; - UBool writable = isBufferWritable(); - UErrorCode errorCode = U_ZERO_ERROR; - -#if !UCONFIG_NO_BREAK_ITERATION - // Read-only alias to the original string contents for the titlecasing BreakIterator. - // We cannot set the iterator simply to *this because *this is being modified. - UnicodeString oldString; -#endif - - // Try to avoid heap-allocating a new character array for this string. - if (writable ? oldLength <= UPRV_LENGTHOF(oldBuffer) : oldLength < US_STACKBUF_SIZE) { - // Short string: Copy the contents into a temporary buffer and - // case-map back into the current array, or into the stack buffer. - UChar *buffer = getArrayStart(); - int32_t capacity; - oldArray = oldBuffer; - u_memcpy(oldBuffer, buffer, oldLength); - if (writable) { - capacity = getCapacity(); - } else { - // Switch from the read-only alias or shared heap buffer to the stack buffer. - if (!cloneArrayIfNeeded(US_STACKBUF_SIZE, US_STACKBUF_SIZE, /* doCopyArray= */ FALSE)) { - return *this; - } - U_ASSERT(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer); - buffer = fUnion.fStackFields.fBuffer; - capacity = US_STACKBUF_SIZE; - } -#if !UCONFIG_NO_BREAK_ITERATION - if (iter != nullptr) { - oldString.setTo(FALSE, oldArray, oldLength); - iter->setText(oldString); - } -#endif - newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR - buffer, capacity, - oldArray, oldLength, NULL, errorCode); - if (U_SUCCESS(errorCode)) { - setLength(newLength); - return *this; - } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) { - // common overflow handling below - } else { - setToBogus(); - return *this; - } - } else { - // Longer string or read-only buffer: - // Collect only changes and then apply them to this string. 
- // Case mapping often changes only small parts of a string, - // and often does not change its length. - oldArray = getArrayStart(); - Edits edits; - UChar replacementChars[200]; -#if !UCONFIG_NO_BREAK_ITERATION - if (iter != nullptr) { - oldString.setTo(FALSE, oldArray, oldLength); - iter->setText(oldString); - } -#endif - stringCaseMapper(caseLocale, options | U_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR - replacementChars, UPRV_LENGTHOF(replacementChars), - oldArray, oldLength, &edits, errorCode); - if (U_SUCCESS(errorCode)) { - // Grow the buffer at most once, not for multiple doReplace() calls. - newLength = oldLength + edits.lengthDelta(); - if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) { - return *this; - } - for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) { - doReplace(ei.destinationIndex(), ei.oldLength(), - replacementChars, ei.replacementIndex(), ei.newLength()); - } - if (U_FAILURE(errorCode)) { - setToBogus(); - } - return *this; - } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) { - // common overflow handling below - newLength = oldLength + edits.lengthDelta(); - } else { - setToBogus(); - return *this; - } - } - - // Handle buffer overflow, newLength is known. - // We need to allocate a new buffer for the internal string case mapping function. - // This is very similar to how doReplace() keeps the old array pointer - // and deletes the old array itself after it is done. - // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. - int32_t *bufferToDelete = 0; - if (!cloneArrayIfNeeded(newLength, newLength, FALSE, &bufferToDelete, TRUE)) { - return *this; - } - errorCode = U_ZERO_ERROR; - // No need to iter->setText() again: The case mapper restarts via iter->first(). 
- newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR - getArrayStart(), getCapacity(), - oldArray, oldLength, NULL, errorCode); - if (bufferToDelete) { - uprv_free(bufferToDelete); - } - if (U_SUCCESS(errorCode)) { - setLength(newLength); - } else { - setToBogus(); - } - return *this; -} - -UnicodeString & -UnicodeString::foldCase(uint32_t options) { - return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold); -} - -U_NAMESPACE_END - -// Defined here to reduce dependencies on break iterator -U_CAPI int32_t U_EXPORT2 -uhash_hashCaselessUnicodeString(const UElement key) { - U_NAMESPACE_USE - const UnicodeString *str = (const UnicodeString*) key.pointer; - if (str == NULL) { - return 0; - } - // Inefficient; a better way would be to have a hash function in - // UnicodeString that does case folding on the fly. - UnicodeString copy(*str); - return copy.foldCase().hashCode(); -} - -// Defined here to reduce dependencies on break iterator -U_CAPI UBool U_EXPORT2 -uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) { - U_NAMESPACE_USE - const UnicodeString *str1 = (const UnicodeString*) key1.pointer; - const UnicodeString *str2 = (const UnicodeString*) key2.pointer; - if (str1 == str2) { - return TRUE; - } - if (str1 == NULL || str2 == NULL) { - return FALSE; - } - return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; -} diff --git a/deps/node/deps/icu-small/source/common/unistr_case_locale.cpp b/deps/node/deps/icu-small/source/common/unistr_case_locale.cpp deleted file mode 100644 index f0f3048d..00000000 --- a/deps/node/deps/icu-small/source/common/unistr_case_locale.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011, International Business Machines -* Corporation and others. 
All Rights Reserved. -******************************************************************************* -* file name: unistr_case_locale.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011may31 -* created by: Markus W. Scherer -* -* Locale-sensitive case mapping functions (ones that call uloc_getDefault()) -* were moved here to break dependency cycles among parts of the common library. -*/ - -#include "unicode/utypes.h" -#include "unicode/locid.h" -#include "unicode/ucasemap.h" -#include "unicode/unistr.h" -#include "ucasemap_imp.h" - -U_NAMESPACE_BEGIN - -//======================================== -// Write implementation -//======================================== - -UnicodeString & -UnicodeString::toLower() { - return caseMap(ustrcase_getCaseLocale(NULL), 0, - UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower); -} - -UnicodeString & -UnicodeString::toLower(const Locale &locale) { - return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0, - UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower); -} - -UnicodeString & -UnicodeString::toUpper() { - return caseMap(ustrcase_getCaseLocale(NULL), 0, - UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper); -} - -UnicodeString & -UnicodeString::toUpper(const Locale &locale) { - return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0, - UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper); -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/unistr_cnv.cpp b/deps/node/deps/icu-small/source/common/unistr_cnv.cpp deleted file mode 100644 index 64d3c168..00000000 --- a/deps/node/deps/icu-small/source/common/unistr_cnv.cpp +++ /dev/null @@ -1,417 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2014, International Business Machines -* Corporation and others. 
All Rights Reserved. -* -******************************************************************************* -* file name: unistr_cnv.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:2 -* -* created on: 2004aug19 -* created by: Markus W. Scherer -* -* Character conversion functions moved here from unistr.cpp -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/putil.h" -#include "cstring.h" -#include "cmemory.h" -#include "unicode/ustring.h" -#include "unicode/unistr.h" -#include "unicode/ucnv.h" -#include "ucnv_imp.h" -#include "putilimp.h" -#include "ustr_cnv.h" -#include "ustr_imp.h" - -U_NAMESPACE_BEGIN - -//======================================== -// Constructors -//======================================== - -#if !U_CHARSET_IS_UTF8 - -UnicodeString::UnicodeString(const char *codepageData) { - fUnion.fFields.fLengthAndFlags = kShortString; - if(codepageData != 0) { - doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0); - } -} - -UnicodeString::UnicodeString(const char *codepageData, - int32_t dataLength) { - fUnion.fFields.fLengthAndFlags = kShortString; - if(codepageData != 0) { - doCodepageCreate(codepageData, dataLength, 0); - } -} - -// else see unistr.cpp -#endif - -UnicodeString::UnicodeString(const char *codepageData, - const char *codepage) { - fUnion.fFields.fLengthAndFlags = kShortString; - if(codepageData != 0) { - doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage); - } -} - -UnicodeString::UnicodeString(const char *codepageData, - int32_t dataLength, - const char *codepage) { - fUnion.fFields.fLengthAndFlags = kShortString; - if(codepageData != 0) { - doCodepageCreate(codepageData, dataLength, codepage); - } -} - -UnicodeString::UnicodeString(const char *src, int32_t srcLength, - UConverter *cnv, - UErrorCode &errorCode) { - fUnion.fFields.fLengthAndFlags = kShortString; - if(U_SUCCESS(errorCode)) { - // check arguments - if(src==NULL) { - // treat as an empty 
string, do nothing more - } else if(srcLength<-1) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - } else { - // get input length - if(srcLength==-1) { - srcLength=(int32_t)uprv_strlen(src); - } - if(srcLength>0) { - if(cnv!=0) { - // use the provided converter - ucnv_resetToUnicode(cnv); - doCodepageCreate(src, srcLength, cnv, errorCode); - } else { - // use the default converter - cnv=u_getDefaultConverter(&errorCode); - doCodepageCreate(src, srcLength, cnv, errorCode); - u_releaseDefaultConverter(cnv); - } - } - } - - if(U_FAILURE(errorCode)) { - setToBogus(); - } - } -} - -//======================================== -// Codeset conversion -//======================================== - -#if !U_CHARSET_IS_UTF8 - -int32_t -UnicodeString::extract(int32_t start, - int32_t length, - char *target, - uint32_t dstSize) const { - return extract(start, length, target, dstSize, 0); -} - -// else see unistr.cpp -#endif - -int32_t -UnicodeString::extract(int32_t start, - int32_t length, - char *target, - uint32_t dstSize, - const char *codepage) const -{ - // if the arguments are illegal, then do nothing - if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) { - return 0; - } - - // pin the indices to legal values - pinIndices(start, length); - - // We need to cast dstSize to int32_t for all subsequent code. - // I don't know why the API was defined with uint32_t but we are stuck with it. - // Also, dstSize==0xffffffff means "unlimited" but if we use target+dstSize - // as a limit in some functions, it may wrap around and yield a pointer - // that compares less-than target. - int32_t capacity; - if(dstSize < 0x7fffffff) { - // Assume that the capacity is real and a limit pointer won't wrap around. - capacity = (int32_t)dstSize; - } else { - // Pin the capacity so that a limit pointer does not wrap around. 
- char *targetLimit = (char *)U_MAX_PTR(target); - // U_MAX_PTR(target) returns a targetLimit that is at most 0x7fffffff - // greater than target and does not wrap around the top of the address space. - capacity = (int32_t)(targetLimit - target); - } - - // create the converter - UConverter *converter; - UErrorCode status = U_ZERO_ERROR; - - // just write the NUL if the string length is 0 - if(length == 0) { - return u_terminateChars(target, capacity, 0, &status); - } - - // if the codepage is the default, use our cache - // if it is an empty string, then use the "invariant character" conversion - if (codepage == 0) { - const char *defaultName = ucnv_getDefaultName(); - if(UCNV_FAST_IS_UTF8(defaultName)) { - return toUTF8(start, length, target, capacity); - } - converter = u_getDefaultConverter(&status); - } else if (*codepage == 0) { - // use the "invariant characters" conversion - int32_t destLength; - if(length <= capacity) { - destLength = length; - } else { - destLength = capacity; - } - u_UCharsToChars(getArrayStart() + start, target, destLength); - return u_terminateChars(target, capacity, length, &status); - } else { - converter = ucnv_open(codepage, &status); - } - - length = doExtract(start, length, target, capacity, converter, status); - - // close the converter - if (codepage == 0) { - u_releaseDefaultConverter(converter); - } else { - ucnv_close(converter); - } - - return length; -} - -int32_t -UnicodeString::extract(char *dest, int32_t destCapacity, - UConverter *cnv, - UErrorCode &errorCode) const -{ - if(U_FAILURE(errorCode)) { - return 0; - } - - if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - // nothing to do? 
- if(isEmpty()) { - return u_terminateChars(dest, destCapacity, 0, &errorCode); - } - - // get the converter - UBool isDefaultConverter; - if(cnv==0) { - isDefaultConverter=TRUE; - cnv=u_getDefaultConverter(&errorCode); - if(U_FAILURE(errorCode)) { - return 0; - } - } else { - isDefaultConverter=FALSE; - ucnv_resetFromUnicode(cnv); - } - - // convert - int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode); - - // release the converter - if(isDefaultConverter) { - u_releaseDefaultConverter(cnv); - } - - return len; -} - -int32_t -UnicodeString::doExtract(int32_t start, int32_t length, - char *dest, int32_t destCapacity, - UConverter *cnv, - UErrorCode &errorCode) const -{ - if(U_FAILURE(errorCode)) { - if(destCapacity!=0) { - *dest=0; - } - return 0; - } - - const UChar *src=getArrayStart()+start, *srcLimit=src+length; - char *originalDest=dest; - const char *destLimit; - - if(destCapacity==0) { - destLimit=dest=0; - } else if(destCapacity==-1) { - // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used. 
- destLimit=(char*)U_MAX_PTR(dest); - // for NUL-termination, translate into highest int32_t - destCapacity=0x7fffffff; - } else { - destLimit=dest+destCapacity; - } - - // perform the conversion - ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode); - length=(int32_t)(dest-originalDest); - - // if an overflow occurs, then get the preflighting length - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - char buffer[1024]; - - destLimit=buffer+sizeof(buffer); - do { - dest=buffer; - errorCode=U_ZERO_ERROR; - ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode); - length+=(int32_t)(dest-buffer); - } while(errorCode==U_BUFFER_OVERFLOW_ERROR); - } - - return u_terminateChars(originalDest, destCapacity, length, &errorCode); -} - -void -UnicodeString::doCodepageCreate(const char *codepageData, - int32_t dataLength, - const char *codepage) -{ - // if there's nothing to convert, do nothing - if(codepageData == 0 || dataLength == 0 || dataLength < -1) { - return; - } - if(dataLength == -1) { - dataLength = (int32_t)uprv_strlen(codepageData); - } - - UErrorCode status = U_ZERO_ERROR; - - // create the converter - // if the codepage is the default, use our cache - // if it is an empty string, then use the "invariant character" conversion - UConverter *converter; - if (codepage == 0) { - const char *defaultName = ucnv_getDefaultName(); - if(UCNV_FAST_IS_UTF8(defaultName)) { - setToUTF8(StringPiece(codepageData, dataLength)); - return; - } - converter = u_getDefaultConverter(&status); - } else if(*codepage == 0) { - // use the "invariant characters" conversion - if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) { - u_charsToUChars(codepageData, getArrayStart(), dataLength); - setLength(dataLength); - } else { - setToBogus(); - } - return; - } else { - converter = ucnv_open(codepage, &status); - } - - // if we failed, set the appropriate flags and return - if(U_FAILURE(status)) { - setToBogus(); - return; - } - - // perform the 
conversion - doCodepageCreate(codepageData, dataLength, converter, status); - if(U_FAILURE(status)) { - setToBogus(); - } - - // close the converter - if(codepage == 0) { - u_releaseDefaultConverter(converter); - } else { - ucnv_close(converter); - } -} - -void -UnicodeString::doCodepageCreate(const char *codepageData, - int32_t dataLength, - UConverter *converter, - UErrorCode &status) -{ - if(U_FAILURE(status)) { - return; - } - - // set up the conversion parameters - const char *mySource = codepageData; - const char *mySourceEnd = mySource + dataLength; - UChar *array, *myTarget; - - // estimate the size needed: - int32_t arraySize; - if(dataLength <= US_STACKBUF_SIZE) { - // try to use the stack buffer - arraySize = US_STACKBUF_SIZE; - } else { - // 1.25 UChar's per source byte should cover most cases - arraySize = dataLength + (dataLength >> 2); - } - - // we do not care about the current contents - UBool doCopyArray = FALSE; - for(;;) { - if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) { - setToBogus(); - break; - } - - // perform the conversion - array = getArrayStart(); - myTarget = array + length(); - ucnv_toUnicode(converter, &myTarget, array + getCapacity(), - &mySource, mySourceEnd, 0, TRUE, &status); - - // update the conversion parameters - setLength((int32_t)(myTarget - array)); - - // allocate more space and copy data, if needed - if(status == U_BUFFER_OVERFLOW_ERROR) { - // reset the error code - status = U_ZERO_ERROR; - - // keep the previous conversion results - doCopyArray = TRUE; - - // estimate the new size needed, larger than before - // try 2 UChar's per remaining source byte - arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource)); - } else { - break; - } - } -} - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unistr_props.cpp b/deps/node/deps/icu-small/source/common/unistr_props.cpp deleted file mode 100644 index 691bd085..00000000 --- 
a/deps/node/deps/icu-small/source/common/unistr_props.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: unistr_props.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:2 -* -* created on: 2004aug25 -* created by: Markus W. Scherer -* -* Character property dependent functions moved here from unistr.cpp -*/ - -#include "unicode/utypes.h" -#include "unicode/uchar.h" -#include "unicode/unistr.h" -#include "unicode/utf16.h" - -U_NAMESPACE_BEGIN - -UnicodeString& -UnicodeString::trim() -{ - if(isBogus()) { - return *this; - } - - UChar *array = getArrayStart(); - UChar32 c; - int32_t oldLength = this->length(); - int32_t i = oldLength, length; - - // first cut off trailing white space - for(;;) { - length = i; - if(i <= 0) { - break; - } - U16_PREV(array, 0, i, c); - if(!(c == 0x20 || u_isWhitespace(c))) { - break; - } - } - if(length < oldLength) { - setLength(length); - } - - // find leading white space - int32_t start; - i = 0; - for(;;) { - start = i; - if(i >= length) { - break; - } - U16_NEXT(array, i, length, c); - if(!(c == 0x20 || u_isWhitespace(c))) { - break; - } - } - - // move string forward over leading white space - if(start > 0) { - doReplace(0, start, 0, 0, 0); - } - - return *this; -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/unistr_titlecase_brkiter.cpp b/deps/node/deps/icu-small/source/common/unistr_titlecase_brkiter.cpp deleted file mode 100644 index 4969884b..00000000 --- a/deps/node/deps/icu-small/source/common/unistr_titlecase_brkiter.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: unistr_titlecase_brkiter.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:2 -* -* created on: 2011may30 -* created by: Markus W. Scherer -* -* Titlecasing functions that are based on BreakIterator -* were moved here to break dependency cycles among parts of the common library. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/brkiter.h" -#include "unicode/locid.h" -#include "unicode/ucasemap.h" -#include "unicode/unistr.h" -#include "ucasemap_imp.h" - -U_NAMESPACE_BEGIN - -UnicodeString & -UnicodeString::toTitle(BreakIterator *iter) { - return toTitle(iter, Locale::getDefault(), 0); -} - -UnicodeString & -UnicodeString::toTitle(BreakIterator *iter, const Locale &locale) { - return toTitle(iter, locale, 0); -} - -UnicodeString & -UnicodeString::toTitle(BreakIterator *iter, const Locale &locale, uint32_t options) { - LocalPointer ownedIter; - UErrorCode errorCode = U_ZERO_ERROR; - iter = ustrcase_getTitleBreakIterator(&locale, "", options, iter, ownedIter, errorCode); - if (iter == nullptr) { - setToBogus(); - return *this; - } - caseMap(ustrcase_getCaseLocale(locale.getBaseName()), options, iter, ustrcase_internalToTitle); - return *this; -} - -U_NAMESPACE_END - -#endif // !UCONFIG_NO_BREAK_ITERATION diff --git a/deps/node/deps/icu-small/source/common/unistrappender.h b/deps/node/deps/icu-small/source/common/unistrappender.h deleted file mode 100644 index 134f3149..00000000 --- a/deps/node/deps/icu-small/source/common/unistrappender.h +++ /dev/null @@ -1,90 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 2015, International Business Machines Corporation and -* others. All Rights Reserved. -****************************************************************************** -* -* File unistrappender.h -****************************************************************************** -*/ - -#ifndef __UNISTRAPPENDER_H__ -#define __UNISTRAPPENDER_H__ - -#include "unicode/unistr.h" -#include "unicode/uobject.h" -#include "unicode/utf16.h" -#include "unicode/utypes.h" -#include "cmemory.h" - -U_NAMESPACE_BEGIN - -/** - * An optimization for the slowness of calling UnicodeString::append() - * one character at a time in a loop. It stores appends in a buffer while - * never actually calling append on the unicode string unless the buffer - * fills up or is flushed. - * - * proper usage: - * { - * UnicodeStringAppender appender(astring); - * for (int32_t i = 0; i < 100; ++i) { - * appender.append((UChar) i); - * } - * // appender flushed automatically when it goes out of scope. - * } - */ -class UnicodeStringAppender : public UMemory { -public: - - /** - * dest is the UnicodeString being appended to. It must always - * exist while this instance exists. - */ - UnicodeStringAppender(UnicodeString &dest) : fDest(&dest), fIdx(0) { } - - inline void append(UChar x) { - if (fIdx == UPRV_LENGTHOF(fBuffer)) { - fDest->append(fBuffer, 0, fIdx); - fIdx = 0; - } - fBuffer[fIdx++] = x; - } - - inline void append(UChar32 x) { - if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) { - fDest->append(fBuffer, 0, fIdx); - fIdx = 0; - } - U16_APPEND_UNSAFE(fBuffer, fIdx, x); - } - - /** - * Ensures that all appended characters have been written out to dest. - */ - inline void flush() { - if (fIdx) { - fDest->append(fBuffer, 0, fIdx); - } - fIdx = 0; - } - - /** - * flush the buffer when we go out of scope. 
- */ - ~UnicodeStringAppender() { - flush(); - } -private: - UnicodeString *fDest; - int32_t fIdx; - UChar fBuffer[32]; - UnicodeStringAppender(const UnicodeStringAppender &other); - UnicodeStringAppender &operator=(const UnicodeStringAppender &other); -}; - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/unorm.cpp b/deps/node/deps/icu-small/source/common/unorm.cpp deleted file mode 100644 index 93f77e66..00000000 --- a/deps/node/deps/icu-small/source/common/unorm.cpp +++ /dev/null @@ -1,280 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (c) 1996-2014, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* File unorm.cpp -* -* Created by: Vladimir Weinstein 12052000 -* -* Modification history : -* -* Date Name Description -* 02/01/01 synwee Added normalization quickcheck enum and method. -* 02/12/01 synwee Commented out quickcheck util api has been approved -* Added private method for doing FCD checks -* 02/23/01 synwee Modified quickcheck and checkFCE to run through -* string for codepoints < 0x300 for the normalization -* mode NFC. -* 05/25/01+ Markus Scherer total rewrite, implement all normalization here -* instead of just wrappers around normlzr.cpp, -* load unorm.dat, support Unicode 3.1 with -* supplementary code points, etc. 
-* 2009-nov..2010-jan Markus Scherer total rewrite, new Normalizer2 API & code -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/udata.h" -#include "unicode/ustring.h" -#include "unicode/uiter.h" -#include "unicode/unorm.h" -#include "unicode/unorm2.h" -#include "normalizer2impl.h" -#include "unormimp.h" -#include "uprops.h" -#include "ustr_imp.h" - -U_NAMESPACE_USE - -/* quick check functions ---------------------------------------------------- */ - -U_CAPI UNormalizationCheckResult U_EXPORT2 -unorm_quickCheck(const UChar *src, - int32_t srcLength, - UNormalizationMode mode, - UErrorCode *pErrorCode) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); - return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErrorCode); -} - -U_CAPI UNormalizationCheckResult U_EXPORT2 -unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, - UNormalizationMode mode, int32_t options, - UErrorCode *pErrorCode) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); - if(options&UNORM_UNICODE_3_2) { - FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); - return unorm2_quickCheck( - reinterpret_cast(static_cast(&fn2)), - src, srcLength, pErrorCode); - } else { - return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErrorCode); - } -} - -U_CAPI UBool U_EXPORT2 -unorm_isNormalized(const UChar *src, int32_t srcLength, - UNormalizationMode mode, - UErrorCode *pErrorCode) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); - return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pErrorCode); -} - -U_CAPI UBool U_EXPORT2 -unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, - UNormalizationMode mode, int32_t options, - UErrorCode *pErrorCode) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); - if(options&UNORM_UNICODE_3_2) { - FilteredNormalizer2 fn2(*n2, 
*uniset_getUnicode32Instance(*pErrorCode)); - return unorm2_isNormalized( - reinterpret_cast(static_cast(&fn2)), - src, srcLength, pErrorCode); - } else { - return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pErrorCode); - } -} - -/* normalize() API ---------------------------------------------------------- */ - -/** Public API for normalizing. */ -U_CAPI int32_t U_EXPORT2 -unorm_normalize(const UChar *src, int32_t srcLength, - UNormalizationMode mode, int32_t options, - UChar *dest, int32_t destCapacity, - UErrorCode *pErrorCode) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); - if(options&UNORM_UNICODE_3_2) { - FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); - return unorm2_normalize( - reinterpret_cast(static_cast(&fn2)), - src, srcLength, dest, destCapacity, pErrorCode); - } else { - return unorm2_normalize((const UNormalizer2 *)n2, - src, srcLength, dest, destCapacity, pErrorCode); - } -} - - -/* iteration functions ------------------------------------------------------ */ - -static int32_t -_iterate(UCharIterator *src, UBool forward, - UChar *dest, int32_t destCapacity, - const Normalizer2 *n2, - UBool doNormalize, UBool *pNeededToNormalize, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(destCapacity<0 || (dest==NULL && destCapacity>0) || src==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(pNeededToNormalize!=NULL) { - *pNeededToNormalize=FALSE; - } - if(!(forward ? 
src->hasNext(src) : src->hasPrevious(src))) { - return u_terminateUChars(dest, destCapacity, 0, pErrorCode); - } - - UnicodeString buffer; - UChar32 c; - if(forward) { - /* get one character and ignore its properties */ - buffer.append(uiter_next32(src)); - /* get all following characters until we see a boundary */ - while((c=uiter_next32(src))>=0) { - if(n2->hasBoundaryBefore(c)) { - /* back out the latest movement to stop at the boundary */ - src->move(src, -U16_LENGTH(c), UITER_CURRENT); - break; - } else { - buffer.append(c); - } - } - } else { - while((c=uiter_previous32(src))>=0) { - /* always write this character to the front of the buffer */ - buffer.insert(0, c); - /* stop if this just-copied character is a boundary */ - if(n2->hasBoundaryBefore(c)) { - break; - } - } - } - - UnicodeString destString(dest, 0, destCapacity); - if(buffer.length()>0 && doNormalize) { - n2->normalize(buffer, destString, *pErrorCode).extract(dest, destCapacity, *pErrorCode); - if(pNeededToNormalize!=NULL && U_SUCCESS(*pErrorCode)) { - *pNeededToNormalize= destString!=buffer; - } - return destString.length(); - } else { - /* just copy the source characters */ - return buffer.extract(dest, destCapacity, *pErrorCode); - } -} - -static int32_t -unorm_iterate(UCharIterator *src, UBool forward, - UChar *dest, int32_t destCapacity, - UNormalizationMode mode, int32_t options, - UBool doNormalize, UBool *pNeededToNormalize, - UErrorCode *pErrorCode) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); - if(options&UNORM_UNICODE_3_2) { - const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - FilteredNormalizer2 fn2(*n2, *uni32); - return _iterate(src, forward, dest, destCapacity, - &fn2, doNormalize, pNeededToNormalize, pErrorCode); - } - return _iterate(src, forward, dest, destCapacity, - n2, doNormalize, pNeededToNormalize, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -unorm_previous(UCharIterator 
*src, - UChar *dest, int32_t destCapacity, - UNormalizationMode mode, int32_t options, - UBool doNormalize, UBool *pNeededToNormalize, - UErrorCode *pErrorCode) { - return unorm_iterate(src, FALSE, - dest, destCapacity, - mode, options, - doNormalize, pNeededToNormalize, - pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -unorm_next(UCharIterator *src, - UChar *dest, int32_t destCapacity, - UNormalizationMode mode, int32_t options, - UBool doNormalize, UBool *pNeededToNormalize, - UErrorCode *pErrorCode) { - return unorm_iterate(src, TRUE, - dest, destCapacity, - mode, options, - doNormalize, pNeededToNormalize, - pErrorCode); -} - -/* Concatenation of normalized strings -------------------------------------- */ - -static int32_t -_concatenate(const UChar *left, int32_t leftLength, - const UChar *right, int32_t rightLength, - UChar *dest, int32_t destCapacity, - const Normalizer2 *n2, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(destCapacity<0 || (dest==NULL && destCapacity>0) || - left==NULL || leftLength<-1 || right==NULL || rightLength<-1) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* check for overlapping right and destination */ - if( dest!=NULL && - ((right>=dest && right<(dest+destCapacity)) || - (rightLength>0 && dest>=right && dest<(right+rightLength))) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* allow left==dest */ - UnicodeString destString; - if(left==dest) { - destString.setTo(dest, leftLength, destCapacity); - } else { - destString.setTo(dest, 0, destCapacity); - destString.append(left, leftLength); - } - return n2->append(destString, UnicodeString(rightLength<0, right, rightLength), *pErrorCode). 
- extract(dest, destCapacity, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -unorm_concatenate(const UChar *left, int32_t leftLength, - const UChar *right, int32_t rightLength, - UChar *dest, int32_t destCapacity, - UNormalizationMode mode, int32_t options, - UErrorCode *pErrorCode) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); - if(options&UNORM_UNICODE_3_2) { - const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - FilteredNormalizer2 fn2(*n2, *uni32); - return _concatenate(left, leftLength, right, rightLength, - dest, destCapacity, &fn2, pErrorCode); - } - return _concatenate(left, leftLength, right, rightLength, - dest, destCapacity, n2, pErrorCode); -} - -#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/deps/node/deps/icu-small/source/common/unormcmp.cpp b/deps/node/deps/icu-small/source/common/unormcmp.cpp deleted file mode 100644 index 689b0b53..00000000 --- a/deps/node/deps/icu-small/source/common/unormcmp.cpp +++ /dev/null @@ -1,640 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2001-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: unormcmp.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004sep13 -* created by: Markus W. Scherer -* -* unorm_compare() function moved here from unorm.cpp for better modularization. -* Depends on both normalization and case folding. -* Allows unorm.cpp to not depend on any character properties code. 
-*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/unorm.h" -#include "unicode/ustring.h" -#include "cmemory.h" -#include "normalizer2impl.h" -#include "ucase.h" -#include "uprops.h" -#include "ustr_imp.h" - -U_NAMESPACE_USE - -/* compare canonically equivalent ------------------------------------------- */ - -/* - * Compare two strings for canonical equivalence. - * Further options include case-insensitive comparison and - * code point order (as opposed to code unit order). - * - * In this function, canonical equivalence is optional as well. - * If canonical equivalence is tested, then both strings must fulfill - * the FCD check. - * - * Semantically, this is equivalent to - * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2))) - * where code point order, NFD and foldCase are all optional. - * - * String comparisons almost always yield results before processing both strings - * completely. - * They are generally more efficient working incrementally instead of - * performing the sub-processing (strlen, normalization, case-folding) - * on the entire strings first. - * - * It is also unnecessary to not normalize identical characters. - * - * This function works in principle as follows: - * - * loop { - * get one code unit c1 from s1 (-1 if end of source) - * get one code unit c2 from s2 (-1 if end of source) - * - * if(either string finished) { - * return result; - * } - * if(c1==c2) { - * continue; - * } - * - * // c1!=c2 - * try to decompose/case-fold c1/c2, and continue if one does; - * - * // still c1!=c2 and neither decomposes/case-folds, return result - * return c1-c2; - * } - * - * When a character decomposes, then the pointer for that source changes to - * the decomposition, pushing the previous pointer onto a stack. - * When the end of the decomposition is reached, then the code unit reader - * pops the previous source from the stack. - * (Same for case-folding.) 
- * - * This is complicated further by operating on variable-width UTF-16. - * The top part of the loop works on code units, while lookups for decomposition - * and case-folding need code points. - * Code points are assembled after the equality/end-of-source part. - * The source pointer is only advanced beyond all code units when the code point - * actually decomposes/case-folds. - * - * If we were on a trail surrogate unit when assembling a code point, - * and the code point decomposes/case-folds, then the decomposition/folding - * result must be compared with the part of the other string that corresponds to - * this string's lead surrogate. - * Since we only assemble a code point when hitting a trail unit when the - * preceding lead units were identical, we back up the other string by one unit - * in such a case. - * - * The optional code point order comparison at the end works with - * the same fix-up as the other code point order comparison functions. - * See ustring.c and the comment near the end of this function. - * - * Assumption: A decomposition or case-folding result string never contains - * a single surrogate. This is a safe assumption in the Unicode Standard. - * Therefore, we do not need to check for surrogate pairs across - * decomposition/case-folding boundaries. - * - * Further assumptions (see verifications tstnorm.cpp): - * The API function checks for FCD first, while the core function - * first case-folds and then decomposes. This requires that case-folding does not - * un-FCD any strings. - * - * The API function may also NFD the input and turn off decomposition. - * This requires that case-folding does not un-NFD strings either. - * - * TODO If any of the above two assumptions is violated, - * then this entire code must be re-thought. - * If this happens, then a simple solution is to case-fold both strings up front - * and to turn off UNORM_INPUT_IS_FCD. 
- * We already do this when not both strings are in FCD because makeFCD - * would be a partial NFD before the case folding, which does not work. - * Note that all of this is only a problem when case-folding _and_ - * canonical equivalence come together. - * (Comments in unorm_compare() are more up to date than this TODO.) - */ - -/* stack element for previous-level source/decomposition pointers */ -struct CmpEquivLevel { - const UChar *start, *s, *limit; -}; -typedef struct CmpEquivLevel CmpEquivLevel; - -/** - * Internal option for unorm_cmpEquivFold() for decomposing. - * If not set, just do strcasecmp(). - */ -#define _COMPARE_EQUIV 0x80000 - -/* internal function */ -static int32_t -unorm_cmpEquivFold(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode) { - const Normalizer2Impl *nfcImpl; - - /* current-level start/limit - s1/s2 as current */ - const UChar *start1, *start2, *limit1, *limit2; - - /* decomposition and case folding variables */ - const UChar *p; - int32_t length; - - /* stacks of previous-level start/current/limit */ - CmpEquivLevel stack1[2], stack2[2]; - - /* buffers for algorithmic decompositions */ - UChar decomp1[4], decomp2[4]; - - /* case folding buffers, only use current-level start/limit */ - UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; - - /* track which is the current level per string */ - int32_t level1, level2; - - /* current code units, and code points for lookups */ - UChar32 c1, c2, cp1, cp2; - - /* no argument error checking because this itself is not an API */ - - /* - * assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set - * otherwise this function must behave exactly as uprv_strCompare() - * not checking for that here makes testing this function easier - */ - - /* normalization/properties data loaded? 
*/ - if((options&_COMPARE_EQUIV)!=0) { - nfcImpl=Normalizer2Factory::getNFCImpl(*pErrorCode); - } else { - nfcImpl=NULL; - } - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* initialize */ - start1=s1; - if(length1==-1) { - limit1=NULL; - } else { - limit1=s1+length1; - } - - start2=s2; - if(length2==-1) { - limit2=NULL; - } else { - limit2=s2+length2; - } - - level1=level2=0; - c1=c2=-1; - - /* comparison loop */ - for(;;) { - /* - * here a code unit value of -1 means "get another code unit" - * below it will mean "this source is finished" - */ - - if(c1<0) { - /* get next code unit from string 1, post-increment */ - for(;;) { - if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) { - if(level1==0) { - c1=-1; - break; - } - } else { - ++s1; - break; - } - - /* reached end of level buffer, pop one level */ - do { - --level1; - start1=stack1[level1].start; /*Not uninitialized*/ - } while(start1==NULL); - s1=stack1[level1].s; /*Not uninitialized*/ - limit1=stack1[level1].limit; /*Not uninitialized*/ - } - } - - if(c2<0) { - /* get next code unit from string 2, post-increment */ - for(;;) { - if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) { - if(level2==0) { - c2=-1; - break; - } - } else { - ++s2; - break; - } - - /* reached end of level buffer, pop one level */ - do { - --level2; - start2=stack2[level2].start; /*Not uninitialized*/ - } while(start2==NULL); - s2=stack2[level2].s; /*Not uninitialized*/ - limit2=stack2[level2].limit; /*Not uninitialized*/ - } - } - - /* - * compare c1 and c2 - * either variable c1, c2 is -1 only if the corresponding string is finished - */ - if(c1==c2) { - if(c1<0) { - return 0; /* c1==c2==-1 indicating end of strings */ - } - c1=c2=-1; /* make us fetch new code units */ - continue; - } else if(c1<0) { - return -1; /* string 1 ends before string 2 */ - } else if(c2<0) { - return 1; /* string 2 ends before string 1 */ - } - /* c1!=c2 && c1>=0 && c2>=0 */ - - /* get complete 
code points for c1, c2 for lookups if either is a surrogate */ - cp1=c1; - if(U_IS_SURROGATE(c1)) { - UChar c; - - if(U_IS_SURROGATE_LEAD(c1)) { - if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) { - /* advance ++s1; only below if cp1 decomposes/case-folds */ - cp1=U16_GET_SUPPLEMENTARY(c1, c); - } - } else /* isTrail(c1) */ { - if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { - cp1=U16_GET_SUPPLEMENTARY(c, c1); - } - } - } - - cp2=c2; - if(U_IS_SURROGATE(c2)) { - UChar c; - - if(U_IS_SURROGATE_LEAD(c2)) { - if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) { - /* advance ++s2; only below if cp2 decomposes/case-folds */ - cp2=U16_GET_SUPPLEMENTARY(c2, c); - } - } else /* isTrail(c2) */ { - if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) { - cp2=U16_GET_SUPPLEMENTARY(c, c2); - } - } - } - - /* - * go down one level for each string - * continue with the main loop as soon as there is a real change - */ - - if( level1==0 && (options&U_COMPARE_IGNORE_CASE) && - (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0 - ) { - /* cp1 case-folds to the code point "length" or to p[length] */ - if(U_IS_SURROGATE(c1)) { - if(U_IS_SURROGATE_LEAD(c1)) { - /* advance beyond source surrogate pair if it case-folds */ - ++s1; - } else /* isTrail(c1) */ { - /* - * we got a supplementary code point when hitting its trail surrogate, - * therefore the lead surrogate must have been the same as in the other string; - * compare this decomposition with the lead surrogate in the other string - * remember that this simulates bulk text replacement: - * the decomposition would replace the entire code point - */ - --s2; - c2=*(s2-1); - } - } - - /* push current level pointers */ - stack1[0].start=start1; - stack1[0].s=s1; - stack1[0].limit=limit1; - ++level1; - - /* copy the folding result to fold1[] */ - if(length<=UCASE_MAX_STRING_LENGTH) { - u_memcpy(fold1, p, length); - } else { - int32_t i=0; - U16_APPEND_UNSAFE(fold1, i, length); - length=i; - } - - /* set next level pointers to case folding */ - start1=s1=fold1; 
- limit1=fold1+length; - - /* get ready to read from decomposition, continue with loop */ - c1=-1; - continue; - } - - if( level2==0 && (options&U_COMPARE_IGNORE_CASE) && - (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0 - ) { - /* cp2 case-folds to the code point "length" or to p[length] */ - if(U_IS_SURROGATE(c2)) { - if(U_IS_SURROGATE_LEAD(c2)) { - /* advance beyond source surrogate pair if it case-folds */ - ++s2; - } else /* isTrail(c2) */ { - /* - * we got a supplementary code point when hitting its trail surrogate, - * therefore the lead surrogate must have been the same as in the other string; - * compare this decomposition with the lead surrogate in the other string - * remember that this simulates bulk text replacement: - * the decomposition would replace the entire code point - */ - --s1; - c1=*(s1-1); - } - } - - /* push current level pointers */ - stack2[0].start=start2; - stack2[0].s=s2; - stack2[0].limit=limit2; - ++level2; - - /* copy the folding result to fold2[] */ - if(length<=UCASE_MAX_STRING_LENGTH) { - u_memcpy(fold2, p, length); - } else { - int32_t i=0; - U16_APPEND_UNSAFE(fold2, i, length); - length=i; - } - - /* set next level pointers to case folding */ - start2=s2=fold2; - limit2=fold2+length; - - /* get ready to read from decomposition, continue with loop */ - c2=-1; - continue; - } - - if( level1<2 && (options&_COMPARE_EQUIV) && - 0!=(p=nfcImpl->getDecomposition((UChar32)cp1, decomp1, length)) - ) { - /* cp1 decomposes into p[length] */ - if(U_IS_SURROGATE(c1)) { - if(U_IS_SURROGATE_LEAD(c1)) { - /* advance beyond source surrogate pair if it decomposes */ - ++s1; - } else /* isTrail(c1) */ { - /* - * we got a supplementary code point when hitting its trail surrogate, - * therefore the lead surrogate must have been the same as in the other string; - * compare this decomposition with the lead surrogate in the other string - * remember that this simulates bulk text replacement: - * the decomposition would replace the entire 
code point - */ - --s2; - c2=*(s2-1); - } - } - - /* push current level pointers */ - stack1[level1].start=start1; - stack1[level1].s=s1; - stack1[level1].limit=limit1; - ++level1; - - /* set empty intermediate level if skipped */ - if(level1<2) { - stack1[level1++].start=NULL; - } - - /* set next level pointers to decomposition */ - start1=s1=p; - limit1=p+length; - - /* get ready to read from decomposition, continue with loop */ - c1=-1; - continue; - } - - if( level2<2 && (options&_COMPARE_EQUIV) && - 0!=(p=nfcImpl->getDecomposition((UChar32)cp2, decomp2, length)) - ) { - /* cp2 decomposes into p[length] */ - if(U_IS_SURROGATE(c2)) { - if(U_IS_SURROGATE_LEAD(c2)) { - /* advance beyond source surrogate pair if it decomposes */ - ++s2; - } else /* isTrail(c2) */ { - /* - * we got a supplementary code point when hitting its trail surrogate, - * therefore the lead surrogate must have been the same as in the other string; - * compare this decomposition with the lead surrogate in the other string - * remember that this simulates bulk text replacement: - * the decomposition would replace the entire code point - */ - --s1; - c1=*(s1-1); - } - } - - /* push current level pointers */ - stack2[level2].start=start2; - stack2[level2].s=s2; - stack2[level2].limit=limit2; - ++level2; - - /* set empty intermediate level if skipped */ - if(level2<2) { - stack2[level2++].start=NULL; - } - - /* set next level pointers to decomposition */ - start2=s2=p; - limit2=p+length; - - /* get ready to read from decomposition, continue with loop */ - c2=-1; - continue; - } - - /* - * no decomposition/case folding, max level for both sides: - * return difference result - * - * code point order comparison must not just return cp1-cp2 - * because when single surrogates are present then the surrogate pairs - * that formed cp1 and cp2 may be from different string indexes - * - * example: { d800 d800 dc01 } vs. 
{ d800 dc00 }, compare at second code units - * c1=d800 cp1=10001 c2=dc00 cp2=10000 - * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 } - * - * therefore, use same fix-up as in ustring.c/uprv_strCompare() - * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++ - * so we have slightly different pointer/start/limit comparisons here - */ - - if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) { - /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ - if( - (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) || - (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2))) - ) { - /* part of a surrogate pair, leave >=d800 */ - } else { - /* BMP code point - may be surrogate code point - make =d800 */ - } else { - /* BMP code point - may be surrogate code point - make spanQuickCheckYes(str, *pErrorCode); - if (U_FAILURE(*pErrorCode)) { - return FALSE; - } - /* - * ICU 2.4 had a further optimization: - * If both strings were not in FCD, then they were both NFD'ed, - * and the _COMPARE_EQUIV option was turned off. - * It is not entirely clear that this is valid with the current - * definition of the canonical caseless match. - * Therefore, ICU 2.6 removes that optimization. 
- */ - if(spanQCYesnormalizeSecondAndAppend(normalized, unnormalized, *pErrorCode); - if (U_SUCCESS(*pErrorCode)) { - return TRUE; - } - } - return FALSE; -} - -U_CAPI int32_t U_EXPORT2 -unorm_compare(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode) { - /* argument checking */ - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(s1==0 || length1<-1 || s2==0 || length2<-1) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - UnicodeString fcd1, fcd2; - int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT); - options|=_COMPARE_EQUIV; - - /* - * UAX #21 Case Mappings, as fixed for Unicode version 4 - * (see Jitterbug 2021), defines a canonical caseless match as - * - * A string X is a canonical caseless match - * for a string Y if and only if - * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y))) - * - * For better performance, we check for FCD (or let the caller tell us that - * both strings are in FCD) for the inner normalization. - * BasicNormalizerTest::FindFoldFCDExceptions() makes sure that - * case-folding preserves the FCD-ness of a string. - * The outer normalization is then only performed by unorm_cmpEquivFold() - * when there is a difference. - * - * Exception: When using the Turkic case-folding option, we do perform - * full NFD first. This is because in the Turkic case precomposed characters - * with 0049 capital I or 0069 small i fold differently whether they - * are first decomposed or not, so an FCD check - a check only for - * canonical order - is not sufficient. 
- */ - if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) { - const Normalizer2 *n2; - if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) { - n2=Normalizer2::getNFDInstance(*pErrorCode); - } else { - n2=Normalizer2Factory::getFCDInstance(*pErrorCode); - } - if (U_FAILURE(*pErrorCode)) { - return 0; - } - - if(normOptions&UNORM_UNICODE_3_2) { - const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode); - FilteredNormalizer2 fn2(*n2, *uni32); - if(_normalize(&fn2, s1, length1, fcd1, pErrorCode)) { - s1=fcd1.getBuffer(); - length1=fcd1.length(); - } - if(_normalize(&fn2, s2, length2, fcd2, pErrorCode)) { - s2=fcd2.getBuffer(); - length2=fcd2.length(); - } - } else { - if(_normalize(n2, s1, length1, fcd1, pErrorCode)) { - s1=fcd1.getBuffer(); - length1=fcd1.length(); - } - if(_normalize(n2, s2, length2, fcd2, pErrorCode)) { - s2=fcd2.getBuffer(); - length2=fcd2.length(); - } - } - } - - if(U_SUCCESS(*pErrorCode)) { - return unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode); - } else { - return 0; - } -} - -#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/deps/node/deps/icu-small/source/common/unormimp.h b/deps/node/deps/icu-small/source/common/unormimp.h deleted file mode 100644 index 7f280551..00000000 --- a/deps/node/deps/icu-small/source/common/unormimp.h +++ /dev/null @@ -1,488 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2001-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: unormimp.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001may25 -* created by: Markus W. 
Scherer -*/ - -#ifndef __UNORMIMP_H__ -#define __UNORMIMP_H__ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "udataswp.h" - -/* - * The 2001-2010 implementation of the normalization code loads its data from - * unorm.icu, which is generated with the gennorm tool. - * The format of that file is described at the end of this file. - */ - -/* norm32 value constants */ -enum { - /* quick check flags 0..3 set mean "no" for their forms */ - _NORM_QC_NFC=0x11, /* no|maybe */ - _NORM_QC_NFKC=0x22, /* no|maybe */ - _NORM_QC_NFD=4, /* no */ - _NORM_QC_NFKD=8, /* no */ - - _NORM_QC_ANY_NO=0xf, - - /* quick check flags 4..5 mean "maybe" for their forms; test flags>=_NORM_QC_MAYBE */ - _NORM_QC_MAYBE=0x10, - _NORM_QC_ANY_MAYBE=0x30, - - _NORM_QC_MASK=0x3f, - - _NORM_COMBINES_FWD=0x40, - _NORM_COMBINES_BACK=0x80, - _NORM_COMBINES_ANY=0xc0, - - _NORM_CC_SHIFT=8, /* UnicodeData.txt combining class in bits 15..8 */ - _NORM_CC_MASK=0xff00, - - _NORM_EXTRA_SHIFT=16, /* 16 bits for the index to UChars and other extra data */ - _NORM_EXTRA_INDEX_TOP=0xfc00, /* start of surrogate specials after shift */ - - _NORM_EXTRA_SURROGATE_MASK=0x3ff, - _NORM_EXTRA_SURROGATE_TOP=0x3f0, /* hangul etc. 
*/ - - _NORM_EXTRA_HANGUL=_NORM_EXTRA_SURROGATE_TOP, - _NORM_EXTRA_JAMO_L, - _NORM_EXTRA_JAMO_V, - _NORM_EXTRA_JAMO_T -}; - -/* norm32 value constants using >16 bits */ -#define _NORM_MIN_SPECIAL 0xfc000000 -#define _NORM_SURROGATES_TOP 0xfff00000 -#define _NORM_MIN_HANGUL 0xfff00000 -#define _NORM_MIN_JAMO_V 0xfff20000 -#define _NORM_JAMO_V_TOP 0xfff30000 - -/* value constants for auxTrie */ -enum { - _NORM_AUX_COMP_EX_SHIFT=10, - _NORM_AUX_UNSAFE_SHIFT=11, - _NORM_AUX_NFC_SKIPPABLE_F_SHIFT=12 -}; - -#define _NORM_AUX_MAX_FNC ((int32_t)1<<_NORM_AUX_COMP_EX_SHIFT) - -#define _NORM_AUX_FNC_MASK (uint32_t)(_NORM_AUX_MAX_FNC-1) -#define _NORM_AUX_COMP_EX_MASK ((uint32_t)1<<_NORM_AUX_COMP_EX_SHIFT) -#define _NORM_AUX_UNSAFE_MASK ((uint32_t)1<<_NORM_AUX_UNSAFE_SHIFT) -#define _NORM_AUX_NFC_SKIP_F_MASK ((uint32_t)1<<_NORM_AUX_NFC_SKIPPABLE_F_SHIFT) - -/* canonStartSets[0..31] contains indexes for what is in the array */ -enum { - _NORM_SET_INDEX_CANON_SETS_LENGTH, /* number of uint16_t in canonical starter sets */ - _NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH, /* number of uint16_t in the BMP search table (contains pairs) */ - _NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH,/* number of uint16_t in the supplementary search table (contains triplets) */ - - /* from formatVersion 2.3: */ - _NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET, /* uint16_t offset from canonStartSets[0] to the - exclusion set for CJK compatibility characters */ - _NORM_SET_INDEX_NX_UNICODE32_OFFSET, /* uint16_t offset from canonStartSets[0] to the - exclusion set for Unicode 3.2 characters */ - _NORM_SET_INDEX_NX_RESERVED_OFFSET, /* uint16_t offset from canonStartSets[0] to the - end of the previous exclusion set */ - - _NORM_SET_INDEX_TOP=32 /* changing this requires a new formatVersion */ -}; - -/* more constants for canonical starter sets */ - -/* 14 bit indexes to canonical USerializedSets */ -#define _NORM_MAX_CANON_SETS 0x4000 - -/* single-code point BMP sets are encoded directly in the search table except if 
result=0x4000..0x7fff */ -#define _NORM_CANON_SET_BMP_MASK 0xc000 -#define _NORM_CANON_SET_BMP_IS_INDEX 0x4000 - -/* indexes[] value names */ -enum { - _NORM_INDEX_TRIE_SIZE, /* number of bytes in normalization trie */ - _NORM_INDEX_UCHAR_COUNT, /* number of UChars in extra data */ - - _NORM_INDEX_COMBINE_DATA_COUNT, /* number of uint16_t words for combining data */ - _NORM_INDEX_COMBINE_FWD_COUNT, /* number of code points that combine forward */ - _NORM_INDEX_COMBINE_BOTH_COUNT, /* number of code points that combine forward and backward */ - _NORM_INDEX_COMBINE_BACK_COUNT, /* number of code points that combine backward */ - - _NORM_INDEX_MIN_NFC_NO_MAYBE, /* first code point with quick check NFC NO/MAYBE */ - _NORM_INDEX_MIN_NFKC_NO_MAYBE, /* first code point with quick check NFKC NO/MAYBE */ - _NORM_INDEX_MIN_NFD_NO_MAYBE, /* first code point with quick check NFD NO/MAYBE */ - _NORM_INDEX_MIN_NFKD_NO_MAYBE, /* first code point with quick check NFKD NO/MAYBE */ - - _NORM_INDEX_FCD_TRIE_SIZE, /* number of bytes in FCD trie */ - - _NORM_INDEX_AUX_TRIE_SIZE, /* number of bytes in the auxiliary trie */ - _NORM_INDEX_CANON_SET_COUNT, /* number of uint16_t in the array of serialized USet */ - - _NORM_INDEX_TOP=32 /* changing this requires a new formatVersion */ -}; - -enum { - /* FCD check: everything below this code point is known to have a 0 lead combining class */ - _NORM_MIN_WITH_LEAD_CC=0x300 -}; - -enum { - /** - * Bit 7 of the length byte for a decomposition string in extra data is - * a flag indicating whether the decomposition string is - * preceded by a 16-bit word with the leading and trailing cc - * of the decomposition (like for A-umlaut); - * if not, then both cc's are zero (like for compatibility ideographs). - */ - _NORM_DECOMP_FLAG_LENGTH_HAS_CC=0x80, - /** - * Bits 6..0 of the length byte contain the actual length. - */ - _NORM_DECOMP_LENGTH_MASK=0x7f -}; - -/** Constants for options flags for normalization. 
*/ -enum { - /** Options bit 0, do not decompose Hangul syllables. */ - UNORM_NX_HANGUL=1, - /** Options bit 1, do not decompose CJK compatibility characters. */ - UNORM_NX_CJK_COMPAT=2 -}; - -/** - * Description of the format of unorm.icu version 2.3. - * - * Main change from version 1 to version 2: - * Use of new, common UTrie instead of normalization-specific tries. - * Change to version 2.1: add third/auxiliary trie with associated data. - * Change to version 2.2: add skippable (f) flag data (_NORM_AUX_NFC_SKIP_F_MASK). - * Change to version 2.3: add serialized sets for normalization exclusions - * stored inside canonStartSets[] - * - * For more details of how to use the data structures see the code - * in unorm.cpp (runtime normalization code) and - * in gennorm.c and gennorm/store.c (build-time data generation). - * - * For the serialized format of UTrie see utrie.c/UTrieHeader. - * - * - Overall partition - * - * unorm.dat customarily begins with a UDataInfo structure, see udata.h and .c. 
- * After that there are the following structures: - * - * int32_t indexes[_NORM_INDEX_TOP]; -- _NORM_INDEX_TOP=32, see enum in this file - * - * UTrie normTrie; -- size in bytes=indexes[_NORM_INDEX_TRIE_SIZE] - * - * uint16_t extraData[extraDataTop]; -- extraDataTop=indexes[_NORM_INDEX_UCHAR_COUNT] - * extraData[0] contains the number of units for - * FC_NFKC_Closure (formatVersion>=2.1) - * - * uint16_t combiningTable[combiningTableTop]; -- combiningTableTop=indexes[_NORM_INDEX_COMBINE_DATA_COUNT] - * combiningTableTop may include one 16-bit padding unit - * to make sure that fcdTrie is 32-bit-aligned - * - * UTrie fcdTrie; -- size in bytes=indexes[_NORM_INDEX_FCD_TRIE_SIZE] - * - * UTrie auxTrie; -- size in bytes=indexes[_NORM_INDEX_AUX_TRIE_SIZE] - * - * uint16_t canonStartSets[canonStartSetsTop] -- canonStartSetsTop=indexes[_NORM_INDEX_CANON_SET_COUNT] - * serialized USets and binary search tables, see below - * - * - * The indexes array contains lengths and sizes of the following arrays and structures - * as well as the following values: - * indexes[_NORM_INDEX_COMBINE_FWD_COUNT]=combineFwdTop - * -- one more than the highest combining index computed for forward-only-combining characters - * indexes[_NORM_INDEX_COMBINE_BOTH_COUNT]=combineBothTop-combineFwdTop - * -- number of combining indexes computed for both-ways-combining characters - * indexes[_NORM_INDEX_COMBINE_BACK_COUNT]=combineBackTop-combineBothTop - * -- number of combining indexes computed for backward-only-combining characters - * - * indexes[_NORM_INDEX_MIN_NF*_NO_MAYBE] (where *={ C, D, KC, KD }) - * -- first code point with a quick check NF* value of NO/MAYBE - * - * - * - Tries - * - * The main structures are two UTrie tables ("compact arrays"), - * each with one index array and one data array. - * See utrie.h and utrie.c. - * - * - * - Tries in unorm.dat - * - * The first trie (normTrie above) - * provides data for the NF* quick checks and normalization. 
- * The second trie (fcdTrie above) provides data just for FCD checks. - * - * - * - norm32 data words from the first trie - * - * The norm32Table contains one 32-bit word "norm32" per code point. - * It contains the following bit fields: - * 31..16 extra data index, _NORM_EXTRA_SHIFT is used to shift this field down - * if this index is <_NORM_EXTRA_INDEX_TOP then it is an index into - * extraData[] where variable-length normalization data for this - * code point is found - * if this index is <_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_SURROGATE_TOP - * then this is a norm32 for a leading surrogate, and the index - * value is used together with the following trailing surrogate - * code unit in the second trie access - * if this index is >=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_SURROGATE_TOP - * then this is a norm32 for a "special" character, - * i.e., the character is a Hangul syllable or a Jamo - * see _NORM_EXTRA_HANGUL etc. - * generally, instead of extracting this index from the norm32 and - * comparing it with the above constants, - * the normalization code compares the entire norm32 value - * with _NORM_MIN_SPECIAL, _NORM_SURROGATES_TOP, _NORM_MIN_HANGUL etc. - * - * 15..8 combining class (cc) according to UnicodeData.txt - * - * 7..6 _NORM_COMBINES_ANY flags, used in composition to see if a character - * combines with any following or preceding character(s) - * at all - * 7 _NORM_COMBINES_BACK - * 6 _NORM_COMBINES_FWD - * - * 5..0 quick check flags, set for "no" or "maybe", with separate flags for - * each normalization form - * the higher bits are "maybe" flags; for NF*D there are no such flags - * the lower bits are "no" flags for all forms, in the same order - * as the "maybe" flags, - * which is (MSB to LSB): NFKD NFD NFKC NFC - * 5..4 _NORM_QC_ANY_MAYBE - * 3..0 _NORM_QC_ANY_NO - * see further related constants - * - * - * - Extra data per code point - * - * "Extra data" is referenced by the index in norm32. - * It is variable-length data. 
It is only present, and only those parts - * of it are, as needed for a given character. - * The norm32 extra data index is added to the beginning of extraData[] - * to get to a vector of 16-bit words with data at the following offsets: - * - * [-1] Combining index for composition. - * Stored only if norm32&_NORM_COMBINES_ANY . - * [0] Lengths of the canonical and compatibility decomposition strings. - * Stored only if there are decompositions, i.e., - * if norm32&(_NORM_QC_NFD|_NORM_QC_NFKD) - * High byte: length of NFKD, or 0 if none - * Low byte: length of NFD, or 0 if none - * Each length byte also has another flag: - * Bit 7 of a length byte is set if there are non-zero - * combining classes (cc's) associated with the respective - * decomposition. If this flag is set, then the decomposition - * is preceded by a 16-bit word that contains the - * leading and trailing cc's. - * Bits 6..0 of a length byte are the length of the - * decomposition string, not counting the cc word. - * [1..n] NFD - * [n+1..] NFKD - * - * Each of the two decompositions consists of up to two parts: - * - The 16-bit words with the leading and trailing cc's. - * This is only stored if bit 7 of the corresponding length byte - * is set. In this case, at least one of the cc's is not zero. - * High byte: leading cc==cc of the first code point in the decomposition string - * Low byte: trailing cc==cc of the last code point in the decomposition string - * - The decomposition string in UTF-16, with length code units. - * - * - * - Combining indexes and combiningTable[] - * - * Combining indexes are stored at the [-1] offset of the extra data - * if the character combines forward or backward with any other characters. - * They are used for (re)composition in NF*C. 
- * Values of combining indexes are arranged according to whether a character - * combines forward, backward, or both ways: - * forward-only < both ways < backward-only - * - * The index values for forward-only and both-ways combining characters - * are indexes into the combiningTable[]. - * The index values for backward-only combining characters are simply - * incremented from the preceding index values to be unique. - * - * In the combiningTable[], a variable-length list - * of variable-length (back-index, code point) pair entries is stored - * for each forward-combining character. - * - * These back-indexes are the combining indexes of both-ways or backward-only - * combining characters that the forward-combining character combines with. - * - * Each list is sorted in ascending order of back-indexes. - * Each list is terminated with the last back-index having bit 15 set. - * - * Each pair (back-index, code point) takes up either 2 or 3 - * 16-bit words. - * The first word of a list entry is the back-index, with its bit 15 set if - * this is the last pair in the list. - * - * The second word contains flags in bits 15..13 that determine - * if there is a third word and how the combined character is encoded: - * 15 set if there is a third word in this list entry - * 14 set if the result is a supplementary character - * 13 set if the result itself combines forward - * - * According to these bits 15..14 of the second word, - * the result character is encoded as follows: - * 00 or 01 The result is <=0x1fff and stored in bits 12..0 of - * the second word. - * 10 The result is 0x2000..0xffff and stored in the third word. - * Bits 12..0 of the second word are not used. - * 11 The result is a supplementary character. - * Bits 9..0 of the leading surrogate are in bits 9..0 of - * the second word. - * Add 0xd800 to these bits to get the complete surrogate. - * Bits 12..10 of the second word are not used. - * The trailing surrogate is stored in the third word. 
- * - * - * - FCD trie - * - * The FCD trie is very simple. - * It is a folded trie with 16-bit data words. - * In each word, the high byte contains the leading cc of the character, - * and the low byte contains the trailing cc of the character. - * These cc's are the cc's of the first and last code points in the - * canonical decomposition of the character. - * - * Since all 16 bits are used for cc's, lead surrogates must be tested - * by checking the code unit instead of the trie data. - * This is done only if the 16-bit data word is not zero. - * If the code unit is a leading surrogate and the data word is not zero, - * then instead of cc's it contains the offset for the second trie lookup. - * - * - * - Auxiliary trie and data - * - * The auxiliary 16-bit trie contains data for additional properties. - * Bits - * 15..13 reserved - * 12 not NFC_Skippable (f) (formatVersion>=2.2) - * 11 flag: not a safe starter for canonical closure - * 10 composition exclusion - * 9.. 0 index into extraData[] to FC_NFKC_Closure string - * (not for lead surrogate), - * or lead surrogate offset (for lead surrogate, if 9..0 not zero) - * - * - FC_NFKC_Closure strings in extraData[] - * - * Strings are either stored as a single code unit or as the length - * followed by that many units. - * const UChar *s=extraData+(index from auxTrie data bits 9..0); - * int32_t length; - * if(*s<0xff00) { - * // s points to the single-unit string - * length=1; - * } else { - * length=*s&0xff; - * ++s; - * } - * - * Conditions for "NF* Skippable" from Mark Davis' com.ibm.text.UCD.NFSkippable: - * (used in NormalizerTransliterator) - * - * A skippable character is - * a) unassigned, or ALL of the following: - * b) of combining class 0. - * c) not decomposed by this normalization form. - * AND if NFC or NFKC, - * d) can never compose with a previous character. - * e) can never compose with a following character. - * f) can never change if another character is added. 
- * Example: a-breve might satisfy all but f, but if you - * add an ogonek it changes to a-ogonek + breve - * - * a)..e) must be tested from norm32. - * Since f) is more complicated, the (not-)NFC_Skippable flag (f) is built - * into the auxiliary trie. - * The same bit is used for NFC and NFKC; (c) differs for them. - * As usual, we build the "not skippable" flags so that unassigned - * code points get a 0 bit. - * This bit is only valid after (a)..(e) test FALSE; test NFD_NO before (f) as well. - * Test Hangul LV syllables entirely in code. - * - * - * - structure inside canonStartSets[] - * - * This array maps from code points c to sets of code points (USerializedSet). - * The result sets are the code points whose canonical decompositions start - * with c. - * - * canonStartSets[] contains the following sub-arrays: - * - * indexes[_NORM_SET_INDEX_TOP] - * - contains lengths of sub-arrays etc. - * - * startSets[indexes[_NORM_SET_INDEX_CANON_SETS_LENGTH]-_NORM_SET_INDEX_TOP] - * - contains serialized sets (USerializedSet) of canonical starters for - * enumerating canonically equivalent strings - * indexes[_NORM_SET_INDEX_CANON_SETS_LENGTH] includes _NORM_SET_INDEX_TOP - * for details about the structure see uset.c - * - * bmpTable[indexes[_NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH]] - * - a sorted search table for BMP code points whose results are - * either indexes to USerializedSets or single code points for - * single-code point sets; - * each entry is a pair of { code point, result } with result=(binary) yy xxxxxx xxxxxxxx - * if yy==01 then there is a USerializedSet at canonStartSets+x - * else build a USerializedSet with result as the single code point - * - * suppTable[indexes[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH]] - * - a sorted search table for supplementary code points whose results are - * either indexes to USerializedSets or single code points for - * single-code point sets; - * each entry is a triplet of { high16(cp), low16(cp), result } - * each code 
point's high-word may contain extra data in bits 15..5: - * if the high word has bit 15 set, then build a set with a single code point - * which is (((high16(cp)&0x1f00)<<8)|result; - * else there is a USerializedSet at canonStartSets+result - * - * FormatVersion 2.3 adds 2 serialized sets for normalization exclusions. - * They are stored in the data file so that the runtime normalization code need - * not depend on other properties and their data and implementation files. - * The _NORM_SET_INDEX_NX_..._OFFSET offsets in the canonStartSets index table - * give the location for each set. - * There is no set stored for UNORM_NX_HANGUL because it's trivial to create - * without using properties. - * - * Set contents: - * - * _NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET (for UNORM_NX_CJK_COMPAT) - * [[:Ideographic:]&[:NFD_QC=No:]] - * =[CJK Ideographs]&[has canonical decomposition] - * - * _NORM_SET_INDEX_NX_UNICODE32_OFFSET (for UNORM_UNICODE_3_2) - * [:^Age=3.2:] - * =set with all code points that were not designated by the specified Unicode version - * - * _NORM_SET_INDEX_NX_RESERVED_OFFSET - * This is an offset that points to where the next, future set would start. - * Currently it indicates where the previous set ends, and thus its length. - * The name for this enum constant may in the future be applied to different - * index slots. In order to get the limit of a set, use its index slot and - * the immediately following one regardless of that one's enum name. - */ - -#endif /* #if !UCONFIG_NO_NORMALIZATION */ - -#endif diff --git a/deps/node/deps/icu-small/source/common/uobject.cpp b/deps/node/deps/icu-small/source/common/uobject.cpp deleted file mode 100644 index 1133dd9b..00000000 --- a/deps/node/deps/icu-small/source/common/uobject.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2002-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: uobject.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jun26 -* created by: Markus W. Scherer -*/ - -#include "unicode/uobject.h" -#include "cmemory.h" - -U_NAMESPACE_BEGIN - -#if U_OVERRIDE_CXX_ALLOCATION - -/* - * Default implementation of UMemory::new/delete - * using uprv_malloc() and uprv_free(). - * - * For testing, this is used together with a list of imported symbols to verify - * that ICU is not using the global ::new and ::delete operators. - * - * These operators can be implemented like this or any other appropriate way - * when customizing ICU for certain environments. - * Whenever ICU is customized in binary incompatible ways please be sure - * to use library name suffixes to distinguish such libraries from - * the standard build. - * - * Instead of just modifying these C++ new/delete operators, it is usually best - * to modify the uprv_malloc()/uprv_free()/uprv_realloc() functions in cmemory.c. - * - * Memory test on Windows/MSVC 6: - * The global operators new and delete look as follows: - * 04F 00000000 UNDEF notype () External | ??2@YAPAXI@Z (void * __cdecl operator new(unsigned int)) - * 03F 00000000 UNDEF notype () External | ??3@YAXPAX@Z (void __cdecl operator delete(void *)) - * - * These lines are from output generated by the MSVC 6 tool dumpbin with - * dumpbin /symbols *.obj - * - * ??2@YAPAXI@Z and ??3@YAXPAX@Z are the linker symbols in the .obj - * files and are imported from msvcrtd.dll (in a debug build). 
- * - * Make sure that with the UMemory operators new and delete defined these two symbols - * do not appear in the dumpbin /symbols output for the ICU libraries! - * - * If such a symbol appears in the output then look in the preceding lines in the output - * for which file and function calls the global new or delete operator, - * and replace with uprv_malloc/uprv_free. - */ - -void * U_EXPORT2 UMemory::operator new(size_t size) U_NO_THROW { - return uprv_malloc(size); -} - -void U_EXPORT2 UMemory::operator delete(void *p) U_NO_THROW { - if(p!=NULL) { - uprv_free(p); - } -} - -void * U_EXPORT2 UMemory::operator new[](size_t size) U_NO_THROW { - return uprv_malloc(size); -} - -void U_EXPORT2 UMemory::operator delete[](void *p) U_NO_THROW { - if(p!=NULL) { - uprv_free(p); - } -} - -#if U_HAVE_DEBUG_LOCATION_NEW -void * U_EXPORT2 UMemory::operator new(size_t size, const char* /*file*/, int /*line*/) U_NO_THROW { - return UMemory::operator new(size); -} - -void U_EXPORT2 UMemory::operator delete(void* p, const char* /*file*/, int /*line*/) U_NO_THROW { - UMemory::operator delete(p); -} -#endif /* U_HAVE_DEBUG_LOCATION_NEW */ - - -#endif - -UObject::~UObject() {} - -UClassID UObject::getDynamicClassID() const { return NULL; } - -U_NAMESPACE_END - -U_NAMESPACE_USE - -U_CAPI void U_EXPORT2 -uprv_deleteUObject(void *obj) { - delete static_cast(obj); -} diff --git a/deps/node/deps/icu-small/source/common/uposixdefs.h b/deps/node/deps/icu-small/source/common/uposixdefs.h deleted file mode 100644 index 45ca1233..00000000 --- a/deps/node/deps/icu-small/source/common/uposixdefs.h +++ /dev/null @@ -1,71 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011-2015, International Business Machines -* Corporation and others. All Rights Reserved. 
-******************************************************************************* -* file name: uposixdefs.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011jul25 -* created by: Markus W. Scherer -* -* Common definitions for implementation files working with POSIX functions. -* *Important*: #include this file before any other header files! -*/ - -#ifndef __UPOSIXDEFS_H__ -#define __UPOSIXDEFS_H__ - -/* - * Define _XOPEN_SOURCE for access to POSIX functions. - * - * We cannot use U_PLATFORM from platform.h/utypes.h because - * "The Open Group Base Specifications" - * chapter "2.2 The Compilation Environment" says: - * "In the compilation of an application that #defines a feature test macro - * specified by IEEE Std 1003.1-2001, - * no header defined by IEEE Std 1003.1-2001 shall be included prior to - * the definition of the feature test macro." - */ -#ifdef _XOPEN_SOURCE - /* Use the predefined value. */ -#else - /* - * Version 6.0: - * The Open Group Base Specifications Issue 6 (IEEE Std 1003.1, 2004 Edition) - * also known as - * SUSv3 = Open Group Single UNIX Specification, Version 3 (UNIX03) - * - * Note: This definition used to be in C source code (e.g., putil.c) - * and define _XOPEN_SOURCE to different values depending on __STDC_VERSION__. - * In C++ source code (e.g., putil.cpp), __STDC_VERSION__ is not defined at all. - */ -# define _XOPEN_SOURCE 600 -#endif - -/* - * Make sure things like readlink and such functions work. - * Poorly upgraded Solaris machines can't have this defined. - * Cleanly installed Solaris can use this #define. - * - * z/OS needs this definition for timeval and to get usleep. - */ -#if !defined(_XOPEN_SOURCE_EXTENDED) && defined(__TOS_MVS__) -# define _XOPEN_SOURCE_EXTENDED 1 -#endif - -/** - * Solaris says: - * "...it is invalid to compile an XPG6 or a POSIX.1-2001 application with anything other - * than a c99 or later compiler." - * Apparently C++11 is not "or later". Work around this. 
- */ -#if defined(__cplusplus) && (defined(sun) || defined(__sun)) && !defined (_STDC_C99) -# define _STDC_C99 -#endif - -#endif /* __UPOSIXDEFS_H__ */ diff --git a/deps/node/deps/icu-small/source/common/uprops.cpp b/deps/node/deps/icu-small/source/common/uprops.cpp deleted file mode 100644 index 2421c15d..00000000 --- a/deps/node/deps/icu-small/source/common/uprops.cpp +++ /dev/null @@ -1,665 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uprops.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002feb24 -* created by: Markus W. Scherer -* -* Implementations for mostly non-core Unicode character properties -* stored in uprops.icu. -* -* With the APIs implemented here, almost all properties files and -* their associated implementation files are used from this file, -* including those for normalization and case mappings. 
-*/ - -#include "unicode/utypes.h" -#include "unicode/uchar.h" -#include "unicode/ucptrie.h" -#include "unicode/unorm2.h" -#include "unicode/uscript.h" -#include "unicode/ustring.h" -#include "cstring.h" -#include "normalizer2impl.h" -#include "umutex.h" -#include "ubidi_props.h" -#include "uprops.h" -#include "ucase.h" -#include "ustr_imp.h" - -// ulayout_props_data.h is machine-generated by genprops -#define INCLUDED_FROM_UPROPS_CPP -#include "ulayout_props_data.h" - -U_NAMESPACE_USE - -/* general properties API functions ----------------------------------------- */ - -struct BinaryProperty; - -typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which); - -struct BinaryProperty { - int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 - uint32_t mask; - BinaryPropertyContains *contains; -}; - -static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) { - /* systematic, directly stored properties */ - return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0; -} - -static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { - return static_cast(ucase_hasBinaryProperty(c, which)); -} - -static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_isBidiControl(c); -} - -static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_isMirrored(c); -} - -static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_isJoinControl(c); -} - -#if UCONFIG_NO_NORMALIZATION -static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) { - return FALSE; -} -#else -static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - // By definition, Full_Composition_Exclusion is the same as NFC_QC=No. 
- UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); - return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c)); -} -#endif - -// UCHAR_NF*_INERT properties -#if UCONFIG_NO_NORMALIZATION -static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) { - return FALSE; -} -#else -static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { - UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2 *norm2=Normalizer2Factory::getInstance( - (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode); - return U_SUCCESS(errorCode) && norm2->isInert(c); -} -#endif - -#if UCONFIG_NO_NORMALIZATION -static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) { - return FALSE; -} -#else -static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - UnicodeString nfd; - UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode); - if(U_FAILURE(errorCode)) { - return FALSE; - } - if(nfcNorm2->getDecomposition(c, nfd)) { - /* c has a decomposition */ - if(nfd.length()==1) { - c=nfd[0]; /* single BMP code point */ - } else if(nfd.length()<=U16_MAX_LENGTH && - nfd.length()==U16_LENGTH(c=nfd.char32At(0)) - ) { - /* single supplementary code point */ - } else { - c=U_SENTINEL; - } - } else if(c<0) { - return FALSE; /* protect against bad input */ - } - if(c>=0) { - /* single code point */ - const UChar *resultString; - return (UBool)(ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT)>=0); - } else { - /* guess some large but stack-friendly capacity */ - UChar dest[2*UCASE_MAX_STRING_LENGTH]; - int32_t destLength; - destLength=u_strFoldCase(dest, UPRV_LENGTHOF(dest), - nfd.getBuffer(), nfd.length(), - U_FOLD_CASE_DEFAULT, &errorCode); - return (UBool)(U_SUCCESS(errorCode) && - 0!=u_strCompare(nfd.getBuffer(), nfd.length(), - dest, destLength, FALSE)); - } -} -#endif - 
-#if UCONFIG_NO_NORMALIZATION -static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) { - return FALSE; -} -#else -static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode); - if(U_FAILURE(errorCode)) { - return FALSE; - } - UnicodeString src(c); - UnicodeString dest; - { - // The ReorderingBuffer must be in a block because its destructor - // needs to release dest's buffer before we look at its contents. - ReorderingBuffer buffer(*kcf, dest); - // Small destCapacity for NFKC_CF(c). - if(buffer.init(5, errorCode)) { - const UChar *srcArray=src.getBuffer(); - kcf->compose(srcArray, srcArray+src.length(), FALSE, - TRUE, buffer, errorCode); - } - } - return U_SUCCESS(errorCode) && dest!=src; -} -#endif - -#if UCONFIG_NO_NORMALIZATION -static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) { - return FALSE; -} -#else -static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); - return - U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) && - impl->isCanonSegmentStarter(c); -} -#endif - -static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return u_isalnumPOSIX(c); -} - -static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return u_isblank(c); -} - -static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return u_isgraphPOSIX(c); -} - -static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return u_isprintPOSIX(c); -} - -static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return u_isxdigit(c); -} - 
-static UBool isRegionalIndicator(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - // Property starts are a subset of lb=RI etc. - return 0x1F1E6<=c && c<=0x1F1FF; -} - -static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ - /* - * column and mask values for binary properties from u_getUnicodeProperties(). - * Must be in order of corresponding UProperty, - * and there must be exactly one entry per binary UProperty. - * - * Properties with mask==0 are handled in code. - * For them, column is the UPropertySource value. - */ - { 1, U_MASK(UPROPS_ALPHABETIC), defaultContains }, - { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains }, - { UPROPS_SRC_BIDI, 0, isBidiControl }, - { UPROPS_SRC_BIDI, 0, isMirrored }, - { 1, U_MASK(UPROPS_DASH), defaultContains }, - { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains }, - { 1, U_MASK(UPROPS_DEPRECATED), defaultContains }, - { 1, U_MASK(UPROPS_DIACRITIC), defaultContains }, - { 1, U_MASK(UPROPS_EXTENDER), defaultContains }, - { UPROPS_SRC_NFC, 0, hasFullCompositionExclusion }, - { 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains }, - { 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains }, - { 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains }, - { 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains }, - { 1, U_MASK(UPROPS_HYPHEN), defaultContains }, - { 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains }, - { 1, U_MASK(UPROPS_ID_START), defaultContains }, - { 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains }, - { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains }, - { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains }, - { UPROPS_SRC_BIDI, 0, isJoinControl }, - { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains }, - { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE - { 1, U_MASK(UPROPS_MATH), defaultContains }, - { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains }, - { 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains }, - { 1, 
U_MASK(UPROPS_RADICAL), defaultContains }, - { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED - { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains }, - { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains }, - { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE - { 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains }, - { 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains }, - { 1, U_MASK(UPROPS_XID_START), defaultContains }, - { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIVE - { 1, U_MASK(UPROPS_S_TERM), defaultContains }, - { 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains }, - { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT - { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT - { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT - { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT - { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter }, - { 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains }, - { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains }, - { UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum }, - { UPROPS_SRC_CHAR, 0, isPOSIX_blank }, - { UPROPS_SRC_CHAR, 0, isPOSIX_graph }, - { UPROPS_SRC_CHAR, 0, isPOSIX_print }, - { UPROPS_SRC_CHAR, 0, isPOSIX_xdigit }, - { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED - { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABLE - { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_LOWERCASED - { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_UPPERCASED - { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED - { UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded }, - { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED - { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded }, - { 2, U_MASK(UPROPS_2_EMOJI), defaultContains }, - { 2, U_MASK(UPROPS_2_EMOJI_PRESENTATION), 
defaultContains }, - { 2, U_MASK(UPROPS_2_EMOJI_MODIFIER), defaultContains }, - { 2, U_MASK(UPROPS_2_EMOJI_MODIFIER_BASE), defaultContains }, - { 2, U_MASK(UPROPS_2_EMOJI_COMPONENT), defaultContains }, - { 2, 0, isRegionalIndicator }, - { 1, U_MASK(UPROPS_PREPENDED_CONCATENATION_MARK), defaultContains }, - { 2, U_MASK(UPROPS_2_EXTENDED_PICTOGRAPHIC), defaultContains }, -}; - -U_CAPI UBool U_EXPORT2 -u_hasBinaryProperty(UChar32 c, UProperty which) { - /* c is range-checked in the functions that are called from here */ - if(which>prop.shift; -} - -static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) { - return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift; -} - -static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) { - return prop.shift; -} - -static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return (int32_t)u_charDirection(c); -} - -static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return (int32_t)ubidi_getPairedBracketType(c); -} - -static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) { - return ubidi_getMaxValue(which); -} - -#if UCONFIG_NO_NORMALIZATION -static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) { - return 0; -} -#else -static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return u_getCombiningClass(c); -} -#endif - -static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return (int32_t)u_charType(c); -} - -static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_getJoiningGroup(c); -} - -static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return ubidi_getJoiningType(c); -} - -static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - 
int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c)); - return UPROPS_NTV_GET_TYPE(ntv); -} - -static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - UErrorCode errorCode=U_ZERO_ERROR; - return (int32_t)uscript_getScript(c, &errorCode); -} - -/* - * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. - * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. - */ -static const UHangulSyllableType gcbToHst[]={ - U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */ - U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */ - U_HST_NOT_APPLICABLE, /* U_GCB_CR */ - U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */ - U_HST_LEADING_JAMO, /* U_GCB_L */ - U_HST_NOT_APPLICABLE, /* U_GCB_LF */ - U_HST_LV_SYLLABLE, /* U_GCB_LV */ - U_HST_LVT_SYLLABLE, /* U_GCB_LVT */ - U_HST_TRAILING_JAMO, /* U_GCB_T */ - U_HST_VOWEL_JAMO /* U_GCB_V */ - /* - * Omit GCB values beyond what we need for hst. - * The code below checks for the array length. - */ -}; - -static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - /* see comments on gcbToHst[] above */ - int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; - if(gcb>8; -} -#endif - -#if UCONFIG_NO_NORMALIZATION -static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) { - return 0; -} -#else -static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return unorm_getFCD16(c)&0xff; -} -#endif - -static int32_t getInPC(const IntProperty &, UChar32 c, UProperty) { - return ucptrie_get(&inpc_trie, c); -} - -static int32_t getInSC(const IntProperty &, UChar32 c, UProperty) { - return ucptrie_get(&insc_trie, c); -} - -static int32_t getVo(const IntProperty &, UChar32 c, UProperty) { - return ucptrie_get(&vo_trie, c); -} - -static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ - /* - * column, mask and shift values for int-value 
properties from u_getUnicodeProperties(). - * Must be in order of corresponding UProperty, - * and there must be exactly one entry per int UProperty. - * - * Properties with mask==0 are handled in code. - * For them, column is the UPropertySource value. - */ - { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue }, - { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue }, - { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift }, - { 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue }, - { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue }, - { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift }, - { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue }, - { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue }, - { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue }, - { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift }, - { 0, UPROPS_SCRIPT_MASK, 0, getScript, defaultGetMaxValue }, - { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift }, - // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" - { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, - // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" - { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, - // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE - { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, - // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE - { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, - { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift }, - { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift }, - { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue }, - { 2, 
UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue }, - { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue }, - { UPROPS_SRC_BIDI, 0, 0, getBiDiPairedBracketType, biDiGetMaxValue }, - { UPROPS_SRC_INPC, 0, maxInPCValue, getInPC, getMaxValueFromShift }, - { UPROPS_SRC_INSC, 0, maxInSCValue, getInSC, getMaxValueFromShift }, - { UPROPS_SRC_VO, 0, maxVoValue, getVo, getMaxValueFromShift }, -}; - -U_CAPI int32_t U_EXPORT2 -u_getIntPropertyValue(UChar32 c, UProperty which) { - if(which= 0) { - sa->add(sa->set, start); - start = end + 1; - } -} - -#if !UCONFIG_NO_NORMALIZATION - -U_CAPI int32_t U_EXPORT2 -u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) { - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(destCapacity<0 || (dest==NULL && destCapacity>0)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - // Compute the FC_NFKC_Closure on the fly: - // We have the API for complete coverage of Unicode properties, although - // this value by itself is not useful via API. - // (What could be useful is a custom normalization table that combines - // case folding and NFKC.) - // For the derivation, see Unicode's DerivedNormalizationProps.txt. 
- const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - // first: b = NFKC(Fold(a)) - UnicodeString folded1String; - const UChar *folded1; - int32_t folded1Length=ucase_toFullFolding(c, &folded1, U_FOLD_CASE_DEFAULT); - if(folded1Length<0) { - const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); - if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) { - return u_terminateUChars(dest, destCapacity, 0, pErrorCode); // c does not change at all under CaseFolding+NFKC - } - folded1String.setTo(c); - } else { - if(folded1Length>UCASE_MAX_STRING_LENGTH) { - folded1String.setTo(folded1Length); - } else { - folded1String.setTo(FALSE, folded1, folded1Length); - } - } - UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode); - // second: c = NFKC(Fold(b)) - UnicodeString folded2String(kc1); - UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode); - // if (c != b) add the mapping from a to c - if(U_FAILURE(*pErrorCode) || kc1==kc2) { - return u_terminateUChars(dest, destCapacity, 0, pErrorCode); - } else { - return kc2.extract(dest, destCapacity, *pErrorCode); - } -} - -#endif diff --git a/deps/node/deps/icu-small/source/common/uprops.h b/deps/node/deps/icu-small/source/common/uprops.h deleted file mode 100644 index 1a8e4e84..00000000 --- a/deps/node/deps/icu-small/source/common/uprops.h +++ /dev/null @@ -1,477 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uprops.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002feb24 -* created by: Markus W. 
Scherer -* -* Constants for mostly non-core Unicode character properties -* stored in uprops.icu. -*/ - -#ifndef __UPROPS_H__ -#define __UPROPS_H__ - -#include "unicode/utypes.h" -#include "unicode/uset.h" -#include "uset_imp.h" -#include "udataswp.h" - -/* indexes[] entries */ -enum { - UPROPS_PROPS32_INDEX, - UPROPS_EXCEPTIONS_INDEX, - UPROPS_EXCEPTIONS_TOP_INDEX, - - UPROPS_ADDITIONAL_TRIE_INDEX, - UPROPS_ADDITIONAL_VECTORS_INDEX, - UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX, - - UPROPS_SCRIPT_EXTENSIONS_INDEX, - - UPROPS_RESERVED_INDEX_7, - UPROPS_RESERVED_INDEX_8, - - /* size of the data file (number of 32-bit units after the header) */ - UPROPS_DATA_TOP_INDEX, - - /* maximum values for code values in vector word 0 */ - UPROPS_MAX_VALUES_INDEX=10, - /* maximum values for code values in vector word 2 */ - UPROPS_MAX_VALUES_2_INDEX, - - UPROPS_INDEX_COUNT=16 -}; - -/* definitions for the main properties words */ -enum { - /* general category shift==0 0 (5 bits) */ - /* reserved 5 (1 bit) */ - UPROPS_NUMERIC_TYPE_VALUE_SHIFT=6 /* 6 (10 bits) */ -}; - -#define GET_CATEGORY(props) ((props)&0x1f) -#define CAT_MASK(props) U_MASK(GET_CATEGORY(props)) - -#define GET_NUMERIC_TYPE_VALUE(props) ((props)>>UPROPS_NUMERIC_TYPE_VALUE_SHIFT) - -/* constants for the storage form of numeric types and values */ -enum { - /** No numeric value. 
*/ - UPROPS_NTV_NONE=0, - /** Decimal digits: nv=0..9 */ - UPROPS_NTV_DECIMAL_START=1, - /** Other digits: nv=0..9 */ - UPROPS_NTV_DIGIT_START=11, - /** Small integers: nv=0..154 */ - UPROPS_NTV_NUMERIC_START=21, - /** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */ - UPROPS_NTV_FRACTION_START=0xb0, - /** - * Large integers: - * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33) - * (only one significant decimal digit) - */ - UPROPS_NTV_LARGE_START=0x1e0, - /** - * Sexagesimal numbers: - * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4) - */ - UPROPS_NTV_BASE60_START=0x300, - /** - * Fraction-20 values: - * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640 - * numerator: num = 2*(frac20&3)+1 - * denominator: den = 20<<(frac20>>2) - */ - UPROPS_NTV_FRACTION20_START=UPROPS_NTV_BASE60_START+36, // 0x300+9*4=0x324 - /** No numeric value (yet). */ - UPROPS_NTV_RESERVED_START=UPROPS_NTV_FRACTION20_START+24, // 0x324+6*4=0x34c - - UPROPS_NTV_MAX_SMALL_INT=UPROPS_NTV_FRACTION_START-UPROPS_NTV_NUMERIC_START-1 -}; - -#define UPROPS_NTV_GET_TYPE(ntv) \ - ((ntv==UPROPS_NTV_NONE) ? 
U_NT_NONE : \ - (ntv=(int32_t)sizeof(pathBuffer)) { - *status=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } else if(uprv_isInvariantUString(myPath, length)) { - /* - * the invariant converter is sufficient for package and tree names - * and is more efficient - */ - u_UCharsToChars(myPath, path, length+1); /* length+1 to include the NUL */ - } else { -#if !UCONFIG_NO_CONVERSION - /* use the default converter to support variant-character paths */ - UConverter *cnv=u_getDefaultConverter(status); - length=ucnv_fromUChars(cnv, path, (int32_t)sizeof(pathBuffer), myPath, length, status); - u_releaseDefaultConverter(cnv); - if(U_FAILURE(*status)) { - return NULL; - } - if(length>=(int32_t)sizeof(pathBuffer)) { - /* not NUL-terminated - path too long */ - *status=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } -#else - /* the default converter is not available */ - *status=U_UNSUPPORTED_ERROR; - return NULL; -#endif - } - } - - return ures_open(path, localeID, status); -} diff --git a/deps/node/deps/icu-small/source/common/uresbund.cpp b/deps/node/deps/icu-small/source/common/uresbund.cpp deleted file mode 100644 index 3da73421..00000000 --- a/deps/node/deps/icu-small/source/common/uresbund.cpp +++ /dev/null @@ -1,3025 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1997-2016, International Business Machines Corporation and -* others. All Rights Reserved. -****************************************************************************** -* -* File uresbund.cpp -* -* Modification History: -* -* Date Name Description -* 04/01/97 aliu Creation. -* 06/14/99 stephen Removed functions taking a filename suffix. 
-* 07/20/99 stephen Changed for UResourceBundle typedef'd to void* -* 11/09/99 weiv Added ures_getLocale() -* March 2000 weiv Total overhaul - using data in DLLs -* 06/20/2000 helena OS/400 port changes; mostly typecast. -* 06/24/02 weiv Added support for resource sharing -****************************************************************************** -*/ - -#include "unicode/ustring.h" -#include "unicode/ucnv.h" -#include "charstr.h" -#include "uresimp.h" -#include "ustr_imp.h" -#include "cwchar.h" -#include "ucln_cmn.h" -#include "cmemory.h" -#include "cstring.h" -#include "uhash.h" -#include "unicode/uenum.h" -#include "uenumimp.h" -#include "ulocimp.h" -#include "umutex.h" -#include "putilimp.h" -#include "uassert.h" - -using namespace icu; - -/* -Static cache for already opened resource bundles - mostly for keeping fallback info -TODO: This cache should probably be removed when the deprecated code is - completely removed. -*/ -static UHashtable *cache = NULL; -static icu::UInitOnce gCacheInitOnce; - -static UMutex resbMutex = U_MUTEX_INITIALIZER; - -/* INTERNAL: hashes an entry */ -static int32_t U_CALLCONV hashEntry(const UHashTok parm) { - UResourceDataEntry *b = (UResourceDataEntry *)parm.pointer; - UHashTok namekey, pathkey; - namekey.pointer = b->fName; - pathkey.pointer = b->fPath; - return uhash_hashChars(namekey)+37u*uhash_hashChars(pathkey); -} - -/* INTERNAL: compares two entries */ -static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) { - UResourceDataEntry *b1 = (UResourceDataEntry *)p1.pointer; - UResourceDataEntry *b2 = (UResourceDataEntry *)p2.pointer; - UHashTok name1, name2, path1, path2; - name1.pointer = b1->fName; - name2.pointer = b2->fName; - path1.pointer = b1->fPath; - path2.pointer = b2->fPath; - return (UBool)(uhash_compareChars(name1, name2) && - uhash_compareChars(path1, path2)); -} - - -/** - * Internal function, gets parts of locale name according - * to the position of '_' character - */ -static UBool 
chopLocale(char *name) { - char *i = uprv_strrchr(name, '_'); - - if(i != NULL) { - *i = '\0'; - return TRUE; - } - - return FALSE; -} - -/** - * Internal function - */ -static void entryIncrease(UResourceDataEntry *entry) { - umtx_lock(&resbMutex); - entry->fCountExisting++; - while(entry->fParent != NULL) { - entry = entry->fParent; - entry->fCountExisting++; - } - umtx_unlock(&resbMutex); -} - -/** - * Internal function. Tries to find a resource in given Resource - * Bundle, as well as in its parents - */ -static const ResourceData *getFallbackData(const UResourceBundle* resBundle, const char* * resTag, UResourceDataEntry* *realData, Resource *res, UErrorCode *status) { - UResourceDataEntry *resB = resBundle->fData; - int32_t indexR = -1; - int32_t i = 0; - *res = RES_BOGUS; - if(resB != NULL) { - if(resB->fBogus == U_ZERO_ERROR) { /* if this resource is real, */ - *res = res_getTableItemByKey(&(resB->fData), resB->fData.rootRes, &indexR, resTag); /* try to get data from there */ - i++; - } - if(resBundle->fHasFallback == TRUE) { - while(*res == RES_BOGUS && resB->fParent != NULL) { /* Otherwise, we'll look in parents */ - resB = resB->fParent; - if(resB->fBogus == U_ZERO_ERROR) { - i++; - *res = res_getTableItemByKey(&(resB->fData), resB->fData.rootRes, &indexR, resTag); - } - } - } - - if(*res != RES_BOGUS) { /* If the resource is found in parents, we need to adjust the error */ - if(i>1) { - if(uprv_strcmp(resB->fName, uloc_getDefault())==0 || uprv_strcmp(resB->fName, kRootLocaleName)==0) { - *status = U_USING_DEFAULT_WARNING; - } else { - *status = U_USING_FALLBACK_WARNING; - } - } - *realData = resB; - return (&(resB->fData)); - } else { /* If resource is not found, we need to give an error */ - *status = U_MISSING_RESOURCE_ERROR; - return NULL; - } - } else { - *status = U_MISSING_RESOURCE_ERROR; - return NULL; - } -} - -static void -free_entry(UResourceDataEntry *entry) { - UResourceDataEntry *alias; - res_unload(&(entry->fData)); - if(entry->fName != 
NULL && entry->fName != entry->fNameBuffer) { - uprv_free(entry->fName); - } - if(entry->fPath != NULL) { - uprv_free(entry->fPath); - } - if(entry->fPool != NULL) { - --entry->fPool->fCountExisting; - } - alias = entry->fAlias; - if(alias != NULL) { - while(alias->fAlias != NULL) { - alias = alias->fAlias; - } - --alias->fCountExisting; - } - uprv_free(entry); -} - -/* Works just like ucnv_flushCache() */ -static int32_t ures_flushCache() -{ - UResourceDataEntry *resB; - int32_t pos; - int32_t rbDeletedNum = 0; - const UHashElement *e; - UBool deletedMore; - - /*if shared data hasn't even been lazy evaluated yet - * return 0 - */ - umtx_lock(&resbMutex); - if (cache == NULL) { - umtx_unlock(&resbMutex); - return 0; - } - - do { - deletedMore = FALSE; - /*creates an enumeration to iterate through every element in the table */ - pos = UHASH_FIRST; - while ((e = uhash_nextElement(cache, &pos)) != NULL) - { - resB = (UResourceDataEntry *) e->value.pointer; - /* Deletes only if reference counter == 0 - * Don't worry about the children of this node. - * Those will eventually get deleted too, if not already. - * Don't worry about the parents of this node. - * Those will eventually get deleted too, if not already. - */ - /* 04/05/2002 [weiv] fCountExisting should now be accurate. If it's not zero, that means that */ - /* some resource bundles are still open somewhere. */ - - if (resB->fCountExisting == 0) { - rbDeletedNum++; - deletedMore = TRUE; - uhash_removeElement(cache, e); - free_entry(resB); - } - } - /* - * Do it again to catch bundles (aliases, pool bundle) whose fCountExisting - * got decremented by free_entry(). 
- */ - } while(deletedMore); - umtx_unlock(&resbMutex); - - return rbDeletedNum; -} - -#ifdef URES_DEBUG -#include - -U_CAPI UBool U_EXPORT2 ures_dumpCacheContents(void) { - UBool cacheNotEmpty = FALSE; - int32_t pos = UHASH_FIRST; - const UHashElement *e; - UResourceDataEntry *resB; - - umtx_lock(&resbMutex); - if (cache == NULL) { - umtx_unlock(&resbMutex); - fprintf(stderr,"%s:%d: RB Cache is NULL.\n", __FILE__, __LINE__); - return FALSE; - } - - while ((e = uhash_nextElement(cache, &pos)) != NULL) { - cacheNotEmpty=TRUE; - resB = (UResourceDataEntry *) e->value.pointer; - fprintf(stderr,"%s:%d: RB Cache: Entry @0x%p, refcount %d, name %s:%s. Pool 0x%p, alias 0x%p, parent 0x%p\n", - __FILE__, __LINE__, - (void*)resB, resB->fCountExisting, - resB->fName?resB->fName:"NULL", - resB->fPath?resB->fPath:"NULL", - (void*)resB->fPool, - (void*)resB->fAlias, - (void*)resB->fParent); - } - - fprintf(stderr,"%s:%d: RB Cache still contains %d items.\n", __FILE__, __LINE__, uhash_count(cache)); - - umtx_unlock(&resbMutex); - - return cacheNotEmpty; -} - -#endif - -static UBool U_CALLCONV ures_cleanup(void) -{ - if (cache != NULL) { - ures_flushCache(); - uhash_close(cache); - cache = NULL; - } - gCacheInitOnce.reset(); - return TRUE; -} - -/** INTERNAL: Initializes the cache for resources */ -static void U_CALLCONV createCache(UErrorCode &status) { - U_ASSERT(cache == NULL); - cache = uhash_open(hashEntry, compareEntries, NULL, &status); - ucln_common_registerCleanup(UCLN_COMMON_URES, ures_cleanup); -} - -static void initCache(UErrorCode *status) { - umtx_initOnce(gCacheInitOnce, &createCache, *status); -} - -/** INTERNAL: sets the name (locale) of the resource bundle to given name */ - -static void setEntryName(UResourceDataEntry *res, const char *name, UErrorCode *status) { - int32_t len = (int32_t)uprv_strlen(name); - if(res->fName != NULL && res->fName != res->fNameBuffer) { - uprv_free(res->fName); - } - if (len < (int32_t)sizeof(res->fNameBuffer)) { - res->fName = 
res->fNameBuffer; - } - else { - res->fName = (char *)uprv_malloc(len+1); - } - if(res->fName == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - } else { - uprv_strcpy(res->fName, name); - } -} - -static UResourceDataEntry * -getPoolEntry(const char *path, UErrorCode *status); - -/** - * INTERNAL: Inits and opens an entry from a data DLL. - * CAUTION: resbMutex must be locked when calling this function. - */ -static UResourceDataEntry *init_entry(const char *localeID, const char *path, UErrorCode *status) { - UResourceDataEntry *r = NULL; - UResourceDataEntry find; - /*int32_t hashValue;*/ - const char *name; - char aliasName[100] = { 0 }; - int32_t aliasLen = 0; - /*UBool isAlias = FALSE;*/ - /*UHashTok hashkey; */ - - if(U_FAILURE(*status)) { - return NULL; - } - - /* here we try to deduce the right locale name */ - if(localeID == NULL) { /* if localeID is NULL, we're trying to open default locale */ - name = uloc_getDefault(); - } else if(*localeID == 0) { /* if localeID is "" then we try to open root locale */ - name = kRootLocaleName; - } else { /* otherwise, we'll open what we're given */ - name = localeID; - } - - find.fName = (char *)name; - find.fPath = (char *)path; - - /* calculate the hash value of the entry */ - /*hashkey.pointer = (void *)&find;*/ - /*hashValue = hashEntry(hashkey);*/ - - /* check to see if we already have this entry */ - r = (UResourceDataEntry *)uhash_get(cache, &find); - if(r == NULL) { - /* if the entry is not yet in the hash table, we'll try to construct a new one */ - r = (UResourceDataEntry *) uprv_malloc(sizeof(UResourceDataEntry)); - if(r == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - uprv_memset(r, 0, sizeof(UResourceDataEntry)); - /*r->fHashKey = hashValue;*/ - - setEntryName(r, name, status); - if (U_FAILURE(*status)) { - uprv_free(r); - return NULL; - } - - if(path != NULL) { - r->fPath = (char *)uprv_strdup(path); - if(r->fPath == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - uprv_free(r); - 
return NULL; - } - } - - /* this is the actual loading */ - res_load(&(r->fData), r->fPath, r->fName, status); - - if (U_FAILURE(*status)) { - /* if we failed to load due to an out-of-memory error, exit early. */ - if (*status == U_MEMORY_ALLOCATION_ERROR) { - uprv_free(r); - return NULL; - } - /* we have no such entry in dll, so it will always use fallback */ - *status = U_USING_FALLBACK_WARNING; - r->fBogus = U_USING_FALLBACK_WARNING; - } else { /* if we have a regular entry */ - Resource aliasres; - if (r->fData.usesPoolBundle) { - r->fPool = getPoolEntry(r->fPath, status); - if (U_SUCCESS(*status)) { - const int32_t *poolIndexes = r->fPool->fData.pRoot + 1; - if(r->fData.pRoot[1 + URES_INDEX_POOL_CHECKSUM] == poolIndexes[URES_INDEX_POOL_CHECKSUM]) { - r->fData.poolBundleKeys = (const char *)(poolIndexes + (poolIndexes[URES_INDEX_LENGTH] & 0xff)); - r->fData.poolBundleStrings = r->fPool->fData.p16BitUnits; - } else { - r->fBogus = *status = U_INVALID_FORMAT_ERROR; - } - } else { - r->fBogus = *status; - } - } - if (U_SUCCESS(*status)) { - /* handle the alias by trying to get out the %%Alias tag.*/ - /* We'll try to get alias string from the bundle */ - aliasres = res_getResource(&(r->fData), "%%ALIAS"); - if (aliasres != RES_BOGUS) { - const UChar *alias = res_getString(&(r->fData), aliasres, &aliasLen); - if(alias != NULL && aliasLen > 0) { /* if there is actual alias - unload and load new data */ - u_UCharsToChars(alias, aliasName, aliasLen+1); - r->fAlias = init_entry(aliasName, path, status); - } - } - } - } - - { - UResourceDataEntry *oldR = NULL; - if((oldR = (UResourceDataEntry *)uhash_get(cache, r)) == NULL) { /* if the data is not cached */ - /* just insert it in the cache */ - UErrorCode cacheStatus = U_ZERO_ERROR; - uhash_put(cache, (void *)r, r, &cacheStatus); - if (U_FAILURE(cacheStatus)) { - *status = cacheStatus; - free_entry(r); - r = NULL; - } - } else { - /* somebody have already inserted it while we were working, discard newly opened data */ - 
/* Also, we could get here IF we opened an alias */ - free_entry(r); - r = oldR; - } - } - - } - if(r != NULL) { - /* return the real bundle */ - while(r->fAlias != NULL) { - r = r->fAlias; - } - r->fCountExisting++; /* we increase its reference count */ - /* if the resource has a warning */ - /* we don't want to overwrite a status with no error */ - if(r->fBogus != U_ZERO_ERROR && U_SUCCESS(*status)) { - *status = r->fBogus; /* set the returning status */ - } - } - return r; -} - -static UResourceDataEntry * -getPoolEntry(const char *path, UErrorCode *status) { - UResourceDataEntry *poolBundle = init_entry(kPoolBundleName, path, status); - if( U_SUCCESS(*status) && - (poolBundle == NULL || poolBundle->fBogus != U_ZERO_ERROR || !poolBundle->fData.isPoolBundle) - ) { - *status = U_INVALID_FORMAT_ERROR; - } - return poolBundle; -} - -/* INTERNAL: */ -/* CAUTION: resbMutex must be locked when calling this function! */ -static UResourceDataEntry * -findFirstExisting(const char* path, char* name, - UBool *isRoot, UBool *hasChopped, UBool *isDefault, UErrorCode* status) { - UResourceDataEntry *r = NULL; - UBool hasRealData = FALSE; - const char *defaultLoc = uloc_getDefault(); - *hasChopped = TRUE; /* we're starting with a fresh name */ - - while(*hasChopped && !hasRealData) { - r = init_entry(name, path, status); - /* Null pointer test */ - if (U_FAILURE(*status)) { - return NULL; - } - *isDefault = (UBool)(uprv_strncmp(name, defaultLoc, uprv_strlen(name)) == 0); - hasRealData = (UBool)(r->fBogus == U_ZERO_ERROR); - if(!hasRealData) { - /* this entry is not real. We will discard it. */ - /* However, the parent line for this entry is */ - /* not to be used - as there might be parent */ - /* lines in cache from previous openings that */ - /* are not updated yet. 
*/ - r->fCountExisting--; - /*entryCloseInt(r);*/ - r = NULL; - *status = U_USING_FALLBACK_WARNING; - } else { - uprv_strcpy(name, r->fName); /* this is needed for supporting aliases */ - } - - *isRoot = (UBool)(uprv_strcmp(name, kRootLocaleName) == 0); - - /*Fallback data stuff*/ - *hasChopped = chopLocale(name); - } - return r; -} - -static void ures_setIsStackObject( UResourceBundle* resB, UBool state) { - if(state) { - resB->fMagic1 = 0; - resB->fMagic2 = 0; - } else { - resB->fMagic1 = MAGIC1; - resB->fMagic2 = MAGIC2; - } -} - -static UBool ures_isStackObject(const UResourceBundle* resB) { - return((resB->fMagic1 == MAGIC1 && resB->fMagic2 == MAGIC2)?FALSE:TRUE); -} - - -U_CFUNC void ures_initStackObject(UResourceBundle* resB) { - uprv_memset(resB, 0, sizeof(UResourceBundle)); - ures_setIsStackObject(resB, TRUE); -} - -static UBool // returns U_SUCCESS(*status) -loadParentsExceptRoot(UResourceDataEntry *&t1, - char name[], int32_t nameCapacity, - UBool usingUSRData, char usrDataPath[], UErrorCode *status) { - if (U_FAILURE(*status)) { return FALSE; } - UBool hasChopped = TRUE; - while (hasChopped && t1->fParent == NULL && !t1->fData.noFallback && - res_getResource(&t1->fData,"%%ParentIsRoot") == RES_BOGUS) { - Resource parentRes = res_getResource(&t1->fData, "%%Parent"); - if (parentRes != RES_BOGUS) { // An explicit parent was found. - int32_t parentLocaleLen = 0; - const UChar *parentLocaleName = res_getString(&(t1->fData), parentRes, &parentLocaleLen); - if(parentLocaleName != NULL && 0 < parentLocaleLen && parentLocaleLen < nameCapacity) { - u_UCharsToChars(parentLocaleName, name, parentLocaleLen + 1); - if (uprv_strcmp(name, kRootLocaleName) == 0) { - return TRUE; - } - } - } - // Insert regular parents. 
- UErrorCode parentStatus = U_ZERO_ERROR; - UResourceDataEntry *t2 = init_entry(name, t1->fPath, &parentStatus); - if (U_FAILURE(parentStatus)) { - *status = parentStatus; - return FALSE; - } - UResourceDataEntry *u2 = NULL; - UErrorCode usrStatus = U_ZERO_ERROR; - if (usingUSRData) { // This code inserts user override data into the inheritance chain. - u2 = init_entry(name, usrDataPath, &usrStatus); - // If we failed due to out-of-memory, report that to the caller and exit early. - if (usrStatus == U_MEMORY_ALLOCATION_ERROR) { - *status = usrStatus; - return FALSE; - } - } - - if (usingUSRData && U_SUCCESS(usrStatus) && u2->fBogus == U_ZERO_ERROR) { - t1->fParent = u2; - u2->fParent = t2; - } else { - t1->fParent = t2; - if (usingUSRData) { - // The USR override data wasn't found, set it to be deleted. - u2->fCountExisting = 0; - } - } - t1 = t2; - hasChopped = chopLocale(name); - } - return TRUE; -} - -static UBool // returns U_SUCCESS(*status) -insertRootBundle(UResourceDataEntry *&t1, UErrorCode *status) { - if (U_FAILURE(*status)) { return FALSE; } - UErrorCode parentStatus = U_ZERO_ERROR; - UResourceDataEntry *t2 = init_entry(kRootLocaleName, t1->fPath, &parentStatus); - if (U_FAILURE(parentStatus)) { - *status = parentStatus; - return FALSE; - } - t1->fParent = t2; - t1 = t2; - return TRUE; -} - -enum UResOpenType { - /** - * Open a resource bundle for the locale; - * if there is not even a base language bundle, then fall back to the default locale; - * if there is no bundle for that either, then load the root bundle. - * - * This is the default bundle loading behavior. - */ - URES_OPEN_LOCALE_DEFAULT_ROOT, - // TODO: ICU ticket #11271 "consistent default locale across locale trees" - // Add an option to look at the main locale tree for whether to - // fall back to root directly (if the locale has main data) or - // fall back to the default locale first (if the locale does not even have main data). 
- /** - * Open a resource bundle for the locale; - * if there is not even a base language bundle, then load the root bundle; - * never fall back to the default locale. - * - * This is used for algorithms that have good pan-Unicode default behavior, - * such as case mappings, collation, and segmentation (BreakIterator). - */ - URES_OPEN_LOCALE_ROOT, - /** - * Open a resource bundle for the exact bundle name as requested; - * no fallbacks, do not load parent bundles. - * - * This is used for supplemental (non-locale) data. - */ - URES_OPEN_DIRECT -}; -typedef enum UResOpenType UResOpenType; - -static UResourceDataEntry *entryOpen(const char* path, const char* localeID, - UResOpenType openType, UErrorCode* status) { - U_ASSERT(openType != URES_OPEN_DIRECT); - UErrorCode intStatus = U_ZERO_ERROR; - UResourceDataEntry *r = NULL; - UResourceDataEntry *t1 = NULL; - UBool isDefault = FALSE; - UBool isRoot = FALSE; - UBool hasRealData = FALSE; - UBool hasChopped = TRUE; - UBool usingUSRData = U_USE_USRDATA && ( path == NULL || uprv_strncmp(path,U_ICUDATA_NAME,8) == 0); - - char name[ULOC_FULLNAME_CAPACITY]; - char usrDataPath[96]; - - initCache(status); - - if(U_FAILURE(*status)) { - return NULL; - } - - uprv_strncpy(name, localeID, sizeof(name) - 1); - name[sizeof(name) - 1] = 0; - - if ( usingUSRData ) { - if ( path == NULL ) { - uprv_strcpy(usrDataPath, U_USRDATA_NAME); - } else { - uprv_strncpy(usrDataPath, path, sizeof(usrDataPath) - 1); - usrDataPath[0] = 'u'; - usrDataPath[1] = 's'; - usrDataPath[2] = 'r'; - usrDataPath[sizeof(usrDataPath) - 1] = 0; - } - } - - umtx_lock(&resbMutex); - { /* umtx_lock */ - /* We're going to skip all the locales that do not have any data */ - r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); - - // If we failed due to out-of-memory, report the failure and exit early. 
- if (intStatus == U_MEMORY_ALLOCATION_ERROR) { - *status = intStatus; - goto finishUnlock; - } - - if(r != NULL) { /* if there is one real locale, we can look for parents. */ - t1 = r; - hasRealData = TRUE; - if ( usingUSRData ) { /* This code inserts user override data into the inheritance chain */ - UErrorCode usrStatus = U_ZERO_ERROR; - UResourceDataEntry *u1 = init_entry(t1->fName, usrDataPath, &usrStatus); - // If we failed due to out-of-memory, report the failure and exit early. - if (intStatus == U_MEMORY_ALLOCATION_ERROR) { - *status = intStatus; - goto finishUnlock; - } - if ( u1 != NULL ) { - if(u1->fBogus == U_ZERO_ERROR) { - u1->fParent = t1; - r = u1; - } else { - /* the USR override data wasn't found, set it to be deleted */ - u1->fCountExisting = 0; - } - } - } - if (hasChopped && !isRoot) { - if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) { - goto finishUnlock; - } - } - } - - /* we could have reached this point without having any real data */ - /* if that is the case, we need to chain in the default locale */ - if(r==NULL && openType == URES_OPEN_LOCALE_DEFAULT_ROOT && !isDefault && !isRoot) { - /* insert default locale */ - uprv_strcpy(name, uloc_getDefault()); - r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); - // If we failed due to out-of-memory, report the failure and exit early. - if (intStatus == U_MEMORY_ALLOCATION_ERROR) { - *status = intStatus; - goto finishUnlock; - } - intStatus = U_USING_DEFAULT_WARNING; - if(r != NULL) { /* the default locale exists */ - t1 = r; - hasRealData = TRUE; - isDefault = TRUE; - // TODO: Why not if (usingUSRData) { ... } like in the non-default-locale code path? 
- if (hasChopped && !isRoot) { - if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) { - goto finishUnlock; - } - } - } - } - - /* we could still have r == NULL at this point - maybe even default locale is not */ - /* present */ - if(r == NULL) { - uprv_strcpy(name, kRootLocaleName); - r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); - // If we failed due to out-of-memory, report the failure and exit early. - if (intStatus == U_MEMORY_ALLOCATION_ERROR) { - *status = intStatus; - goto finishUnlock; - } - if(r != NULL) { - t1 = r; - intStatus = U_USING_DEFAULT_WARNING; - hasRealData = TRUE; - } else { /* we don't even have the root locale */ - *status = U_MISSING_RESOURCE_ERROR; - goto finishUnlock; - } - } else if(!isRoot && uprv_strcmp(t1->fName, kRootLocaleName) != 0 && - t1->fParent == NULL && !r->fData.noFallback) { - if (!insertRootBundle(t1, status)) { - goto finishUnlock; - } - if(!hasRealData) { - r->fBogus = U_USING_DEFAULT_WARNING; - } - } - - // TODO: Does this ever loop? - while(r != NULL && !isRoot && t1->fParent != NULL) { - t1->fParent->fCountExisting++; - t1 = t1->fParent; - } - } /* umtx_lock */ -finishUnlock: - umtx_unlock(&resbMutex); - - if(U_SUCCESS(*status)) { - if(intStatus != U_ZERO_ERROR) { - *status = intStatus; - } - return r; - } else { - return NULL; - } -} - -/** - * Version of entryOpen() and findFirstExisting() for ures_openDirect(), - * with no fallbacks. - * Parent and root locale bundles are loaded if - * the requested bundle does not have the "nofallback" flag. - */ -static UResourceDataEntry * -entryOpenDirect(const char* path, const char* localeID, UErrorCode* status) { - initCache(status); - if(U_FAILURE(*status)) { - return NULL; - } - - umtx_lock(&resbMutex); - // findFirstExisting() without fallbacks. 
- UResourceDataEntry *r = init_entry(localeID, path, status); - if(U_SUCCESS(*status)) { - if(r->fBogus != U_ZERO_ERROR) { - r->fCountExisting--; - r = NULL; - } - } else { - r = NULL; - } - - // Some code depends on the ures_openDirect() bundle to have a parent bundle chain, - // unless it is marked with "nofallback". - UResourceDataEntry *t1 = r; - if(r != NULL && uprv_strcmp(localeID, kRootLocaleName) != 0 && // not root - r->fParent == NULL && !r->fData.noFallback && - uprv_strlen(localeID) < ULOC_FULLNAME_CAPACITY) { - char name[ULOC_FULLNAME_CAPACITY]; - uprv_strcpy(name, localeID); - if(!chopLocale(name) || uprv_strcmp(name, kRootLocaleName) == 0 || - loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), FALSE, NULL, status)) { - if(uprv_strcmp(t1->fName, kRootLocaleName) != 0 && t1->fParent == NULL) { - insertRootBundle(t1, status); - } - } - if(U_FAILURE(*status)) { - r = NULL; - } - } - - if(r != NULL) { - // TODO: Does this ever loop? - while(t1->fParent != NULL) { - t1->fParent->fCountExisting++; - t1 = t1->fParent; - } - } - umtx_unlock(&resbMutex); - return r; -} - -/** - * Functions to create and destroy resource bundles. - * CAUTION: resbMutex must be locked when calling this function. - */ -/* INTERNAL: */ -static void entryCloseInt(UResourceDataEntry *resB) { - UResourceDataEntry *p = resB; - - while(resB != NULL) { - p = resB->fParent; - resB->fCountExisting--; - - /* Entries are left in the cache. TODO: add ures_flushCache() to force a flush - of the cache. */ -/* - if(resB->fCountExisting <= 0) { - uhash_remove(cache, resB); - if(resB->fBogus == U_ZERO_ERROR) { - res_unload(&(resB->fData)); - } - if(resB->fName != NULL) { - uprv_free(resB->fName); - } - if(resB->fPath != NULL) { - uprv_free(resB->fPath); - } - uprv_free(resB); - } -*/ - - resB = p; - } -} - -/** - * API: closes a resource bundle and cleans up. 
- */ - -static void entryClose(UResourceDataEntry *resB) { - umtx_lock(&resbMutex); - entryCloseInt(resB); - umtx_unlock(&resbMutex); -} - -/* -U_CFUNC void ures_setResPath(UResourceBundle *resB, const char* toAdd) { - if(resB->fResPath == NULL) { - resB->fResPath = resB->fResBuf; - *(resB->fResPath) = 0; - } - resB->fResPathLen = uprv_strlen(toAdd); - if(RES_BUFSIZE <= resB->fResPathLen+1) { - if(resB->fResPath == resB->fResBuf) { - resB->fResPath = (char *)uprv_malloc((resB->fResPathLen+1)*sizeof(char)); - } else { - resB->fResPath = (char *)uprv_realloc(resB->fResPath, (resB->fResPathLen+1)*sizeof(char)); - } - } - uprv_strcpy(resB->fResPath, toAdd); -} -*/ -static void ures_appendResPath(UResourceBundle *resB, const char* toAdd, int32_t lenToAdd, UErrorCode *status) { - int32_t resPathLenOrig = resB->fResPathLen; - if(resB->fResPath == NULL) { - resB->fResPath = resB->fResBuf; - *(resB->fResPath) = 0; - resB->fResPathLen = 0; - } - resB->fResPathLen += lenToAdd; - if(RES_BUFSIZE <= resB->fResPathLen+1) { - if(resB->fResPath == resB->fResBuf) { - resB->fResPath = (char *)uprv_malloc((resB->fResPathLen+1)*sizeof(char)); - /* Check that memory was allocated correctly. */ - if (resB->fResPath == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return; - } - uprv_strcpy(resB->fResPath, resB->fResBuf); - } else { - char *temp = (char *)uprv_realloc(resB->fResPath, (resB->fResPathLen+1)*sizeof(char)); - /* Check that memory was reallocated correctly. 
*/ - if (temp == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return; - } - resB->fResPath = temp; - } - } - uprv_strcpy(resB->fResPath + resPathLenOrig, toAdd); -} - -static void ures_freeResPath(UResourceBundle *resB) { - if (resB->fResPath && resB->fResPath != resB->fResBuf) { - uprv_free(resB->fResPath); - } - resB->fResPath = NULL; - resB->fResPathLen = 0; -} - -static void -ures_closeBundle(UResourceBundle* resB, UBool freeBundleObj) -{ - if(resB != NULL) { - if(resB->fData != NULL) { - entryClose(resB->fData); - } - if(resB->fVersion != NULL) { - uprv_free(resB->fVersion); - } - ures_freeResPath(resB); - - if(ures_isStackObject(resB) == FALSE && freeBundleObj) { - uprv_free(resB); - } -#if 0 /*U_DEBUG*/ - else { - /* poison the data */ - uprv_memset(resB, -1, sizeof(UResourceBundle)); - } -#endif - } -} - -U_CAPI void U_EXPORT2 -ures_close(UResourceBundle* resB) -{ - ures_closeBundle(resB, TRUE); -} - -static UResourceBundle *init_resb_result(const ResourceData *rdata, Resource r, - const char *key, int32_t idx, UResourceDataEntry *realData, - const UResourceBundle *parent, int32_t noAlias, - UResourceBundle *resB, UErrorCode *status) -{ - if(status == NULL || U_FAILURE(*status)) { - return resB; - } - if (parent == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - if(RES_GET_TYPE(r) == URES_ALIAS) { /* This is an alias, need to exchange with real data */ - if(noAlias < URES_MAX_ALIAS_LEVEL) { - int32_t len = 0; - const UChar *alias = res_getAlias(rdata, r, &len); - if(len > 0) { - /* we have an alias, now let's cut it up */ - char stackAlias[200]; - char *chAlias = NULL, *path = NULL, *locale = NULL, *keyPath = NULL; - int32_t capacity; - - /* - * Allocate enough space for both the char * version - * of the alias and parent->fResPath. - * - * We do this so that res_findResource() can modify the path, - * which allows us to remove redundant _res_findResource() variants - * in uresdata.c. 
- * res_findResource() now NUL-terminates each segment so that table keys - * can always be compared with strcmp() instead of strncmp(). - * Saves code there and simplifies testing and code coverage. - * - * markus 2003oct17 - */ - ++len; /* count the terminating NUL */ - if(parent->fResPath != NULL) { - capacity = (int32_t)uprv_strlen(parent->fResPath) + 1; - } else { - capacity = 0; - } - if(capacity < len) { - capacity = len; - } - if(capacity <= (int32_t)sizeof(stackAlias)) { - capacity = (int32_t)sizeof(stackAlias); - chAlias = stackAlias; - } else { - chAlias = (char *)uprv_malloc(capacity); - /* test for NULL */ - if(chAlias == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - } - u_UCharsToChars(alias, chAlias, len); - - if(*chAlias == RES_PATH_SEPARATOR) { - /* there is a path included */ - locale = uprv_strchr(chAlias+1, RES_PATH_SEPARATOR); - if(locale == NULL) { - locale = uprv_strchr(chAlias, 0); /* avoid locale == NULL to make code below work */ - } else { - *locale = 0; - locale++; - } - path = chAlias+1; - if(uprv_strcmp(path, "LOCALE") == 0) { - /* this is an XPath alias, starting with "/LOCALE/" */ - /* it contains the path to a resource which should be looked up */ - /* starting in the requested locale */ - keyPath = locale; - locale = parent->fTopLevelData->fName; /* this is the requested locale's name */ - path = realData->fPath; /* we will be looking in the same package */ - } else { - if(uprv_strcmp(path, "ICUDATA") == 0) { /* want ICU data */ - path = NULL; - } - keyPath = uprv_strchr(locale, RES_PATH_SEPARATOR); - if(keyPath) { - *keyPath = 0; - keyPath++; - } - } - } else { - /* no path, start with a locale */ - locale = chAlias; - keyPath = uprv_strchr(locale, RES_PATH_SEPARATOR); - if(keyPath) { - *keyPath = 0; - keyPath++; - } - path = realData->fPath; - } - - - { - /* got almost everything, let's try to open */ - /* first, open the bundle with real data */ - UResourceBundle *result = resB; - const char* temp = NULL; - 
UErrorCode intStatus = U_ZERO_ERROR; - UResourceBundle *mainRes = ures_openDirect(path, locale, &intStatus); - if(U_SUCCESS(intStatus)) { - if(keyPath == NULL) { - /* no key path. This means that we are going to - * to use the corresponding resource from - * another bundle - */ - /* first, we are going to get a corresponding parent - * resource to the one we are searching. - */ - char *aKey = parent->fResPath; - if(aKey) { - uprv_strcpy(chAlias, aKey); /* allocated large enough above */ - aKey = chAlias; - r = res_findResource(&(mainRes->fResData), mainRes->fRes, &aKey, &temp); - } else { - r = mainRes->fRes; - } - if(key) { - /* we need to make keyPath from parent's fResPath and - * current key, if there is a key associated - */ - len = (int32_t)(uprv_strlen(key) + 1); - if(len > capacity) { - capacity = len; - if(chAlias == stackAlias) { - chAlias = (char *)uprv_malloc(capacity); - } else { - chAlias = (char *)uprv_realloc(chAlias, capacity); - } - if(chAlias == NULL) { - ures_close(mainRes); - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - } - uprv_memcpy(chAlias, key, len); - aKey = chAlias; - r = res_findResource(&(mainRes->fResData), r, &aKey, &temp); - } else if(idx != -1) { - /* if there is no key, but there is an index, try to get by the index */ - /* here we have either a table or an array, so get the element */ - int32_t type = RES_GET_TYPE(r); - if(URES_IS_TABLE(type)) { - r = res_getTableItemByIndex(&(mainRes->fResData), r, idx, (const char **)&aKey); - } else { /* array */ - r = res_getArrayItem(&(mainRes->fResData), r, idx); - } - } - if(r != RES_BOGUS) { - result = init_resb_result(&(mainRes->fResData), r, temp, -1, mainRes->fData, mainRes, noAlias+1, resB, status); - } else { - *status = U_MISSING_RESOURCE_ERROR; - result = resB; - } - } else { - /* this one is a bit trickier. - * we start finding keys, but after we resolve one alias, the path might continue. 
- * Consider: - * aliastest:alias { "testtypes/anotheralias/Sequence" } - * anotheralias:alias { "/ICUDATA/sh/CollationElements" } - * aliastest resource should finally have the sequence, not collation elements. - */ - UResourceDataEntry *dataEntry = mainRes->fData; - char stackPath[URES_MAX_BUFFER_SIZE]; - char *pathBuf = stackPath, *myPath = pathBuf; - if(uprv_strlen(keyPath) > URES_MAX_BUFFER_SIZE) { - pathBuf = (char *)uprv_malloc((uprv_strlen(keyPath)+1)*sizeof(char)); - if(pathBuf == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - ures_close(mainRes); - return NULL; - } - } - uprv_strcpy(pathBuf, keyPath); - result = mainRes; - /* now we have fallback following here */ - do { - r = dataEntry->fData.rootRes; - /* this loop handles 'found' resources over several levels */ - while(*myPath && U_SUCCESS(*status)) { - r = res_findResource(&(dataEntry->fData), r, &myPath, &temp); - if(r != RES_BOGUS) { /* found a resource, but it might be an indirection */ - resB = init_resb_result(&(dataEntry->fData), r, temp, -1, dataEntry, result, noAlias+1, resB, status); - result = resB; - if(result) { - r = result->fRes; /* switch to a new resource, possibly a new tree */ - dataEntry = result->fData; - } - } else { /* no resource found, we don't really want to look anymore on this level */ - break; - } - } - dataEntry = dataEntry->fParent; - uprv_strcpy(pathBuf, keyPath); - myPath = pathBuf; - } while(r == RES_BOGUS && dataEntry != NULL); - if(r == RES_BOGUS) { - *status = U_MISSING_RESOURCE_ERROR; - result = resB; - } - if(pathBuf != stackPath) { - uprv_free(pathBuf); - } - } - } else { /* we failed to open the resource we're aliasing to */ - *status = intStatus; - } - if(chAlias != stackAlias) { - uprv_free(chAlias); - } - if(mainRes != result) { - ures_close(mainRes); - } - return result; - } - } else { - /* bad alias, should be an error */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return resB; - } - } else { - *status = U_TOO_MANY_ALIASES_ERROR; - return resB; - } - } - 
if(resB == NULL) { - resB = (UResourceBundle *)uprv_malloc(sizeof(UResourceBundle)); - /* test for NULL */ - if (resB == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - ures_setIsStackObject(resB, FALSE); - resB->fResPath = NULL; - resB->fResPathLen = 0; - } else { - if(resB->fData != NULL) { - entryClose(resB->fData); - } - if(resB->fVersion != NULL) { - uprv_free(resB->fVersion); - } - /* - weiv: if stack object was passed in, it doesn't really need to be reinited, - since the purpose of initing is to remove stack junk. However, at this point - we would not do anything to an allocated object, so stack object should be - treated the same - */ - /* - if(ures_isStackObject(resB) != FALSE) { - ures_initStackObject(resB); - } - */ - if(parent != resB) { - ures_freeResPath(resB); - } - } - resB->fData = realData; - entryIncrease(resB->fData); - resB->fHasFallback = FALSE; - resB->fIsTopLevel = FALSE; - resB->fIndex = -1; - resB->fKey = key; - /*resB->fParentRes = parent;*/ - resB->fTopLevelData = parent->fTopLevelData; - if(parent->fResPath && parent != resB) { - ures_appendResPath(resB, parent->fResPath, parent->fResPathLen, status); - } - if(key != NULL) { - ures_appendResPath(resB, key, (int32_t)uprv_strlen(key), status); - if(resB->fResPath[resB->fResPathLen-1] != RES_PATH_SEPARATOR) { - ures_appendResPath(resB, RES_PATH_SEPARATOR_S, 1, status); - } - } else if(idx >= 0) { - char buf[256]; - int32_t len = T_CString_integerToString(buf, idx, 10); - ures_appendResPath(resB, buf, len, status); - if(resB->fResPath[resB->fResPathLen-1] != RES_PATH_SEPARATOR) { - ures_appendResPath(resB, RES_PATH_SEPARATOR_S, 1, status); - } - } - /* Make sure that Purify doesn't complain about uninitialized memory copies. */ - { - int32_t usedLen = ((resB->fResBuf == resB->fResPath) ? 
resB->fResPathLen : 0); - uprv_memset(resB->fResBuf + usedLen, 0, sizeof(resB->fResBuf) - usedLen); - } - - resB->fVersion = NULL; - resB->fRes = r; - /*resB->fParent = parent->fRes;*/ - uprv_memmove(&resB->fResData, rdata, sizeof(ResourceData)); - resB->fSize = res_countArrayItems(&(resB->fResData), resB->fRes); - return resB; -} - -UResourceBundle *ures_copyResb(UResourceBundle *r, const UResourceBundle *original, UErrorCode *status) { - UBool isStackObject; - if(U_FAILURE(*status) || r == original) { - return r; - } - if(original != NULL) { - if(r == NULL) { - isStackObject = FALSE; - r = (UResourceBundle *)uprv_malloc(sizeof(UResourceBundle)); - /* test for NULL */ - if (r == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - } else { - isStackObject = ures_isStackObject(r); - ures_closeBundle(r, FALSE); - } - uprv_memcpy(r, original, sizeof(UResourceBundle)); - r->fResPath = NULL; - r->fResPathLen = 0; - if(original->fResPath) { - ures_appendResPath(r, original->fResPath, original->fResPathLen, status); - } - ures_setIsStackObject(r, isStackObject); - if(r->fData != NULL) { - entryIncrease(r->fData); - } - } - return r; -} - -/** - * Functions to retrieve data from resource bundles. 
- */ - -U_CAPI const UChar* U_EXPORT2 ures_getString(const UResourceBundle* resB, int32_t* len, UErrorCode* status) { - const UChar *s; - if (status==NULL || U_FAILURE(*status)) { - return NULL; - } - if(resB == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - s = res_getString(&(resB->fResData), resB->fRes, len); - if (s == NULL) { - *status = U_RESOURCE_TYPE_MISMATCH; - } - return s; -} - -static const char * -ures_toUTF8String(const UChar *s16, int32_t length16, - char *dest, int32_t *pLength, - UBool forceCopy, - UErrorCode *status) { - int32_t capacity; - - if (U_FAILURE(*status)) { - return NULL; - } - if (pLength != NULL) { - capacity = *pLength; - } else { - capacity = 0; - } - if (capacity < 0 || (capacity > 0 && dest == NULL)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - if (length16 == 0) { - /* empty string, return as read-only pointer */ - if (pLength != NULL) { - *pLength = 0; - } - if (forceCopy) { - u_terminateChars(dest, capacity, 0, status); - return dest; - } else { - return ""; - } - } else { - /* We need to transform the string to the destination buffer. */ - if (capacity < length16) { - /* No chance for the string to fit. Pure preflighting. */ - return u_strToUTF8(NULL, 0, pLength, s16, length16, status); - } - if (!forceCopy && (length16 <= 0x2aaaaaaa)) { - /* - * We know the string will fit into dest because each UChar turns - * into at most three UTF-8 bytes. Fill the latter part of dest - * so that callers do not expect to use dest as a string pointer, - * hopefully leading to more robust code for when resource bundles - * may store UTF-8 natively. - * (In which case dest would not be used at all.) - * - * We do not do this if forceCopy=TRUE because then the caller - * expects the string to start exactly at dest. - * - * The test above for <= 0x2aaaaaaa prevents overflows. - * The +1 is for the NUL terminator. 
- */ - int32_t maxLength = 3 * length16 + 1; - if (capacity > maxLength) { - dest += capacity - maxLength; - capacity = maxLength; - } - } - return u_strToUTF8(dest, capacity, pLength, s16, length16, status); - } -} - -U_CAPI const char * U_EXPORT2 -ures_getUTF8String(const UResourceBundle *resB, - char *dest, int32_t *pLength, - UBool forceCopy, - UErrorCode *status) { - int32_t length16; - const UChar *s16 = ures_getString(resB, &length16, status); - return ures_toUTF8String(s16, length16, dest, pLength, forceCopy, status); -} - -U_CAPI const uint8_t* U_EXPORT2 ures_getBinary(const UResourceBundle* resB, int32_t* len, - UErrorCode* status) { - const uint8_t *p; - if (status==NULL || U_FAILURE(*status)) { - return NULL; - } - if(resB == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - p = res_getBinary(&(resB->fResData), resB->fRes, len); - if (p == NULL) { - *status = U_RESOURCE_TYPE_MISMATCH; - } - return p; -} - -U_CAPI const int32_t* U_EXPORT2 ures_getIntVector(const UResourceBundle* resB, int32_t* len, - UErrorCode* status) { - const int32_t *p; - if (status==NULL || U_FAILURE(*status)) { - return NULL; - } - if(resB == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - p = res_getIntVector(&(resB->fResData), resB->fRes, len); - if (p == NULL) { - *status = U_RESOURCE_TYPE_MISMATCH; - } - return p; -} - -/* this function returns a signed integer */ -/* it performs sign extension */ -U_CAPI int32_t U_EXPORT2 ures_getInt(const UResourceBundle* resB, UErrorCode *status) { - if (status==NULL || U_FAILURE(*status)) { - return 0xffffffff; - } - if(resB == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0xffffffff; - } - if(RES_GET_TYPE(resB->fRes) != URES_INT) { - *status = U_RESOURCE_TYPE_MISMATCH; - return 0xffffffff; - } - return RES_GET_INT(resB->fRes); -} - -U_CAPI uint32_t U_EXPORT2 ures_getUInt(const UResourceBundle* resB, UErrorCode *status) { - if (status==NULL || U_FAILURE(*status)) { - return 0xffffffff; - } - 
if(resB == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0xffffffff; - } - if(RES_GET_TYPE(resB->fRes) != URES_INT) { - *status = U_RESOURCE_TYPE_MISMATCH; - return 0xffffffff; - } - return RES_GET_UINT(resB->fRes); -} - -U_CAPI UResType U_EXPORT2 ures_getType(const UResourceBundle *resB) { - if(resB == NULL) { - return URES_NONE; - } - return res_getPublicType(resB->fRes); -} - -U_CAPI const char * U_EXPORT2 ures_getKey(const UResourceBundle *resB) { - if(resB == NULL) { - return NULL; - } - - return(resB->fKey); -} - -U_CAPI int32_t U_EXPORT2 ures_getSize(const UResourceBundle *resB) { - if(resB == NULL) { - return 0; - } - - return resB->fSize; -} - -static const UChar* ures_getStringWithAlias(const UResourceBundle *resB, Resource r, int32_t sIndex, int32_t *len, UErrorCode *status) { - if(RES_GET_TYPE(r) == URES_ALIAS) { - const UChar* result = 0; - UResourceBundle *tempRes = ures_getByIndex(resB, sIndex, NULL, status); - result = ures_getString(tempRes, len, status); - ures_close(tempRes); - return result; - } else { - return res_getString(&(resB->fResData), r, len); - } -} - -U_CAPI void U_EXPORT2 ures_resetIterator(UResourceBundle *resB){ - if(resB == NULL) { - return; - } - resB->fIndex = -1; -} - -U_CAPI UBool U_EXPORT2 ures_hasNext(const UResourceBundle *resB) { - if(resB == NULL) { - return FALSE; - } - return (UBool)(resB->fIndex < resB->fSize-1); -} - -U_CAPI const UChar* U_EXPORT2 ures_getNextString(UResourceBundle *resB, int32_t* len, const char ** key, UErrorCode *status) { - Resource r = RES_BOGUS; - - if (status==NULL || U_FAILURE(*status)) { - return NULL; - } - if(resB == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - if(resB->fIndex == resB->fSize-1) { - *status = U_INDEX_OUTOFBOUNDS_ERROR; - } else { - resB->fIndex++; - switch(RES_GET_TYPE(resB->fRes)) { - case URES_STRING: - case URES_STRING_V2: - return res_getString(&(resB->fResData), resB->fRes, len); - case URES_TABLE: - case URES_TABLE16: - case 
URES_TABLE32: - r = res_getTableItemByIndex(&(resB->fResData), resB->fRes, resB->fIndex, key); - if(r == RES_BOGUS && resB->fHasFallback) { - /* TODO: do the fallback */ - } - return ures_getStringWithAlias(resB, r, resB->fIndex, len, status); - case URES_ARRAY: - case URES_ARRAY16: - r = res_getArrayItem(&(resB->fResData), resB->fRes, resB->fIndex); - if(r == RES_BOGUS && resB->fHasFallback) { - /* TODO: do the fallback */ - } - return ures_getStringWithAlias(resB, r, resB->fIndex, len, status); - case URES_ALIAS: - return ures_getStringWithAlias(resB, resB->fRes, resB->fIndex, len, status); - case URES_INT: - case URES_BINARY: - case URES_INT_VECTOR: - *status = U_RESOURCE_TYPE_MISMATCH; - U_FALLTHROUGH; - default: - return NULL; - } - } - - return NULL; -} - -U_CAPI UResourceBundle* U_EXPORT2 ures_getNextResource(UResourceBundle *resB, UResourceBundle *fillIn, UErrorCode *status) { - const char *key = NULL; - Resource r = RES_BOGUS; - - if (status==NULL || U_FAILURE(*status)) { - /*return NULL;*/ - return fillIn; - } - if(resB == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - /*return NULL;*/ - return fillIn; - } - - if(resB->fIndex == resB->fSize-1) { - *status = U_INDEX_OUTOFBOUNDS_ERROR; - /*return NULL;*/ - } else { - resB->fIndex++; - switch(RES_GET_TYPE(resB->fRes)) { - case URES_INT: - case URES_BINARY: - case URES_STRING: - case URES_STRING_V2: - case URES_INT_VECTOR: - return ures_copyResb(fillIn, resB, status); - case URES_TABLE: - case URES_TABLE16: - case URES_TABLE32: - r = res_getTableItemByIndex(&(resB->fResData), resB->fRes, resB->fIndex, &key); - if(r == RES_BOGUS && resB->fHasFallback) { - /* TODO: do the fallback */ - } - return init_resb_result(&(resB->fResData), r, key, resB->fIndex, resB->fData, resB, 0, fillIn, status); - case URES_ARRAY: - case URES_ARRAY16: - r = res_getArrayItem(&(resB->fResData), resB->fRes, resB->fIndex); - if(r == RES_BOGUS && resB->fHasFallback) { - /* TODO: do the fallback */ - } - return 
init_resb_result(&(resB->fResData), r, key, resB->fIndex, resB->fData, resB, 0, fillIn, status); - default: - /*return NULL;*/ - return fillIn; - } - } - /*return NULL;*/ - return fillIn; -} - -U_CAPI UResourceBundle* U_EXPORT2 ures_getByIndex(const UResourceBundle *resB, int32_t indexR, UResourceBundle *fillIn, UErrorCode *status) { - const char* key = NULL; - Resource r = RES_BOGUS; - - if (status==NULL || U_FAILURE(*status)) { - /*return NULL;*/ - return fillIn; - } - if(resB == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - /*return NULL;*/ - return fillIn; - } - - if(indexR >= 0 && resB->fSize > indexR) { - switch(RES_GET_TYPE(resB->fRes)) { - case URES_INT: - case URES_BINARY: - case URES_STRING: - case URES_STRING_V2: - case URES_INT_VECTOR: - return ures_copyResb(fillIn, resB, status); - case URES_TABLE: - case URES_TABLE16: - case URES_TABLE32: - r = res_getTableItemByIndex(&(resB->fResData), resB->fRes, indexR, &key); - if(r == RES_BOGUS && resB->fHasFallback) { - /* TODO: do the fallback */ - } - return init_resb_result(&(resB->fResData), r, key, indexR, resB->fData, resB, 0, fillIn, status); - case URES_ARRAY: - case URES_ARRAY16: - r = res_getArrayItem(&(resB->fResData), resB->fRes, indexR); - if(r == RES_BOGUS && resB->fHasFallback) { - /* TODO: do the fallback */ - } - return init_resb_result(&(resB->fResData), r, key, indexR, resB->fData, resB, 0, fillIn, status); - default: - /*return NULL;*/ - return fillIn; - } - } else { - *status = U_MISSING_RESOURCE_ERROR; - } - /*return NULL;*/ - return fillIn; -} - -U_CAPI const UChar* U_EXPORT2 ures_getStringByIndex(const UResourceBundle *resB, int32_t indexS, int32_t* len, UErrorCode *status) { - const char* key = NULL; - Resource r = RES_BOGUS; - - if (status==NULL || U_FAILURE(*status)) { - return NULL; - } - if(resB == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - if(indexS >= 0 && resB->fSize > indexS) { - switch(RES_GET_TYPE(resB->fRes)) { - case URES_STRING: - case 
URES_STRING_V2: - return res_getString(&(resB->fResData), resB->fRes, len); - case URES_TABLE: - case URES_TABLE16: - case URES_TABLE32: - r = res_getTableItemByIndex(&(resB->fResData), resB->fRes, indexS, &key); - if(r == RES_BOGUS && resB->fHasFallback) { - /* TODO: do the fallback */ - } - return ures_getStringWithAlias(resB, r, indexS, len, status); - case URES_ARRAY: - case URES_ARRAY16: - r = res_getArrayItem(&(resB->fResData), resB->fRes, indexS); - if(r == RES_BOGUS && resB->fHasFallback) { - /* TODO: do the fallback */ - } - return ures_getStringWithAlias(resB, r, indexS, len, status); - case URES_ALIAS: - return ures_getStringWithAlias(resB, resB->fRes, indexS, len, status); - case URES_INT: - case URES_BINARY: - case URES_INT_VECTOR: - *status = U_RESOURCE_TYPE_MISMATCH; - break; - default: - /* must not occur */ - *status = U_INTERNAL_PROGRAM_ERROR; - break; - } - } else { - *status = U_MISSING_RESOURCE_ERROR; - } - return NULL; -} - -U_CAPI const char * U_EXPORT2 -ures_getUTF8StringByIndex(const UResourceBundle *resB, - int32_t idx, - char *dest, int32_t *pLength, - UBool forceCopy, - UErrorCode *status) { - int32_t length16; - const UChar *s16 = ures_getStringByIndex(resB, idx, &length16, status); - return ures_toUTF8String(s16, length16, dest, pLength, forceCopy, status); -} - -/*U_CAPI const char *ures_getResPath(UResourceBundle *resB) { - return resB->fResPath; -}*/ - -U_CAPI UResourceBundle* U_EXPORT2 -ures_findResource(const char* path, UResourceBundle *fillIn, UErrorCode *status) -{ - UResourceBundle *first = NULL; - UResourceBundle *result = fillIn; - char *packageName = NULL; - char *pathToResource = NULL, *save = NULL; - char *locale = NULL, *localeEnd = NULL; - int32_t length; - - if(status == NULL || U_FAILURE(*status)) { - return result; - } - - length = (int32_t)(uprv_strlen(path)+1); - save = pathToResource = (char *)uprv_malloc(length*sizeof(char)); - /* test for NULL */ - if(pathToResource == NULL) { - *status = 
U_MEMORY_ALLOCATION_ERROR; - return result; - } - uprv_memcpy(pathToResource, path, length); - - locale = pathToResource; - if(*pathToResource == RES_PATH_SEPARATOR) { /* there is a path specification */ - pathToResource++; - packageName = pathToResource; - pathToResource = uprv_strchr(pathToResource, RES_PATH_SEPARATOR); - if(pathToResource == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } else { - *pathToResource = 0; - locale = pathToResource+1; - } - } - - localeEnd = uprv_strchr(locale, RES_PATH_SEPARATOR); - if(localeEnd != NULL) { - *localeEnd = 0; - } - - first = ures_open(packageName, locale, status); - - if(U_SUCCESS(*status)) { - if(localeEnd) { - result = ures_findSubResource(first, localeEnd+1, fillIn, status); - } else { - result = ures_copyResb(fillIn, first, status); - } - ures_close(first); - } - uprv_free(save); - return result; -} - -U_CAPI UResourceBundle* U_EXPORT2 -ures_findSubResource(const UResourceBundle *resB, char* path, UResourceBundle *fillIn, UErrorCode *status) -{ - Resource res = RES_BOGUS; - UResourceBundle *result = fillIn; - const char *key; - - if(status == NULL || U_FAILURE(*status)) { - return result; - } - - /* here we do looping and circular alias checking */ - /* this loop is here because aliasing is resolved on this level, not on res level */ - /* so, when we encounter an alias, it is not an aggregate resource, so we return */ - do { - res = res_findResource(&(resB->fResData), resB->fRes, &path, &key); - if(res != RES_BOGUS) { - result = init_resb_result(&(resB->fResData), res, key, -1, resB->fData, resB, 0, fillIn, status); - resB = result; - } else { - *status = U_MISSING_RESOURCE_ERROR; - break; - } - } while(*path); /* there is more stuff in the path */ - - return result; -} -U_INTERNAL const UChar* U_EXPORT2 -ures_getStringByKeyWithFallback(const UResourceBundle *resB, - const char* inKey, - int32_t* len, - UErrorCode *status) { - - UResourceBundle stack; - const UChar* retVal = NULL; - ures_initStackObject(&stack); 
- ures_getByKeyWithFallback(resB, inKey, &stack, status); - int32_t length; - retVal = ures_getString(&stack, &length, status); - ures_close(&stack); - if (U_FAILURE(*status)) { - return NULL; - } - if (length == 3 && retVal[0] == EMPTY_SET && retVal[1] == EMPTY_SET && retVal[2] == EMPTY_SET ) { - retVal = NULL; - length = 0; - *status = U_MISSING_RESOURCE_ERROR; - } - if (len != NULL) { - *len = length; - } - return retVal; -} - -/* - Like res_getTableItemByKey but accepts full paths like "NumberElements/latn/patternsShort". -*/ -static Resource getTableItemByKeyPath(const ResourceData *pResData, Resource table, const char *key) { - Resource resource = table; /* The current resource */ - icu::CharString path; - UErrorCode errorCode = U_ZERO_ERROR; - path.append(key, errorCode); - if (U_FAILURE(errorCode)) { return RES_BOGUS; } - char *pathPart = path.data(); /* Path from current resource to desired resource */ - UResType type = (UResType)RES_GET_TYPE(resource); /* the current resource type */ - while (*pathPart && resource != RES_BOGUS && URES_IS_CONTAINER(type)) { - char *nextPathPart = uprv_strchr(pathPart, RES_PATH_SEPARATOR); - if (nextPathPart != NULL) { - *nextPathPart = 0; /* Terminating null for this part of path. 
*/ - nextPathPart++; - } else { - nextPathPart = uprv_strchr(pathPart, 0); - } - int32_t t; - const char *pathP = pathPart; - resource = res_getTableItemByKey(pResData, resource, &t, &pathP); - type = (UResType)RES_GET_TYPE(resource); - pathPart = nextPathPart; - } - if (*pathPart) { - return RES_BOGUS; - } - return resource; -} - -U_CAPI UResourceBundle* U_EXPORT2 -ures_getByKeyWithFallback(const UResourceBundle *resB, - const char* inKey, - UResourceBundle *fillIn, - UErrorCode *status) { - Resource res = RES_BOGUS, rootRes = RES_BOGUS; - /*UResourceDataEntry *realData = NULL;*/ - UResourceBundle *helper = NULL; - - if (status==NULL || U_FAILURE(*status)) { - return fillIn; - } - if(resB == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return fillIn; - } - - int32_t type = RES_GET_TYPE(resB->fRes); - if(URES_IS_TABLE(type)) { - res = getTableItemByKeyPath(&(resB->fResData), resB->fRes, inKey); - const char* key = inKey; - if(res == RES_BOGUS) { - UResourceDataEntry *dataEntry = resB->fData; - CharString path; - char *myPath = NULL; - const char* resPath = resB->fResPath; - int32_t len = resB->fResPathLen; - while(res == RES_BOGUS && dataEntry->fParent != NULL) { /* Otherwise, we'll look in parents */ - dataEntry = dataEntry->fParent; - rootRes = dataEntry->fData.rootRes; - - if(dataEntry->fBogus == U_ZERO_ERROR) { - path.clear(); - if (len > 0) { - path.append(resPath, len, *status); - } - path.append(inKey, *status); - if (U_FAILURE(*status)) { - ures_close(helper); - return fillIn; - } - myPath = path.data(); - key = inKey; - do { - res = res_findResource(&(dataEntry->fData), rootRes, &myPath, &key); - if (RES_GET_TYPE(res) == URES_ALIAS && *myPath) { - /* We hit an alias, but we didn't finish following the path. 
*/ - helper = init_resb_result(&(dataEntry->fData), res, NULL, -1, dataEntry, resB, 0, helper, status); - /*helper = init_resb_result(&(dataEntry->fData), res, inKey, -1, dataEntry, resB, 0, helper, status);*/ - if(helper) { - dataEntry = helper->fData; - rootRes = helper->fRes; - resPath = helper->fResPath; - len = helper->fResPathLen; - - } else { - break; - } - } - } while(*myPath); /* Continue until the whole path is consumed */ - } - } - /*const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status);*/ - if(res != RES_BOGUS) { - /* check if resB->fResPath gives the right name here */ - if(uprv_strcmp(dataEntry->fName, uloc_getDefault())==0 || uprv_strcmp(dataEntry->fName, kRootLocaleName)==0) { - *status = U_USING_DEFAULT_WARNING; - } else { - *status = U_USING_FALLBACK_WARNING; - } - - fillIn = init_resb_result(&(dataEntry->fData), res, inKey, -1, dataEntry, resB, 0, fillIn, status); - } else { - *status = U_MISSING_RESOURCE_ERROR; - } - } else { - fillIn = init_resb_result(&(resB->fResData), res, key, -1, resB->fData, resB, 0, fillIn, status); - } - } - else { - *status = U_RESOURCE_TYPE_MISMATCH; - } - ures_close(helper); - return fillIn; -} - -namespace { - -void getAllItemsWithFallback( - const UResourceBundle *bundle, ResourceDataValue &value, - ResourceSink &sink, - UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return; } - // We recursively enumerate child-first, - // only storing parent items in the absence of child items. - // The sink needs to store a placeholder value for the no-fallback/no-inheritance marker - // to prevent a parent item from being stored. - // - // It would be possible to recursively enumerate parent-first, - // overriding parent items with child items. - // When the sink sees the no-fallback/no-inheritance marker, - // then it would remove the parent's item. - // We would deserialize parent values even though they are overridden in a child bundle. 
- value.pResData = &bundle->fResData; - UResourceDataEntry *parentEntry = bundle->fData->fParent; - UBool hasParent = parentEntry != NULL && U_SUCCESS(parentEntry->fBogus); - value.setResource(bundle->fRes); - sink.put(bundle->fKey, value, !hasParent, errorCode); - if (hasParent) { - // We might try to query the sink whether - // any fallback from the parent bundle is still possible. - - // Turn the parent UResourceDataEntry into a UResourceBundle, - // much like in ures_openWithType(). - // TODO: See if we can refactor ures_getByKeyWithFallback() - // and pull out an inner function that takes and returns a UResourceDataEntry - // so that we need not create UResourceBundle objects. - UResourceBundle parentBundle; - ures_initStackObject(&parentBundle); - parentBundle.fTopLevelData = parentBundle.fData = parentEntry; - // TODO: What is the difference between bundle fData and fTopLevelData? - uprv_memcpy(&parentBundle.fResData, &parentEntry->fData, sizeof(ResourceData)); - // TODO: Try to replace bundle.fResData with just using bundle.fData->fData. - parentBundle.fHasFallback = !parentBundle.fResData.noFallback; - parentBundle.fIsTopLevel = TRUE; - parentBundle.fRes = parentBundle.fResData.rootRes; - parentBundle.fSize = res_countArrayItems(&(parentBundle.fResData), parentBundle.fRes); - parentBundle.fIndex = -1; - entryIncrease(parentEntry); - - // Look up the container item in the parent bundle. - UResourceBundle containerBundle; - ures_initStackObject(&containerBundle); - const UResourceBundle *rb; - UErrorCode pathErrorCode = U_ZERO_ERROR; // Ignore if parents up to root do not have this path. 
- if (bundle->fResPath == NULL || *bundle->fResPath == 0) { - rb = &parentBundle; - } else { - rb = ures_getByKeyWithFallback(&parentBundle, bundle->fResPath, - &containerBundle, &pathErrorCode); - } - if (U_SUCCESS(pathErrorCode)) { - getAllItemsWithFallback(rb, value, sink, errorCode); - } - ures_close(&containerBundle); - ures_close(&parentBundle); - } -} - -} // namespace - -U_CAPI void U_EXPORT2 -ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path, - icu::ResourceSink &sink, UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return; } - if (path == NULL) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - UResourceBundle stackBundle; - ures_initStackObject(&stackBundle); - const UResourceBundle *rb; - if (*path == 0) { - // empty path - rb = bundle; - } else { - rb = ures_getByKeyWithFallback(bundle, path, &stackBundle, &errorCode); - if (U_FAILURE(errorCode)) { - ures_close(&stackBundle); - return; - } - } - // Get all table items with fallback. - ResourceDataValue value; - getAllItemsWithFallback(rb, value, sink, errorCode); - ures_close(&stackBundle); -} - -U_CAPI UResourceBundle* U_EXPORT2 ures_getByKey(const UResourceBundle *resB, const char* inKey, UResourceBundle *fillIn, UErrorCode *status) { - Resource res = RES_BOGUS; - UResourceDataEntry *realData = NULL; - const char *key = inKey; - - if (status==NULL || U_FAILURE(*status)) { - return fillIn; - } - if(resB == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return fillIn; - } - - int32_t type = RES_GET_TYPE(resB->fRes); - if(URES_IS_TABLE(type)) { - int32_t t; - res = res_getTableItemByKey(&(resB->fResData), resB->fRes, &t, &key); - if(res == RES_BOGUS) { - key = inKey; - if(resB->fHasFallback == TRUE) { - const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status); - if(U_SUCCESS(*status)) { - /* check if resB->fResPath gives the right name here */ - return init_resb_result(rd, res, key, -1, realData, resB, 0, fillIn, status); - } else { - 
*status = U_MISSING_RESOURCE_ERROR; - } - } else { - *status = U_MISSING_RESOURCE_ERROR; - } - } else { - return init_resb_result(&(resB->fResData), res, key, -1, resB->fData, resB, 0, fillIn, status); - } - } -#if 0 - /* this is a kind of TODO item. If we have an array with an index table, we could do this. */ - /* not currently */ - else if(RES_GET_TYPE(resB->fRes) == URES_ARRAY && resB->fHasFallback == TRUE) { - /* here should go a first attempt to locate the key using index table */ - const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status); - if(U_SUCCESS(*status)) { - return init_resb_result(rd, res, key, realData, resB, fillIn, status); - } else { - *status = U_MISSING_RESOURCE_ERROR; - } - } -#endif - else { - *status = U_RESOURCE_TYPE_MISMATCH; - } - return fillIn; -} - -U_CAPI const UChar* U_EXPORT2 ures_getStringByKey(const UResourceBundle *resB, const char* inKey, int32_t* len, UErrorCode *status) { - Resource res = RES_BOGUS; - UResourceDataEntry *realData = NULL; - const char* key = inKey; - - if (status==NULL || U_FAILURE(*status)) { - return NULL; - } - if(resB == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - int32_t type = RES_GET_TYPE(resB->fRes); - if(URES_IS_TABLE(type)) { - int32_t t=0; - - res = res_getTableItemByKey(&(resB->fResData), resB->fRes, &t, &key); - - if(res == RES_BOGUS) { - key = inKey; - if(resB->fHasFallback == TRUE) { - const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status); - if(U_SUCCESS(*status)) { - switch (RES_GET_TYPE(res)) { - case URES_STRING: - case URES_STRING_V2: - return res_getString(rd, res, len); - case URES_ALIAS: - { - const UChar* result = 0; - UResourceBundle *tempRes = ures_getByKey(resB, inKey, NULL, status); - result = ures_getString(tempRes, len, status); - ures_close(tempRes); - return result; - } - default: - *status = U_RESOURCE_TYPE_MISMATCH; - } - } else { - *status = U_MISSING_RESOURCE_ERROR; - } - } else { - *status = 
U_MISSING_RESOURCE_ERROR; - } - } else { - switch (RES_GET_TYPE(res)) { - case URES_STRING: - case URES_STRING_V2: - return res_getString(&(resB->fResData), res, len); - case URES_ALIAS: - { - const UChar* result = 0; - UResourceBundle *tempRes = ures_getByKey(resB, inKey, NULL, status); - result = ures_getString(tempRes, len, status); - ures_close(tempRes); - return result; - } - default: - *status = U_RESOURCE_TYPE_MISMATCH; - } - } - } -#if 0 - /* this is a kind of TODO item. If we have an array with an index table, we could do this. */ - /* not currently */ - else if(RES_GET_TYPE(resB->fRes) == URES_ARRAY && resB->fHasFallback == TRUE) { - /* here should go a first attempt to locate the key using index table */ - const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status); - if(U_SUCCESS(*status)) { - return res_getString(rd, res, len); - } else { - *status = U_MISSING_RESOURCE_ERROR; - } - } -#endif - else { - *status = U_RESOURCE_TYPE_MISMATCH; - } - return NULL; -} - -U_CAPI const char * U_EXPORT2 -ures_getUTF8StringByKey(const UResourceBundle *resB, - const char *key, - char *dest, int32_t *pLength, - UBool forceCopy, - UErrorCode *status) { - int32_t length16; - const UChar *s16 = ures_getStringByKey(resB, key, &length16, status); - return ures_toUTF8String(s16, length16, dest, pLength, forceCopy, status); -} - -/* TODO: clean from here down */ - -/** - * INTERNAL: Get the name of the first real locale (not placeholder) - * that has resource bundle data. 
- */ -U_INTERNAL const char* U_EXPORT2 -ures_getLocaleInternal(const UResourceBundle* resourceBundle, UErrorCode* status) -{ - if (status==NULL || U_FAILURE(*status)) { - return NULL; - } - if (!resourceBundle) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } else { - return resourceBundle->fData->fName; - } -} - -U_CAPI const char* U_EXPORT2 -ures_getLocale(const UResourceBundle* resourceBundle, - UErrorCode* status) -{ - return ures_getLocaleInternal(resourceBundle, status); -} - - -U_CAPI const char* U_EXPORT2 -ures_getLocaleByType(const UResourceBundle* resourceBundle, - ULocDataLocaleType type, - UErrorCode* status) { - if (status==NULL || U_FAILURE(*status)) { - return NULL; - } - if (!resourceBundle) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } else { - switch(type) { - case ULOC_ACTUAL_LOCALE: - return resourceBundle->fData->fName; - case ULOC_VALID_LOCALE: - return resourceBundle->fTopLevelData->fName; - case ULOC_REQUESTED_LOCALE: - default: - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - } -} - -U_CFUNC const char* ures_getName(const UResourceBundle* resB) { - if(resB == NULL) { - return NULL; - } - - return resB->fData->fName; -} - -#ifdef URES_DEBUG -U_CFUNC const char* ures_getPath(const UResourceBundle* resB) { - if(resB == NULL) { - return NULL; - } - - return resB->fData->fPath; -} -#endif - -static UResourceBundle* -ures_openWithType(UResourceBundle *r, const char* path, const char* localeID, - UResOpenType openType, UErrorCode* status) { - if(U_FAILURE(*status)) { - return NULL; - } - - UResourceDataEntry *entry; - if(openType != URES_OPEN_DIRECT) { - /* first "canonicalize" the locale ID */ - char canonLocaleID[ULOC_FULLNAME_CAPACITY]; - uloc_getBaseName(localeID, canonLocaleID, UPRV_LENGTHOF(canonLocaleID), status); - if(U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - entry = entryOpen(path, canonLocaleID, openType, status); - } 
else { - entry = entryOpenDirect(path, localeID, status); - } - if(U_FAILURE(*status)) { - return NULL; - } - if(entry == NULL) { - *status = U_MISSING_RESOURCE_ERROR; - return NULL; - } - - UBool isStackObject; - if(r == NULL) { - r = (UResourceBundle *)uprv_malloc(sizeof(UResourceBundle)); - if(r == NULL) { - entryClose(entry); - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - isStackObject = FALSE; - } else { // fill-in - isStackObject = ures_isStackObject(r); - ures_closeBundle(r, FALSE); - } - uprv_memset(r, 0, sizeof(UResourceBundle)); - ures_setIsStackObject(r, isStackObject); - - r->fTopLevelData = r->fData = entry; - uprv_memcpy(&r->fResData, &entry->fData, sizeof(ResourceData)); - r->fHasFallback = openType != URES_OPEN_DIRECT && !r->fResData.noFallback; - r->fIsTopLevel = TRUE; - r->fRes = r->fResData.rootRes; - r->fSize = res_countArrayItems(&(r->fResData), r->fRes); - r->fIndex = -1; - - return r; -} - -U_CAPI UResourceBundle* U_EXPORT2 -ures_open(const char* path, const char* localeID, UErrorCode* status) { - return ures_openWithType(NULL, path, localeID, URES_OPEN_LOCALE_DEFAULT_ROOT, status); -} - -U_CAPI UResourceBundle* U_EXPORT2 -ures_openNoDefault(const char* path, const char* localeID, UErrorCode* status) { - return ures_openWithType(NULL, path, localeID, URES_OPEN_LOCALE_ROOT, status); -} - -/** - * Opens a resource bundle without "canonicalizing" the locale name. No fallback will be performed - * or sought. However, alias substitution will happen! - */ -U_CAPI UResourceBundle* U_EXPORT2 -ures_openDirect(const char* path, const char* localeID, UErrorCode* status) { - return ures_openWithType(NULL, path, localeID, URES_OPEN_DIRECT, status); -} - -/** - * API: This function is used to open a resource bundle - * proper fallback chaining is executed while initialization. - * The result is stored in cache for later fallback search. 
- */ -U_CAPI void U_EXPORT2 -ures_openFillIn(UResourceBundle *r, const char* path, - const char* localeID, UErrorCode* status) { - if(U_SUCCESS(*status) && r == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - ures_openWithType(r, path, localeID, URES_OPEN_LOCALE_DEFAULT_ROOT, status); -} - -/** - * API: Counts members. For arrays and tables, returns number of resources. - * For strings, returns 1. - */ -U_CAPI int32_t U_EXPORT2 -ures_countArrayItems(const UResourceBundle* resourceBundle, - const char* resourceKey, - UErrorCode* status) -{ - UResourceBundle resData; - ures_initStackObject(&resData); - if (status==NULL || U_FAILURE(*status)) { - return 0; - } - if(resourceBundle == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - ures_getByKey(resourceBundle, resourceKey, &resData, status); - - if(resData.fResData.data != NULL) { - int32_t result = res_countArrayItems(&resData.fResData, resData.fRes); - ures_close(&resData); - return result; - } else { - *status = U_MISSING_RESOURCE_ERROR; - ures_close(&resData); - return 0; - } -} - -/** - * Internal function. - * Return the version number associated with this ResourceBundle as a string. - * - * @param resourceBundle The resource bundle for which the version is checked. - * @return A version number string as specified in the resource bundle or its parent. - * The caller does not own this string. - * @see ures_getVersion - * @internal - */ -U_INTERNAL const char* U_EXPORT2 -ures_getVersionNumberInternal(const UResourceBundle *resourceBundle) -{ - if (!resourceBundle) return NULL; - - if(resourceBundle->fVersion == NULL) { - - /* If the version ID has not been built yet, then do so. Retrieve */ - /* the minor version from the file. */ - UErrorCode status = U_ZERO_ERROR; - int32_t minor_len = 0; - int32_t len; - - const UChar* minor_version = ures_getStringByKey(resourceBundle, kVersionTag, &minor_len, &status); - - /* Determine the length of of the final version string. 
This is */ - /* the length of the major part + the length of the separator */ - /* (==1) + the length of the minor part (+ 1 for the zero byte at */ - /* the end). */ - - len = (minor_len > 0) ? minor_len : 1; - - /* Allocate the string, and build it up. */ - /* + 1 for zero byte */ - - - ((UResourceBundle *)resourceBundle)->fVersion = (char *)uprv_malloc(1 + len); - /* Check for null pointer. */ - if (((UResourceBundle *)resourceBundle)->fVersion == NULL) { - return NULL; - } - - if(minor_len > 0) { - u_UCharsToChars(minor_version, resourceBundle->fVersion , minor_len); - resourceBundle->fVersion[len] = '\0'; - } - else { - uprv_strcpy(resourceBundle->fVersion, kDefaultMinorVersion); - } - } - - return resourceBundle->fVersion; -} - -U_CAPI const char* U_EXPORT2 -ures_getVersionNumber(const UResourceBundle* resourceBundle) -{ - return ures_getVersionNumberInternal(resourceBundle); -} - -U_CAPI void U_EXPORT2 ures_getVersion(const UResourceBundle* resB, UVersionInfo versionInfo) { - if (!resB) return; - - u_versionFromString(versionInfo, ures_getVersionNumberInternal(resB)); -} - -/** Tree support functions *******************************/ -#define INDEX_LOCALE_NAME "res_index" -#define INDEX_TAG "InstalledLocales" -#define DEFAULT_TAG "default" - -#if defined(URES_TREE_DEBUG) -#include -#endif - -typedef struct ULocalesContext { - UResourceBundle installed; - UResourceBundle curr; -} ULocalesContext; - -static void U_CALLCONV -ures_loc_closeLocales(UEnumeration *enumerator) { - ULocalesContext *ctx = (ULocalesContext *)enumerator->context; - ures_close(&ctx->curr); - ures_close(&ctx->installed); - uprv_free(ctx); - uprv_free(enumerator); -} - -static int32_t U_CALLCONV -ures_loc_countLocales(UEnumeration *en, UErrorCode * /*status*/) { - ULocalesContext *ctx = (ULocalesContext *)en->context; - return ures_getSize(&ctx->installed); -} - -U_CDECL_BEGIN - - -static const char * U_CALLCONV -ures_loc_nextLocale(UEnumeration* en, - int32_t* resultLength, - UErrorCode* 
status) { - ULocalesContext *ctx = (ULocalesContext *)en->context; - UResourceBundle *res = &(ctx->installed); - UResourceBundle *k = NULL; - const char *result = NULL; - int32_t len = 0; - if(ures_hasNext(res) && (k = ures_getNextResource(res, &ctx->curr, status)) != 0) { - result = ures_getKey(k); - len = (int32_t)uprv_strlen(result); - } - if (resultLength) { - *resultLength = len; - } - return result; -} - -static void U_CALLCONV -ures_loc_resetLocales(UEnumeration* en, - UErrorCode* /*status*/) { - UResourceBundle *res = &((ULocalesContext *)en->context)->installed; - ures_resetIterator(res); -} - -U_CDECL_END - -static const UEnumeration gLocalesEnum = { - NULL, - NULL, - ures_loc_closeLocales, - ures_loc_countLocales, - uenum_unextDefault, - ures_loc_nextLocale, - ures_loc_resetLocales -}; - - -U_CAPI UEnumeration* U_EXPORT2 -ures_openAvailableLocales(const char *path, UErrorCode *status) -{ - UResourceBundle *idx = NULL; - UEnumeration *en = NULL; - ULocalesContext *myContext = NULL; - - if(U_FAILURE(*status)) { - return NULL; - } - myContext = static_cast(uprv_malloc(sizeof(ULocalesContext))); - en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); - if(!en || !myContext) { - *status = U_MEMORY_ALLOCATION_ERROR; - uprv_free(en); - uprv_free(myContext); - return NULL; - } - uprv_memcpy(en, &gLocalesEnum, sizeof(UEnumeration)); - - ures_initStackObject(&myContext->installed); - ures_initStackObject(&myContext->curr); - idx = ures_openDirect(path, INDEX_LOCALE_NAME, status); - ures_getByKey(idx, INDEX_TAG, &myContext->installed, status); - if(U_SUCCESS(*status)) { -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "Got %s::%s::[%s] : %s\n", - path, INDEX_LOCALE_NAME, INDEX_TAG, ures_getKey(&myContext->installed)); -#endif - en->context = myContext; - } else { -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s open failed - %s\n", path, u_errorName(*status)); -#endif - ures_close(&myContext->installed); - uprv_free(myContext); - uprv_free(en); - en = NULL; - } 
- - ures_close(idx); - - return en; -} - -static UBool isLocaleInList(UEnumeration *locEnum, const char *locToSearch, UErrorCode *status) { - const char *loc; - while ((loc = uenum_next(locEnum, NULL, status)) != NULL) { - if (uprv_strcmp(loc, locToSearch) == 0) { - return TRUE; - } - } - return FALSE; -} - -U_CAPI int32_t U_EXPORT2 -ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, - const char *path, const char *resName, const char *keyword, const char *locid, - UBool *isAvailable, UBool omitDefault, UErrorCode *status) -{ - char kwVal[1024] = ""; /* value of keyword 'keyword' */ - char defVal[1024] = ""; /* default value for given locale */ - char defLoc[1024] = ""; /* default value for given locale */ - char base[1024] = ""; /* base locale */ - char found[1024]; - char parent[1024]; - char full[1024] = ""; - UResourceBundle bund1, bund2; - UResourceBundle *res = NULL; - UErrorCode subStatus = U_ZERO_ERROR; - int32_t length = 0; - if(U_FAILURE(*status)) return 0; - uloc_getKeywordValue(locid, keyword, kwVal, 1024-1,&subStatus); - if(!uprv_strcmp(kwVal, DEFAULT_TAG)) { - kwVal[0]=0; - } - uloc_getBaseName(locid, base, 1024-1,&subStatus); -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "getFunctionalEquivalent: \"%s\" [%s=%s] in %s - %s\n", - locid, keyword, kwVal, base, u_errorName(subStatus)); -#endif - ures_initStackObject(&bund1); - ures_initStackObject(&bund2); - - - uprv_strcpy(parent, base); - uprv_strcpy(found, base); - - if(isAvailable) { - UEnumeration *locEnum = ures_openAvailableLocales(path, &subStatus); - *isAvailable = TRUE; - if (U_SUCCESS(subStatus)) { - *isAvailable = isLocaleInList(locEnum, parent, &subStatus); - } - uenum_close(locEnum); - } - - if(U_FAILURE(subStatus)) { - *status = subStatus; - return 0; - } - - do { - subStatus = U_ZERO_ERROR; - res = ures_open(path, parent, &subStatus); - if(((subStatus == U_USING_FALLBACK_WARNING) || - (subStatus == U_USING_DEFAULT_WARNING)) && isAvailable) - { - *isAvailable = FALSE; - } 
- isAvailable = NULL; /* only want to set this the first time around */ - -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s;%s -> %s [%s]\n", path?path:"ICUDATA", parent, u_errorName(subStatus), ures_getLocale(res, &subStatus)); -#endif - if(U_FAILURE(subStatus)) { - *status = subStatus; - } else if(subStatus == U_ZERO_ERROR) { - ures_getByKey(res,resName,&bund1, &subStatus); - if(subStatus == U_ZERO_ERROR) { - const UChar *defUstr; - int32_t defLen; - /* look for default item */ -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s;%s : loaded default -> %s\n", - path?path:"ICUDATA", parent, u_errorName(subStatus)); -#endif - defUstr = ures_getStringByKey(&bund1, DEFAULT_TAG, &defLen, &subStatus); - if(U_SUCCESS(subStatus) && defLen) { - u_UCharsToChars(defUstr, defVal, u_strlen(defUstr)); -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s;%s -> default %s=%s, %s\n", - path?path:"ICUDATA", parent, keyword, defVal, u_errorName(subStatus)); -#endif - uprv_strcpy(defLoc, parent); - if(kwVal[0]==0) { - uprv_strcpy(kwVal, defVal); -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s;%s -> kwVal = %s\n", - path?path:"ICUDATA", parent, keyword, kwVal); -#endif - } - } - } - } - - subStatus = U_ZERO_ERROR; - - if (res != NULL) { - uprv_strcpy(found, ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus)); - } - - uloc_getParent(found,parent,sizeof(parent),&subStatus); - ures_close(res); - } while(!defVal[0] && *found && uprv_strcmp(found, "root") != 0 && U_SUCCESS(*status)); - - /* Now, see if we can find the kwVal collator.. start the search over.. 
*/ - uprv_strcpy(parent, base); - uprv_strcpy(found, base); - - do { - subStatus = U_ZERO_ERROR; - res = ures_open(path, parent, &subStatus); - if((subStatus == U_USING_FALLBACK_WARNING) && isAvailable) { - *isAvailable = FALSE; - } - isAvailable = NULL; /* only want to set this the first time around */ - -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s;%s -> %s (looking for %s)\n", - path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal); -#endif - if(U_FAILURE(subStatus)) { - *status = subStatus; - } else if(subStatus == U_ZERO_ERROR) { - ures_getByKey(res,resName,&bund1, &subStatus); -#if defined(URES_TREE_DEBUG) -/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, resName, u_errorName(subStatus)); -#endif - if(subStatus == U_ZERO_ERROR) { - ures_getByKey(&bund1, kwVal, &bund2, &subStatus); -#if defined(URES_TREE_DEBUG) -/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, kwVal, u_errorName(subStatus)); -#endif - if(subStatus == U_ZERO_ERROR) { -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s;%s -> full0 %s=%s, %s\n", - path?path:"ICUDATA", parent, keyword, kwVal, u_errorName(subStatus)); -#endif - uprv_strcpy(full, parent); - if(*full == 0) { - uprv_strcpy(full, "root"); - } - /* now, recalculate default kw if need be */ - if(uprv_strlen(defLoc) > uprv_strlen(full)) { - const UChar *defUstr; - int32_t defLen; - /* look for default item */ -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s;%s -> recalculating Default0\n", - path?path:"ICUDATA", full); -#endif - defUstr = ures_getStringByKey(&bund1, DEFAULT_TAG, &defLen, &subStatus); - if(U_SUCCESS(subStatus) && defLen) { - u_UCharsToChars(defUstr, defVal, u_strlen(defUstr)); -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s;%s -> default0 %s=%s, %s\n", - path?path:"ICUDATA", full, keyword, defVal, u_errorName(subStatus)); -#endif - uprv_strcpy(defLoc, full); - } - } /* end of recalculate default KW */ -#if defined(URES_TREE_DEBUG) - else { - fprintf(stderr, "No trim0, %s <= %s\n", defLoc, full); - } -#endif 
- } else { -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "err=%s in %s looking for %s\n", - u_errorName(subStatus), parent, kwVal); -#endif - } - } - } - - subStatus = U_ZERO_ERROR; - - uprv_strcpy(found, parent); - uloc_getParent(found,parent,1023,&subStatus); - ures_close(res); - } while(!full[0] && *found && U_SUCCESS(*status)); - - if((full[0]==0) && uprv_strcmp(kwVal, defVal)) { -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "Failed to locate kw %s - try default %s\n", kwVal, defVal); -#endif - uprv_strcpy(kwVal, defVal); - uprv_strcpy(parent, base); - uprv_strcpy(found, base); - - do { /* search for 'default' named item */ - subStatus = U_ZERO_ERROR; - res = ures_open(path, parent, &subStatus); - if((subStatus == U_USING_FALLBACK_WARNING) && isAvailable) { - *isAvailable = FALSE; - } - isAvailable = NULL; /* only want to set this the first time around */ - -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s;%s -> %s (looking for default %s)\n", - path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal); -#endif - if(U_FAILURE(subStatus)) { - *status = subStatus; - } else if(subStatus == U_ZERO_ERROR) { - ures_getByKey(res,resName,&bund1, &subStatus); - if(subStatus == U_ZERO_ERROR) { - ures_getByKey(&bund1, kwVal, &bund2, &subStatus); - if(subStatus == U_ZERO_ERROR) { -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s;%s -> full1 %s=%s, %s\n", path?path:"ICUDATA", - parent, keyword, kwVal, u_errorName(subStatus)); -#endif - uprv_strcpy(full, parent); - if(*full == 0) { - uprv_strcpy(full, "root"); - } - - /* now, recalculate default kw if need be */ - if(uprv_strlen(defLoc) > uprv_strlen(full)) { - const UChar *defUstr; - int32_t defLen; - /* look for default item */ -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s;%s -> recalculating Default1\n", - path?path:"ICUDATA", full); -#endif - defUstr = ures_getStringByKey(&bund1, DEFAULT_TAG, &defLen, &subStatus); - if(U_SUCCESS(subStatus) && defLen) { - u_UCharsToChars(defUstr, defVal, 
u_strlen(defUstr)); -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s;%s -> default %s=%s, %s\n", - path?path:"ICUDATA", full, keyword, defVal, u_errorName(subStatus)); -#endif - uprv_strcpy(defLoc, full); - } - } /* end of recalculate default KW */ -#if defined(URES_TREE_DEBUG) - else { - fprintf(stderr, "No trim1, %s <= %s\n", defLoc, full); - } -#endif - } - } - } - subStatus = U_ZERO_ERROR; - - uprv_strcpy(found, parent); - uloc_getParent(found,parent,1023,&subStatus); - ures_close(res); - } while(!full[0] && *found && U_SUCCESS(*status)); - } - - if(U_SUCCESS(*status)) { - if(!full[0]) { -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "Still could not load keyword %s=%s\n", keyword, kwVal); -#endif - *status = U_MISSING_RESOURCE_ERROR; - } else if(omitDefault) { -#if defined(URES_TREE_DEBUG) - fprintf(stderr,"Trim? full=%s, defLoc=%s, found=%s\n", full, defLoc, found); -#endif - if(uprv_strlen(defLoc) <= uprv_strlen(full)) { - /* found the keyword in a *child* of where the default tag was present. 
*/ - if(!uprv_strcmp(kwVal, defVal)) { /* if the requested kw is default, */ - /* and the default is in or in an ancestor of the current locale */ -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "Removing unneeded var %s=%s\n", keyword, kwVal); -#endif - kwVal[0]=0; - } - } - } - uprv_strcpy(found, full); - if(kwVal[0]) { - uprv_strcat(found, "@"); - uprv_strcat(found, keyword); - uprv_strcat(found, "="); - uprv_strcat(found, kwVal); - } else if(!omitDefault) { - uprv_strcat(found, "@"); - uprv_strcat(found, keyword); - uprv_strcat(found, "="); - uprv_strcat(found, defVal); - } - } - /* we found the default locale - no need to repeat it.*/ - - ures_close(&bund1); - ures_close(&bund2); - - length = (int32_t)uprv_strlen(found); - - if(U_SUCCESS(*status)) { - int32_t copyLength = uprv_min(length, resultCapacity); - if(copyLength>0) { - uprv_strncpy(result, found, copyLength); - } - if(length == 0) { - *status = U_MISSING_RESOURCE_ERROR; - } - } else { - length = 0; - result[0]=0; - } - return u_terminateChars(result, resultCapacity, length, status); -} - -U_CAPI UEnumeration* U_EXPORT2 -ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status) -{ -#define VALUES_BUF_SIZE 2048 -#define VALUES_LIST_SIZE 512 - - char valuesBuf[VALUES_BUF_SIZE]; - int32_t valuesIndex = 0; - const char *valuesList[VALUES_LIST_SIZE]; - int32_t valuesCount = 0; - - const char *locale; - int32_t locLen; - - UEnumeration *locs = NULL; - - UResourceBundle item; - UResourceBundle subItem; - - ures_initStackObject(&item); - ures_initStackObject(&subItem); - locs = ures_openAvailableLocales(path, status); - - if(U_FAILURE(*status)) { - ures_close(&item); - ures_close(&subItem); - return NULL; - } - - valuesBuf[0]=0; - valuesBuf[1]=0; - - while((locale = uenum_next(locs, &locLen, status)) != 0) { - UResourceBundle *bund = NULL; - UResourceBundle *subPtr = NULL; - UErrorCode subStatus = U_ZERO_ERROR; /* don't fail if a bundle is unopenable */ - bund = ures_openDirect(path, 
locale, &subStatus); - -#if defined(URES_TREE_DEBUG) - if(!bund || U_FAILURE(subStatus)) { - fprintf(stderr, "%s-%s values: Can't open %s locale - skipping. (%s)\n", - path?path:"", keyword, locale, u_errorName(subStatus)); - } -#endif - - ures_getByKey(bund, keyword, &item, &subStatus); - - if(!bund || U_FAILURE(subStatus)) { -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s-%s values: Can't find in %s - skipping. (%s)\n", - path?path:"", keyword, locale, u_errorName(subStatus)); -#endif - ures_close(bund); - bund = NULL; - continue; - } - - while((subPtr = ures_getNextResource(&item,&subItem,&subStatus)) != 0 - && U_SUCCESS(subStatus)) { - const char *k; - int32_t i; - k = ures_getKey(subPtr); - -#if defined(URES_TREE_DEBUG) - /* fprintf(stderr, "%s | %s | %s | %s\n", path?path:"", keyword, locale, k); */ -#endif - if(k == NULL || *k == 0 || - uprv_strcmp(k, DEFAULT_TAG) == 0 || uprv_strncmp(k, "private-", 8) == 0) { - // empty or "default" or unlisted type - continue; - } - for(i=0; i= (VALUES_LIST_SIZE-1)) || /* no more space in list .. */ - ((valuesIndex+kLen+1+1) >= VALUES_BUF_SIZE)) { /* no more space in buffer (string + 2 nulls) */ - *status = U_ILLEGAL_ARGUMENT_ERROR; /* out of space.. 
*/ - } else { - uprv_strcpy(valuesBuf+valuesIndex, k); - valuesList[valuesCount++] = valuesBuf+valuesIndex; - valuesIndex += kLen; -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s | %s | %s | [%s] (UNIQUE)\n", - path?path:"", keyword, locale, k); -#endif - valuesBuf[valuesIndex++] = 0; /* terminate */ - } - } - } - ures_close(bund); - } - valuesBuf[valuesIndex++] = 0; /* terminate */ - - ures_close(&item); - ures_close(&subItem); - uenum_close(locs); -#if defined(URES_TREE_DEBUG) - fprintf(stderr, "%s: size %d, #%d\n", u_errorName(*status), - valuesIndex, valuesCount); -#endif - return uloc_openKeywordList(valuesBuf, valuesIndex, status); -} -#if 0 -/* This code isn't needed, and given the documentation warnings the implementation is suspect */ -U_INTERNAL UBool U_EXPORT2 -ures_equal(const UResourceBundle* res1, const UResourceBundle* res2){ - if(res1==NULL || res2==NULL){ - return res1==res2; /* pointer comparision */ - } - if(res1->fKey==NULL|| res2->fKey==NULL){ - return (res1->fKey==res2->fKey); - }else{ - if(uprv_strcmp(res1->fKey, res2->fKey)!=0){ - return FALSE; - } - } - if(uprv_strcmp(res1->fData->fName, res2->fData->fName)!=0){ - return FALSE; - } - if(res1->fData->fPath == NULL|| res2->fData->fPath==NULL){ - return (res1->fData->fPath == res2->fData->fPath); - }else{ - if(uprv_strcmp(res1->fData->fPath, res2->fData->fPath)!=0){ - return FALSE; - } - } - if(uprv_strcmp(res1->fData->fParent->fName, res2->fData->fParent->fName)!=0){ - return FALSE; - } - if(uprv_strcmp(res1->fData->fParent->fPath, res2->fData->fParent->fPath)!=0){ - return FALSE; - } - if(uprv_strncmp(res1->fResPath, res2->fResPath, res1->fResPathLen)!=0){ - return FALSE; - } - if(res1->fRes != res2->fRes){ - return FALSE; - } - return TRUE; -} -U_INTERNAL UResourceBundle* U_EXPORT2 -ures_clone(const UResourceBundle* res, UErrorCode* status){ - UResourceBundle* bundle = NULL; - UResourceBundle* ret = NULL; - if(U_FAILURE(*status) || res == NULL){ - return NULL; - } - bundle = 
ures_open(res->fData->fPath, res->fData->fName, status); - if(res->fResPath!=NULL){ - ret = ures_findSubResource(bundle, res->fResPath, NULL, status); - ures_close(bundle); - }else{ - ret = bundle; - } - return ret; -} -U_INTERNAL const UResourceBundle* U_EXPORT2 -ures_getParentBundle(const UResourceBundle* res){ - if(res==NULL){ - return NULL; - } - return res->fParentRes; -} -#endif - -U_INTERNAL void U_EXPORT2 -ures_getVersionByKey(const UResourceBundle* res, const char *key, UVersionInfo ver, UErrorCode *status) { - const UChar *str; - int32_t len; - str = ures_getStringByKey(res, key, &len, status); - if(U_SUCCESS(*status)) { - u_versionFromUString(ver, str); - } -} - -/* eof */ diff --git a/deps/node/deps/icu-small/source/common/uresdata.cpp b/deps/node/deps/icu-small/source/common/uresdata.cpp deleted file mode 100644 index a0b8d3ba..00000000 --- a/deps/node/deps/icu-small/source/common/uresdata.cpp +++ /dev/null @@ -1,1482 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 1999-2016, International Business Machines Corporation -* and others. All Rights Reserved. -******************************************************************************* -* file name: uresdata.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999dec08 -* created by: Markus W. Scherer -* Modification History: -* -* Date Name Description -* 06/20/2000 helena OS/400 port changes; mostly typecast. 
-* 06/24/02 weiv Added support for resource sharing -*/ - -#include "unicode/utypes.h" -#include "unicode/udata.h" -#include "unicode/ustring.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "cstring.h" -#include "resource.h" -#include "uarrsort.h" -#include "uassert.h" -#include "ucol_swp.h" -#include "udataswp.h" -#include "uinvchar.h" -#include "uresdata.h" -#include "uresimp.h" - -/* - * Resource access helpers - */ - -/* get a const char* pointer to the key with the keyOffset byte offset from pRoot */ -#define RES_GET_KEY16(pResData, keyOffset) \ - ((keyOffset)<(pResData)->localKeyLimit ? \ - (const char *)(pResData)->pRoot+(keyOffset) : \ - (pResData)->poolBundleKeys+(keyOffset)-(pResData)->localKeyLimit) - -#define RES_GET_KEY32(pResData, keyOffset) \ - ((keyOffset)>=0 ? \ - (const char *)(pResData)->pRoot+(keyOffset) : \ - (pResData)->poolBundleKeys+((keyOffset)&0x7fffffff)) - -#define URESDATA_ITEM_NOT_FOUND -1 - -/* empty resources, returned when the resource offset is 0 */ -static const uint16_t gEmpty16=0; - -static const struct { - int32_t length; - int32_t res; -} gEmpty32={ 0, 0 }; - -static const struct { - int32_t length; - UChar nul; - UChar pad; -} gEmptyString={ 0, 0, 0 }; - -/* - * All the type-access functions assume that - * the resource is of the expected type. - */ - -static int32_t -_res_findTableItem(const ResourceData *pResData, const uint16_t *keyOffsets, int32_t length, - const char *key, const char **realKey) { - const char *tableKey; - int32_t mid, start, limit; - int result; - - /* do a binary search for the key */ - start=0; - limit=length; - while(startuseNativeStrcmp) { - result = uprv_strcmp(key, tableKey); - } else { - result = uprv_compareInvCharsAsAscii(key, tableKey); - } - if (result < 0) { - limit = mid; - } else if (result > 0) { - start = mid + 1; - } else { - /* We found it! */ - *realKey=tableKey; - return mid; - } - } - return URESDATA_ITEM_NOT_FOUND; /* not found or table is empty. 
*/ -} - -static int32_t -_res_findTable32Item(const ResourceData *pResData, const int32_t *keyOffsets, int32_t length, - const char *key, const char **realKey) { - const char *tableKey; - int32_t mid, start, limit; - int result; - - /* do a binary search for the key */ - start=0; - limit=length; - while(startuseNativeStrcmp) { - result = uprv_strcmp(key, tableKey); - } else { - result = uprv_compareInvCharsAsAscii(key, tableKey); - } - if (result < 0) { - limit = mid; - } else if (result > 0) { - start = mid + 1; - } else { - /* We found it! */ - *realKey=tableKey; - return mid; - } - } - return URESDATA_ITEM_NOT_FOUND; /* not found or table is empty. */ -} - -/* helper for res_load() ---------------------------------------------------- */ - -static UBool U_CALLCONV -isAcceptable(void *context, - const char * /*type*/, const char * /*name*/, - const UDataInfo *pInfo) { - uprv_memcpy(context, pInfo->formatVersion, 4); - return (UBool)( - pInfo->size>=20 && - pInfo->isBigEndian==U_IS_BIG_ENDIAN && - pInfo->charsetFamily==U_CHARSET_FAMILY && - pInfo->sizeofUChar==U_SIZEOF_UCHAR && - pInfo->dataFormat[0]==0x52 && /* dataFormat="ResB" */ - pInfo->dataFormat[1]==0x65 && - pInfo->dataFormat[2]==0x73 && - pInfo->dataFormat[3]==0x42 && - (1<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=3)); -} - -/* semi-public functions ---------------------------------------------------- */ - -static void -res_init(ResourceData *pResData, - UVersionInfo formatVersion, const void *inBytes, int32_t length, - UErrorCode *errorCode) { - UResType rootType; - - /* get the root resource */ - pResData->pRoot=(const int32_t *)inBytes; - pResData->rootRes=(Resource)*pResData->pRoot; - pResData->p16BitUnits=&gEmpty16; - - /* formatVersion 1.1 must have a root item and at least 5 indexes */ - if(length>=0 && (length/4)<((formatVersion[0]==1 && formatVersion[1]==0) ? 
1 : 1+5)) { - *errorCode=U_INVALID_FORMAT_ERROR; - res_unload(pResData); - return; - } - - /* currently, we accept only resources that have a Table as their roots */ - rootType=(UResType)RES_GET_TYPE(pResData->rootRes); - if(!URES_IS_TABLE(rootType)) { - *errorCode=U_INVALID_FORMAT_ERROR; - res_unload(pResData); - return; - } - - if(formatVersion[0]==1 && formatVersion[1]==0) { - pResData->localKeyLimit=0x10000; /* greater than any 16-bit key string offset */ - } else { - /* bundles with formatVersion 1.1 and later contain an indexes[] array */ - const int32_t *indexes=pResData->pRoot+1; - int32_t indexLength=indexes[URES_INDEX_LENGTH]&0xff; - if(indexLength<=URES_INDEX_MAX_TABLE_LENGTH) { - *errorCode=U_INVALID_FORMAT_ERROR; - res_unload(pResData); - return; - } - if( length>=0 && - (length<((1+indexLength)<<2) || - length<(indexes[URES_INDEX_BUNDLE_TOP]<<2)) - ) { - *errorCode=U_INVALID_FORMAT_ERROR; - res_unload(pResData); - return; - } - if(indexes[URES_INDEX_KEYS_TOP]>(1+indexLength)) { - pResData->localKeyLimit=indexes[URES_INDEX_KEYS_TOP]<<2; - } - if(formatVersion[0]>=3) { - // In formatVersion 1, the indexLength took up this whole int. - // In version 2, bits 31..8 were reserved and always 0. - // In version 3, they contain bits 23..0 of the poolStringIndexLimit. - // Bits 27..24 are in indexes[URES_INDEX_ATTRIBUTES] bits 15..12. 
- pResData->poolStringIndexLimit=(int32_t)((uint32_t)indexes[URES_INDEX_LENGTH]>>8); - } - if(indexLength>URES_INDEX_ATTRIBUTES) { - int32_t att=indexes[URES_INDEX_ATTRIBUTES]; - pResData->noFallback=(UBool)(att&URES_ATT_NO_FALLBACK); - pResData->isPoolBundle=(UBool)((att&URES_ATT_IS_POOL_BUNDLE)!=0); - pResData->usesPoolBundle=(UBool)((att&URES_ATT_USES_POOL_BUNDLE)!=0); - pResData->poolStringIndexLimit|=(att&0xf000)<<12; // bits 15..12 -> 27..24 - pResData->poolStringIndex16Limit=(int32_t)((uint32_t)att>>16); - } - if((pResData->isPoolBundle || pResData->usesPoolBundle) && indexLength<=URES_INDEX_POOL_CHECKSUM) { - *errorCode=U_INVALID_FORMAT_ERROR; - res_unload(pResData); - return; - } - if( indexLength>URES_INDEX_16BIT_TOP && - indexes[URES_INDEX_16BIT_TOP]>indexes[URES_INDEX_KEYS_TOP] - ) { - pResData->p16BitUnits=(const uint16_t *)(pResData->pRoot+indexes[URES_INDEX_KEYS_TOP]); - } - } - - if(formatVersion[0]==1 || U_CHARSET_FAMILY==U_ASCII_FAMILY) { - /* - * formatVersion 1: compare key strings in native-charset order - * formatVersion 2 and up: compare key strings in ASCII order - */ - pResData->useNativeStrcmp=TRUE; - } -} - -U_CAPI void U_EXPORT2 -res_read(ResourceData *pResData, - const UDataInfo *pInfo, const void *inBytes, int32_t length, - UErrorCode *errorCode) { - UVersionInfo formatVersion; - - uprv_memset(pResData, 0, sizeof(ResourceData)); - if(U_FAILURE(*errorCode)) { - return; - } - if(!isAcceptable(formatVersion, NULL, NULL, pInfo)) { - *errorCode=U_INVALID_FORMAT_ERROR; - return; - } - res_init(pResData, formatVersion, inBytes, length, errorCode); -} - -U_CFUNC void -res_load(ResourceData *pResData, - const char *path, const char *name, UErrorCode *errorCode) { - UVersionInfo formatVersion; - - uprv_memset(pResData, 0, sizeof(ResourceData)); - - /* load the ResourceBundle file */ - pResData->data=udata_openChoice(path, "res", name, isAcceptable, formatVersion, errorCode); - if(U_FAILURE(*errorCode)) { - return; - } - - /* get its memory and 
initialize *pResData */ - res_init(pResData, formatVersion, udata_getMemory(pResData->data), -1, errorCode); -} - -U_CFUNC void -res_unload(ResourceData *pResData) { - if(pResData->data!=NULL) { - udata_close(pResData->data); - pResData->data=NULL; - } -} - -static const int8_t gPublicTypes[URES_LIMIT] = { - URES_STRING, - URES_BINARY, - URES_TABLE, - URES_ALIAS, - - URES_TABLE, /* URES_TABLE32 */ - URES_TABLE, /* URES_TABLE16 */ - URES_STRING, /* URES_STRING_V2 */ - URES_INT, - - URES_ARRAY, - URES_ARRAY, /* URES_ARRAY16 */ - URES_NONE, - URES_NONE, - - URES_NONE, - URES_NONE, - URES_INT_VECTOR, - URES_NONE -}; - -U_CAPI UResType U_EXPORT2 -res_getPublicType(Resource res) { - return (UResType)gPublicTypes[RES_GET_TYPE(res)]; -} - -U_CAPI const UChar * U_EXPORT2 -res_getString(const ResourceData *pResData, Resource res, int32_t *pLength) { - const UChar *p; - uint32_t offset=RES_GET_OFFSET(res); - int32_t length; - if(RES_GET_TYPE(res)==URES_STRING_V2) { - int32_t first; - if((int32_t)offsetpoolStringIndexLimit) { - p=(const UChar *)pResData->poolBundleStrings+offset; - } else { - p=(const UChar *)pResData->p16BitUnits+(offset-pResData->poolStringIndexLimit); - } - first=*p; - if(!U16_IS_TRAIL(first)) { - length=u_strlen(p); - } else if(first<0xdfef) { - length=first&0x3ff; - ++p; - } else if(first<0xdfff) { - length=((first-0xdfef)<<16)|p[1]; - p+=2; - } else { - length=((int32_t)p[1]<<16)|p[2]; - p+=3; - } - } else if(res==offset) /* RES_GET_TYPE(res)==URES_STRING */ { - const int32_t *p32= res==0 ? &gEmptyString.length : pResData->pRoot+res; - length=*p32++; - p=(const UChar *)p32; - } else { - p=NULL; - length=0; - } - if(pLength) { - *pLength=length; - } - return p; -} - -namespace { - -/** - * CLDR string value (three empty-set symbols)=={2205, 2205, 2205} - * prevents fallback to the parent bundle. - * TODO: combine with other code that handles this marker, use EMPTY_SET constant. - * TODO: maybe move to uresbund.cpp? 
- */ -UBool isNoInheritanceMarker(const ResourceData *pResData, Resource res) { - uint32_t offset=RES_GET_OFFSET(res); - if (offset == 0) { - // empty string - } else if (res == offset) { - const int32_t *p32=pResData->pRoot+res; - int32_t length=*p32; - const UChar *p=(const UChar *)p32; - return length == 3 && p[2] == 0x2205 && p[3] == 0x2205 && p[4] == 0x2205; - } else if (RES_GET_TYPE(res) == URES_STRING_V2) { - const UChar *p; - if((int32_t)offsetpoolStringIndexLimit) { - p=(const UChar *)pResData->poolBundleStrings+offset; - } else { - p=(const UChar *)pResData->p16BitUnits+(offset-pResData->poolStringIndexLimit); - } - int32_t first=*p; - if (first == 0x2205) { // implicit length - return p[1] == 0x2205 && p[2] == 0x2205 && p[3] == 0; - } else if (first == 0xdc03) { // explicit length 3 (should not occur) - return p[1] == 0x2205 && p[2] == 0x2205 && p[3] == 0x2205; - } else { - // Assume that the string has not been stored with more length units than necessary. - return FALSE; - } - } - return FALSE; -} - -int32_t getStringArray(const ResourceData *pResData, const icu::ResourceArray &array, - icu::UnicodeString *dest, int32_t capacity, - UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return 0; - } - if(dest == NULL ? 
capacity != 0 : capacity < 0) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - int32_t length = array.getSize(); - if(length == 0) { - return 0; - } - if(length > capacity) { - errorCode = U_BUFFER_OVERFLOW_ERROR; - return length; - } - for(int32_t i = 0; i < length; ++i) { - int32_t sLength; - const UChar *s = res_getString(pResData, array.internalGetResource(pResData, i), &sLength); - if(s == NULL) { - errorCode = U_RESOURCE_TYPE_MISMATCH; - return 0; - } - dest[i].setTo(TRUE, s, sLength); - } - return length; -} - -} // namespace - -U_CAPI const UChar * U_EXPORT2 -res_getAlias(const ResourceData *pResData, Resource res, int32_t *pLength) { - const UChar *p; - uint32_t offset=RES_GET_OFFSET(res); - int32_t length; - if(RES_GET_TYPE(res)==URES_ALIAS) { - const int32_t *p32= offset==0 ? &gEmptyString.length : pResData->pRoot+offset; - length=*p32++; - p=(const UChar *)p32; - } else { - p=NULL; - length=0; - } - if(pLength) { - *pLength=length; - } - return p; -} - -U_CAPI const uint8_t * U_EXPORT2 -res_getBinary(const ResourceData *pResData, Resource res, int32_t *pLength) { - const uint8_t *p; - uint32_t offset=RES_GET_OFFSET(res); - int32_t length; - if(RES_GET_TYPE(res)==URES_BINARY) { - const int32_t *p32= offset==0 ? (const int32_t*)&gEmpty32 : pResData->pRoot+offset; - length=*p32++; - p=(const uint8_t *)p32; - } else { - p=NULL; - length=0; - } - if(pLength) { - *pLength=length; - } - return p; -} - - -U_CAPI const int32_t * U_EXPORT2 -res_getIntVector(const ResourceData *pResData, Resource res, int32_t *pLength) { - const int32_t *p; - uint32_t offset=RES_GET_OFFSET(res); - int32_t length; - if(RES_GET_TYPE(res)==URES_INT_VECTOR) { - p= offset==0 ? 
(const int32_t *)&gEmpty32 : pResData->pRoot+offset; - length=*p++; - } else { - p=NULL; - length=0; - } - if(pLength) { - *pLength=length; - } - return p; -} - -U_CAPI int32_t U_EXPORT2 -res_countArrayItems(const ResourceData *pResData, Resource res) { - uint32_t offset=RES_GET_OFFSET(res); - switch(RES_GET_TYPE(res)) { - case URES_STRING: - case URES_STRING_V2: - case URES_BINARY: - case URES_ALIAS: - case URES_INT: - case URES_INT_VECTOR: - return 1; - case URES_ARRAY: - case URES_TABLE32: - return offset==0 ? 0 : *(pResData->pRoot+offset); - case URES_TABLE: - return offset==0 ? 0 : *((const uint16_t *)(pResData->pRoot+offset)); - case URES_ARRAY16: - case URES_TABLE16: - return pResData->p16BitUnits[offset]; - default: - return 0; - } -} - -U_NAMESPACE_BEGIN - -ResourceDataValue::~ResourceDataValue() {} - -UResType ResourceDataValue::getType() const { - return res_getPublicType(res); -} - -const UChar *ResourceDataValue::getString(int32_t &length, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return NULL; - } - const UChar *s = res_getString(pResData, res, &length); - if(s == NULL) { - errorCode = U_RESOURCE_TYPE_MISMATCH; - } - return s; -} - -const UChar *ResourceDataValue::getAliasString(int32_t &length, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return NULL; - } - const UChar *s = res_getAlias(pResData, res, &length); - if(s == NULL) { - errorCode = U_RESOURCE_TYPE_MISMATCH; - } - return s; -} - -int32_t ResourceDataValue::getInt(UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return 0; - } - if(RES_GET_TYPE(res) != URES_INT) { - errorCode = U_RESOURCE_TYPE_MISMATCH; - } - return RES_GET_INT(res); -} - -uint32_t ResourceDataValue::getUInt(UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return 0; - } - if(RES_GET_TYPE(res) != URES_INT) { - errorCode = U_RESOURCE_TYPE_MISMATCH; - } - return RES_GET_UINT(res); -} - -const int32_t *ResourceDataValue::getIntVector(int32_t &length, UErrorCode 
&errorCode) const { - if(U_FAILURE(errorCode)) { - return NULL; - } - const int32_t *iv = res_getIntVector(pResData, res, &length); - if(iv == NULL) { - errorCode = U_RESOURCE_TYPE_MISMATCH; - } - return iv; -} - -const uint8_t *ResourceDataValue::getBinary(int32_t &length, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return NULL; - } - const uint8_t *b = res_getBinary(pResData, res, &length); - if(b == NULL) { - errorCode = U_RESOURCE_TYPE_MISMATCH; - } - return b; -} - -ResourceArray ResourceDataValue::getArray(UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return ResourceArray(); - } - const uint16_t *items16 = NULL; - const Resource *items32 = NULL; - uint32_t offset=RES_GET_OFFSET(res); - int32_t length = 0; - switch(RES_GET_TYPE(res)) { - case URES_ARRAY: - if (offset!=0) { // empty if offset==0 - items32 = (const Resource *)pResData->pRoot+offset; - length = *items32++; - } - break; - case URES_ARRAY16: - items16 = pResData->p16BitUnits+offset; - length = *items16++; - break; - default: - errorCode = U_RESOURCE_TYPE_MISMATCH; - return ResourceArray(); - } - return ResourceArray(items16, items32, length); -} - -ResourceTable ResourceDataValue::getTable(UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return ResourceTable(); - } - const uint16_t *keys16 = NULL; - const int32_t *keys32 = NULL; - const uint16_t *items16 = NULL; - const Resource *items32 = NULL; - uint32_t offset = RES_GET_OFFSET(res); - int32_t length = 0; - switch(RES_GET_TYPE(res)) { - case URES_TABLE: - if (offset != 0) { // empty if offset==0 - keys16 = (const uint16_t *)(pResData->pRoot+offset); - length = *keys16++; - items32 = (const Resource *)(keys16+length+(~length&1)); - } - break; - case URES_TABLE16: - keys16 = pResData->p16BitUnits+offset; - length = *keys16++; - items16 = keys16 + length; - break; - case URES_TABLE32: - if (offset != 0) { // empty if offset==0 - keys32 = pResData->pRoot+offset; - length = *keys32++; - items32 = (const 
Resource *)keys32 + length; - } - break; - default: - errorCode = U_RESOURCE_TYPE_MISMATCH; - return ResourceTable(); - } - return ResourceTable(keys16, keys32, items16, items32, length); -} - -UBool ResourceDataValue::isNoInheritanceMarker() const { - return ::isNoInheritanceMarker(pResData, res); -} - -int32_t ResourceDataValue::getStringArray(UnicodeString *dest, int32_t capacity, - UErrorCode &errorCode) const { - return ::getStringArray(pResData, getArray(errorCode), dest, capacity, errorCode); -} - -int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity, - UErrorCode &errorCode) const { - if(URES_IS_ARRAY(res)) { - return ::getStringArray(pResData, getArray(errorCode), dest, capacity, errorCode); - } - if(U_FAILURE(errorCode)) { - return 0; - } - if(dest == NULL ? capacity != 0 : capacity < 0) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - if(capacity < 1) { - errorCode = U_BUFFER_OVERFLOW_ERROR; - return 1; - } - int32_t sLength; - const UChar *s = res_getString(pResData, res, &sLength); - if(s != NULL) { - dest[0].setTo(TRUE, s, sLength); - return 1; - } - errorCode = U_RESOURCE_TYPE_MISMATCH; - return 0; -} - -UnicodeString ResourceDataValue::getStringOrFirstOfArray(UErrorCode &errorCode) const { - UnicodeString us; - if(U_FAILURE(errorCode)) { - return us; - } - int32_t sLength; - const UChar *s = res_getString(pResData, res, &sLength); - if(s != NULL) { - us.setTo(TRUE, s, sLength); - return us; - } - ResourceArray array = getArray(errorCode); - if(U_FAILURE(errorCode)) { - return us; - } - if(array.getSize() > 0) { - s = res_getString(pResData, array.internalGetResource(pResData, 0), &sLength); - if(s != NULL) { - us.setTo(TRUE, s, sLength); - return us; - } - } - errorCode = U_RESOURCE_TYPE_MISMATCH; - return us; -} - -U_NAMESPACE_END - -static Resource -makeResourceFrom16(const ResourceData *pResData, int32_t res16) { - if(res16poolStringIndex16Limit) { - // Pool string, nothing to do. 
- } else { - // Local string, adjust the 16-bit offset to a regular one, - // with a larger pool string index limit. - res16=res16-pResData->poolStringIndex16Limit+pResData->poolStringIndexLimit; - } - return URES_MAKE_RESOURCE(URES_STRING_V2, res16); -} - -U_CAPI Resource U_EXPORT2 -res_getTableItemByKey(const ResourceData *pResData, Resource table, - int32_t *indexR, const char **key) { - uint32_t offset=RES_GET_OFFSET(table); - int32_t length; - int32_t idx; - if(key == NULL || *key == NULL) { - return RES_BOGUS; - } - switch(RES_GET_TYPE(table)) { - case URES_TABLE: { - if (offset!=0) { /* empty if offset==0 */ - const uint16_t *p= (const uint16_t *)(pResData->pRoot+offset); - length=*p++; - *indexR=idx=_res_findTableItem(pResData, p, length, *key, key); - if(idx>=0) { - const Resource *p32=(const Resource *)(p+length+(~length&1)); - return p32[idx]; - } - } - break; - } - case URES_TABLE16: { - const uint16_t *p=pResData->p16BitUnits+offset; - length=*p++; - *indexR=idx=_res_findTableItem(pResData, p, length, *key, key); - if(idx>=0) { - return makeResourceFrom16(pResData, p[length+idx]); - } - break; - } - case URES_TABLE32: { - if (offset!=0) { /* empty if offset==0 */ - const int32_t *p= pResData->pRoot+offset; - length=*p++; - *indexR=idx=_res_findTable32Item(pResData, p, length, *key, key); - if(idx>=0) { - return (Resource)p[length+idx]; - } - } - break; - } - default: - break; - } - return RES_BOGUS; -} - -U_CAPI Resource U_EXPORT2 -res_getTableItemByIndex(const ResourceData *pResData, Resource table, - int32_t indexR, const char **key) { - uint32_t offset=RES_GET_OFFSET(table); - int32_t length; - if (indexR < 0) { - return RES_BOGUS; - } - switch(RES_GET_TYPE(table)) { - case URES_TABLE: { - if (offset != 0) { /* empty if offset==0 */ - const uint16_t *p= (const uint16_t *)(pResData->pRoot+offset); - length=*p++; - if(indexRp16BitUnits+offset; - length=*p++; - if(indexRpRoot+offset; - length=*p++; - if(indexRrootRes, &idx, &realKey); -} - - -UBool 
icu::ResourceTable::getKeyAndValue(int32_t i, - const char *&key, icu::ResourceValue &value) const { - if(0 <= i && i < length) { - icu::ResourceDataValue &rdValue = static_cast(value); - if (keys16 != NULL) { - key = RES_GET_KEY16(rdValue.pResData, keys16[i]); - } else { - key = RES_GET_KEY32(rdValue.pResData, keys32[i]); - } - Resource res; - if (items16 != NULL) { - res = makeResourceFrom16(rdValue.pResData, items16[i]); - } else { - res = items32[i]; - } - rdValue.setResource(res); - return TRUE; - } - return FALSE; -} - -U_CAPI Resource U_EXPORT2 -res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexR) { - uint32_t offset=RES_GET_OFFSET(array); - if (indexR < 0) { - return RES_BOGUS; - } - switch(RES_GET_TYPE(array)) { - case URES_ARRAY: { - if (offset!=0) { /* empty if offset==0 */ - const int32_t *p= pResData->pRoot+offset; - if(indexR<*p) { - return (Resource)p[1+indexR]; - } - } - break; - } - case URES_ARRAY16: { - const uint16_t *p=pResData->p16BitUnits+offset; - if(indexR<*p) { - return makeResourceFrom16(pResData, p[1+indexR]); - } - break; - } - default: - break; - } - return RES_BOGUS; -} - -uint32_t icu::ResourceArray::internalGetResource(const ResourceData *pResData, int32_t i) const { - if (items16 != NULL) { - return makeResourceFrom16(pResData, items16[i]); - } else { - return items32[i]; - } -} - -UBool icu::ResourceArray::getValue(int32_t i, icu::ResourceValue &value) const { - if(0 <= i && i < length) { - icu::ResourceDataValue &rdValue = static_cast(value); - rdValue.setResource(internalGetResource(rdValue.pResData, i)); - return TRUE; - } - return FALSE; -} - -U_CFUNC Resource -res_findResource(const ResourceData *pResData, Resource r, char** path, const char** key) { - char *pathP = *path, *nextSepP = *path; - char *closeIndex = NULL; - Resource t1 = r; - Resource t2; - int32_t indexR = 0; - UResType type = (UResType)RES_GET_TYPE(t1); - - /* if you come in with an empty path, you'll be getting back the same resource 
*/ - if(!uprv_strlen(pathP)) { - return r; - } - - /* one needs to have an aggregate resource in order to search in it */ - if(!URES_IS_CONTAINER(type)) { - return RES_BOGUS; - } - - while(nextSepP && *pathP && t1 != RES_BOGUS && URES_IS_CONTAINER(type)) { - /* Iteration stops if: the path has been consumed, we found a non-existing - * resource (t1 == RES_BOGUS) or we found a scalar resource (including alias) - */ - nextSepP = uprv_strchr(pathP, RES_PATH_SEPARATOR); - /* if there are more separators, terminate string - * and set path to the remaining part of the string - */ - if(nextSepP != NULL) { - if(nextSepP == pathP) { - // Empty key string. - return RES_BOGUS; - } - *nextSepP = 0; /* overwrite the separator with a NUL to terminate the key */ - *path = nextSepP+1; - } else { - *path = uprv_strchr(pathP, 0); - } - - /* if the resource is a table */ - /* try the key based access */ - if(URES_IS_TABLE(type)) { - *key = pathP; - t2 = res_getTableItemByKey(pResData, t1, &indexR, key); - if(t2 == RES_BOGUS) { - /* if we fail to get the resource by key, maybe we got an index */ - indexR = uprv_strtol(pathP, &closeIndex, 10); - if(indexR >= 0 && *closeIndex == 0) { - /* if we indeed have an index, try to get the item by index */ - t2 = res_getTableItemByIndex(pResData, t1, indexR, key); - } // else t2 is already RES_BOGUS - } - } else if(URES_IS_ARRAY(type)) { - indexR = uprv_strtol(pathP, &closeIndex, 10); - if(indexR >= 0 && *closeIndex == 0) { - t2 = res_getArrayItem(pResData, t1, indexR); - } else { - t2 = RES_BOGUS; /* have an array, but don't have a valid index */ - } - *key = NULL; - } else { /* can't do much here, except setting t2 to bogus */ - t2 = RES_BOGUS; - } - t1 = t2; - type = (UResType)RES_GET_TYPE(t1); - /* position pathP to next resource key/index */ - pathP = *path; - } - - return t1; -} - -/* resource bundle swapping ------------------------------------------------- */ - -/* - * Need to always enumerate the entire item tree, - * track the lowest 
address of any item to use as the limit for char keys[], - * track the highest address of any item to return the size of the data. - * - * We should have thought of storing those in the data... - * It is possible to extend the data structure by putting additional values - * in places that are inaccessible by ordinary enumeration of the item tree. - * For example, additional integers could be stored at the beginning or - * end of the key strings; this could be indicated by a minor version number, - * and the data swapping would have to know about these values. - * - * The data structure does not forbid keys to be shared, so we must swap - * all keys once instead of each key when it is referenced. - * - * These swapping functions assume that a resource bundle always has a length - * that is a multiple of 4 bytes. - * Currently, this is trivially true because genrb writes bundle tree leaves - * physically first, before their branches, so that the root table with its - * array of resource items (uint32_t values) is always last. - */ - -/* definitions for table sorting ------------------------ */ - -/* - * row of a temporary array - * - * gets platform-endian key string indexes and sorting indexes; - * after sorting this array by keys, the actual key/value arrays are permutated - * according to the sorting indexes - */ -typedef struct Row { - int32_t keyIndex, sortIndex; -} Row; - -static int32_t U_CALLCONV -ures_compareRows(const void *context, const void *left, const void *right) { - const char *keyChars=(const char *)context; - return (int32_t)uprv_strcmp(keyChars+((const Row *)left)->keyIndex, - keyChars+((const Row *)right)->keyIndex); -} - -typedef struct TempTable { - const char *keyChars; - Row *rows; - int32_t *resort; - uint32_t *resFlags; - int32_t localKeyLimit; - uint8_t majorFormatVersion; -} TempTable; - -enum { - STACK_ROW_CAPACITY=200 -}; - -/* The table item key string is not locally available. 
*/ -static const char *const gUnknownKey=""; - -/* resource table key for collation binaries: "%%CollationBin" */ -static const UChar gCollationBinKey[]={ - 0x25, 0x25, - 0x43, 0x6f, 0x6c, 0x6c, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x42, 0x69, 0x6e, - 0 -}; - -/* - * swap one resource item - */ -static void -ures_swapResource(const UDataSwapper *ds, - const Resource *inBundle, Resource *outBundle, - Resource res, /* caller swaps res itself */ - const char *key, - TempTable *pTempTable, - UErrorCode *pErrorCode) { - const Resource *p; - Resource *q; - int32_t offset, count; - - switch(RES_GET_TYPE(res)) { - case URES_TABLE16: - case URES_STRING_V2: - case URES_INT: - case URES_ARRAY16: - /* integer, or points to 16-bit units, nothing to do here */ - return; - default: - break; - } - - /* all other types use an offset to point to their data */ - offset=(int32_t)RES_GET_OFFSET(res); - if(offset==0) { - /* special offset indicating an empty item */ - return; - } - if(pTempTable->resFlags[offset>>5]&((uint32_t)1<<(offset&0x1f))) { - /* we already swapped this resource item */ - return; - } else { - /* mark it as swapped now */ - pTempTable->resFlags[offset>>5]|=((uint32_t)1<<(offset&0x1f)); - } - - p=inBundle+offset; - q=outBundle+offset; - - switch(RES_GET_TYPE(res)) { - case URES_ALIAS: - /* physically same value layout as string, fall through */ - U_FALLTHROUGH; - case URES_STRING: - count=udata_readInt32(ds, (int32_t)*p); - /* swap length */ - ds->swapArray32(ds, p, 4, q, pErrorCode); - /* swap each UChar (the terminating NUL would not change) */ - ds->swapArray16(ds, p+1, 2*count, q+1, pErrorCode); - break; - case URES_BINARY: - count=udata_readInt32(ds, (int32_t)*p); - /* swap length */ - ds->swapArray32(ds, p, 4, q, pErrorCode); - /* no need to swap or copy bytes - ures_swap() copied them all */ - - /* swap known formats */ -#if !UCONFIG_NO_COLLATION - if( key!=NULL && /* the binary is in a table */ - (key!=gUnknownKey ? 
- /* its table key string is "%%CollationBin" */ - 0==ds->compareInvChars(ds, key, -1, - gCollationBinKey, UPRV_LENGTHOF(gCollationBinKey)-1) : - /* its table key string is unknown but it looks like a collation binary */ - ucol_looksLikeCollationBinary(ds, p+1, count)) - ) { - ucol_swap(ds, p+1, count, q+1, pErrorCode); - } -#endif - break; - case URES_TABLE: - case URES_TABLE32: - { - const uint16_t *pKey16; - uint16_t *qKey16; - - const int32_t *pKey32; - int32_t *qKey32; - - Resource item; - int32_t i, oldIndex; - - if(RES_GET_TYPE(res)==URES_TABLE) { - /* get table item count */ - pKey16=(const uint16_t *)p; - qKey16=(uint16_t *)q; - count=ds->readUInt16(*pKey16); - - pKey32=qKey32=NULL; - - /* swap count */ - ds->swapArray16(ds, pKey16++, 2, qKey16++, pErrorCode); - - offset+=((1+count)+1)/2; - } else { - /* get table item count */ - pKey32=(const int32_t *)p; - qKey32=(int32_t *)q; - count=udata_readInt32(ds, *pKey32); - - pKey16=qKey16=NULL; - - /* swap count */ - ds->swapArray32(ds, pKey32++, 4, qKey32++, pErrorCode); - - offset+=1+count; - } - - if(count==0) { - break; - } - - p=inBundle+offset; /* pointer to table resources */ - q=outBundle+offset; - - /* recurse */ - for(i=0; ireadUInt16(pKey16[i]); - if(keyOffsetlocalKeyLimit) { - itemKey=(const char *)outBundle+keyOffset; - } - } else { - int32_t keyOffset=udata_readInt32(ds, pKey32[i]); - if(keyOffset>=0) { - itemKey=(const char *)outBundle+keyOffset; - } - } - item=ds->readUInt32(p[i]); - ures_swapResource(ds, inBundle, outBundle, item, itemKey, pTempTable, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "ures_swapResource(table res=%08x)[%d].recurse(%08x) failed\n", - res, i, item); - return; - } - } - - if(pTempTable->majorFormatVersion>1 || ds->inCharset==ds->outCharset) { - /* no need to sort, just swap the offset/value arrays */ - if(pKey16!=NULL) { - ds->swapArray16(ds, pKey16, count*2, qKey16, pErrorCode); - ds->swapArray32(ds, p, count*4, q, pErrorCode); - } else { - /* 
swap key offsets and items as one array */ - ds->swapArray32(ds, pKey32, count*2*4, qKey32, pErrorCode); - } - break; - } - - /* - * We need to sort tables by outCharset key strings because they - * sort differently for different charset families. - * ures_swap() already set pTempTable->keyChars appropriately. - * First we set up a temporary table with the key indexes and - * sorting indexes and sort that. - * Then we permutate and copy/swap the actual values. - */ - if(pKey16!=NULL) { - for(i=0; irows[i].keyIndex=ds->readUInt16(pKey16[i]); - pTempTable->rows[i].sortIndex=i; - } - } else { - for(i=0; irows[i].keyIndex=udata_readInt32(ds, pKey32[i]); - pTempTable->rows[i].sortIndex=i; - } - } - uprv_sortArray(pTempTable->rows, count, sizeof(Row), - ures_compareRows, pTempTable->keyChars, - FALSE, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "ures_swapResource(table res=%08x).uprv_sortArray(%d items) failed\n", - res, count); - return; - } - - /* - * copy/swap/permutate items - * - * If we swap in-place, then the permutation must use another - * temporary array (pTempTable->resort) - * before the results are copied to the outBundle. 
- */ - /* keys */ - if(pKey16!=NULL) { - uint16_t *rKey16; - - if(pKey16!=qKey16) { - rKey16=qKey16; - } else { - rKey16=(uint16_t *)pTempTable->resort; - } - for(i=0; irows[i].sortIndex; - ds->swapArray16(ds, pKey16+oldIndex, 2, rKey16+i, pErrorCode); - } - if(qKey16!=rKey16) { - uprv_memcpy(qKey16, rKey16, 2*count); - } - } else { - int32_t *rKey32; - - if(pKey32!=qKey32) { - rKey32=qKey32; - } else { - rKey32=pTempTable->resort; - } - for(i=0; irows[i].sortIndex; - ds->swapArray32(ds, pKey32+oldIndex, 4, rKey32+i, pErrorCode); - } - if(qKey32!=rKey32) { - uprv_memcpy(qKey32, rKey32, 4*count); - } - } - - /* resources */ - { - Resource *r; - - - if(p!=q) { - r=q; - } else { - r=(Resource *)pTempTable->resort; - } - for(i=0; irows[i].sortIndex; - ds->swapArray32(ds, p+oldIndex, 4, r+i, pErrorCode); - } - if(q!=r) { - uprv_memcpy(q, r, 4*count); - } - } - } - break; - case URES_ARRAY: - { - Resource item; - int32_t i; - - count=udata_readInt32(ds, (int32_t)*p); - /* swap length */ - ds->swapArray32(ds, p++, 4, q++, pErrorCode); - - /* recurse */ - for(i=0; ireadUInt32(p[i]); - ures_swapResource(ds, inBundle, outBundle, item, NULL, pTempTable, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "ures_swapResource(array res=%08x)[%d].recurse(%08x) failed\n", - res, i, item); - return; - } - } - - /* swap items */ - ds->swapArray32(ds, p, 4*count, q, pErrorCode); - } - break; - case URES_INT_VECTOR: - count=udata_readInt32(ds, (int32_t)*p); - /* swap length and each integer */ - ds->swapArray32(ds, p, 4*(1+count), q, pErrorCode); - break; - default: - /* also catches RES_BOGUS */ - *pErrorCode=U_UNSUPPORTED_ERROR; - break; - } -} - -U_CAPI int32_t U_EXPORT2 -ures_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - const Resource *inBundle; - Resource rootRes; - int32_t headerSize, maxTableLength; - - Row rows[STACK_ROW_CAPACITY]; - int32_t resort[STACK_ROW_CAPACITY]; - 
TempTable tempTable; - - const int32_t *inIndexes; - - /* the following integers count Resource item offsets (4 bytes each), not bytes */ - int32_t bundleLength, indexLength, keysBottom, keysTop, resBottom, top; - - /* udata_swapDataHeader checks the arguments */ - headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* check data format and format version */ - pInfo=(const UDataInfo *)((const char *)inData+4); - if(!( - pInfo->dataFormat[0]==0x52 && /* dataFormat="ResB" */ - pInfo->dataFormat[1]==0x65 && - pInfo->dataFormat[2]==0x73 && - pInfo->dataFormat[3]==0x42 && - /* formatVersion 1.1+ or 2.x or 3.x */ - ((pInfo->formatVersion[0]==1 && pInfo->formatVersion[1]>=1) || - pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3) - )) { - udata_printError(ds, "ures_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a resource bundle\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0], pInfo->formatVersion[1]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - tempTable.majorFormatVersion=pInfo->formatVersion[0]; - - /* a resource bundle must contain at least one resource item */ - if(length<0) { - bundleLength=-1; - } else { - bundleLength=(length-headerSize)/4; - - /* formatVersion 1.1 must have a root item and at least 5 indexes */ - if(bundleLength<(1+5)) { - udata_printError(ds, "ures_swap(): too few bytes (%d after header) for a resource bundle\n", - length-headerSize); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - - inBundle=(const Resource *)((const char *)inData+headerSize); - rootRes=ds->readUInt32(*inBundle); - - /* formatVersion 1.1 adds the indexes[] array */ - inIndexes=(const int32_t *)(inBundle+1); - - indexLength=udata_readInt32(ds, inIndexes[URES_INDEX_LENGTH])&0xff; - if(indexLength<=URES_INDEX_MAX_TABLE_LENGTH) { - udata_printError(ds, 
"ures_swap(): too few indexes for a 1.1+ resource bundle\n"); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - keysBottom=1+indexLength; - keysTop=udata_readInt32(ds, inIndexes[URES_INDEX_KEYS_TOP]); - if(indexLength>URES_INDEX_16BIT_TOP) { - resBottom=udata_readInt32(ds, inIndexes[URES_INDEX_16BIT_TOP]); - } else { - resBottom=keysTop; - } - top=udata_readInt32(ds, inIndexes[URES_INDEX_BUNDLE_TOP]); - maxTableLength=udata_readInt32(ds, inIndexes[URES_INDEX_MAX_TABLE_LENGTH]); - - if(0<=bundleLength && bundleLength(1+indexLength)) { - tempTable.localKeyLimit=keysTop<<2; - } else { - tempTable.localKeyLimit=0; - } - - if(length>=0) { - Resource *outBundle=(Resource *)((char *)outData+headerSize); - - /* track which resources we have already swapped */ - uint32_t stackResFlags[STACK_ROW_CAPACITY]; - int32_t resFlagsLength; - - /* - * We need one bit per 4 resource bundle bytes so that we can track - * every possible Resource for whether we have swapped it already. - * Multiple Resource words can refer to the same bundle offsets - * for sharing identical values. - * We could optimize this by allocating only for locations above - * where Resource values are stored (above keys & strings). 
- */ - resFlagsLength=(length+31)>>5; /* number of bytes needed */ - resFlagsLength=(resFlagsLength+3)&~3; /* multiple of 4 bytes for uint32_t */ - if(resFlagsLength<=(int32_t)sizeof(stackResFlags)) { - tempTable.resFlags=stackResFlags; - } else { - tempTable.resFlags=(uint32_t *)uprv_malloc(resFlagsLength); - if(tempTable.resFlags==NULL) { - udata_printError(ds, "ures_swap(): unable to allocate memory for tracking resources\n"); - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return 0; - } - } - uprv_memset(tempTable.resFlags, 0, resFlagsLength); - - /* copy the bundle for binary and inaccessible data */ - if(inData!=outData) { - uprv_memcpy(outBundle, inBundle, 4*top); - } - - /* swap the key strings, but not the padding bytes (0xaa) after the last string and its NUL */ - udata_swapInvStringBlock(ds, inBundle+keysBottom, 4*(keysTop-keysBottom), - outBundle+keysBottom, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "ures_swap().udata_swapInvStringBlock(keys[%d]) failed\n", 4*(keysTop-keysBottom)); - return 0; - } - - /* swap the 16-bit units (strings, table16, array16) */ - if(keysTopswapArray16(ds, inBundle+keysTop, (resBottom-keysTop)*4, outBundle+keysTop, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "ures_swap().swapArray16(16-bit units[%d]) failed\n", 2*(resBottom-keysTop)); - return 0; - } - } - - /* allocate the temporary table for sorting resource tables */ - tempTable.keyChars=(const char *)outBundle; /* sort by outCharset */ - if(tempTable.majorFormatVersion>1 || maxTableLength<=STACK_ROW_CAPACITY) { - tempTable.rows=rows; - tempTable.resort=resort; - } else { - tempTable.rows=(Row *)uprv_malloc(maxTableLength*sizeof(Row)+maxTableLength*4); - if(tempTable.rows==NULL) { - udata_printError(ds, "ures_swap(): unable to allocate memory for sorting tables (max length: %d)\n", - maxTableLength); - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - if(tempTable.resFlags!=stackResFlags) { - uprv_free(tempTable.resFlags); - } - return 0; 
- } - tempTable.resort=(int32_t *)(tempTable.rows+maxTableLength); - } - - /* swap the resources */ - ures_swapResource(ds, inBundle, outBundle, rootRes, NULL, &tempTable, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - udata_printError(ds, "ures_swapResource(root res=%08x) failed\n", - rootRes); - } - - if(tempTable.rows!=rows) { - uprv_free(tempTable.rows); - } - if(tempTable.resFlags!=stackResFlags) { - uprv_free(tempTable.resFlags); - } - - /* swap the root resource and indexes */ - ds->swapArray32(ds, inBundle, keysBottom*4, outBundle, pErrorCode); - } - - return headerSize+4*top; -} diff --git a/deps/node/deps/icu-small/source/common/uresdata.h b/deps/node/deps/icu-small/source/common/uresdata.h deleted file mode 100644 index 4e28ddcc..00000000 --- a/deps/node/deps/icu-small/source/common/uresdata.h +++ /dev/null @@ -1,519 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1999-2016, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* file name: uresdata.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999dec08 -* created by: Markus W. Scherer -* 06/24/02 weiv Added support for resource sharing -*/ - -#ifndef __RESDATA_H__ -#define __RESDATA_H__ - -#include "unicode/utypes.h" -#include "unicode/udata.h" -#include "unicode/ures.h" -#include "putilimp.h" -#include "udataswp.h" - -/** - * Numeric constants for internal-only types of resource items. - * These must use different numeric values than UResType constants - * because they are used together. - * Internal types are never returned by ures_getType(). - */ -typedef enum { - /** Include a negative value so that the compiler uses the same int type as for UResType. 
*/ - URES_INTERNAL_NONE=-1, - - /** Resource type constant for tables with 32-bit count, key offsets and values. */ - URES_TABLE32=4, - - /** - * Resource type constant for tables with 16-bit count, key offsets and values. - * All values are URES_STRING_V2 strings. - */ - URES_TABLE16=5, - - /** Resource type constant for 16-bit Unicode strings in formatVersion 2. */ - URES_STRING_V2=6, - - /** - * Resource type constant for arrays with 16-bit count and values. - * All values are URES_STRING_V2 strings. - */ - URES_ARRAY16=9 - - /* Resource type 15 is not defined but effectively used by RES_BOGUS=0xffffffff. */ -} UResInternalType; - -/* - * A Resource is a 32-bit value that has 2 bit fields: - * 31..28 4-bit type, see enum below - * 27..0 28-bit four-byte-offset or value according to the type - */ -typedef uint32_t Resource; - -#define RES_BOGUS 0xffffffff -#define RES_MAX_OFFSET 0x0fffffff - -#define RES_GET_TYPE(res) ((int32_t)((res)>>28UL)) -#define RES_GET_OFFSET(res) ((res)&0x0fffffff) -#define RES_GET_POINTER(pRoot, res) ((pRoot)+RES_GET_OFFSET(res)) - -/* get signed and unsigned integer values directly from the Resource handle */ -#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC -# define RES_GET_INT(res) (((int32_t)((res)<<4L))>>4L) -#else -# define RES_GET_INT(res) (int32_t)(((res)&0x08000000) ? 
(res)|0xf0000000 : (res)&0x07ffffff) -#endif - -#define RES_GET_UINT(res) ((res)&0x0fffffff) - -#define URES_IS_ARRAY(type) ((int32_t)(type)==URES_ARRAY || (int32_t)(type)==URES_ARRAY16) -#define URES_IS_TABLE(type) ((int32_t)(type)==URES_TABLE || (int32_t)(type)==URES_TABLE16 || (int32_t)(type)==URES_TABLE32) -#define URES_IS_CONTAINER(type) (URES_IS_TABLE(type) || URES_IS_ARRAY(type)) - -#define URES_MAKE_RESOURCE(type, offset) (((Resource)(type)<<28)|(Resource)(offset)) -#define URES_MAKE_EMPTY_RESOURCE(type) ((Resource)(type)<<28) - -/* indexes[] value names; indexes are generally 32-bit (Resource) indexes */ -enum { - /** - * [0] contains the length of indexes[] - * which is at most URES_INDEX_TOP of the latest format version - * - * formatVersion==1: all bits contain the length of indexes[] - * but the length is much less than 0xff; - * formatVersion>1: - * only bits 7..0 contain the length of indexes[], - * bits 31..8 are reserved and set to 0 - * formatVersion>=3: - * bits 31..8 poolStringIndexLimit bits 23..0 - */ - URES_INDEX_LENGTH, - /** - * [1] contains the top of the key strings, - * same as the bottom of resources or UTF-16 strings, rounded up - */ - URES_INDEX_KEYS_TOP, - /** [2] contains the top of all resources */ - URES_INDEX_RESOURCES_TOP, - /** - * [3] contains the top of the bundle, - * in case it were ever different from [2] - */ - URES_INDEX_BUNDLE_TOP, - /** [4] max. 
length of any table */ - URES_INDEX_MAX_TABLE_LENGTH, - /** - * [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2) - * - * formatVersion>=3: - * bits 31..16 poolStringIndex16Limit - * bits 15..12 poolStringIndexLimit bits 27..24 - */ - URES_INDEX_ATTRIBUTES, - /** - * [6] top of the 16-bit units (UTF-16 string v2 UChars, URES_TABLE16, URES_ARRAY16), - * rounded up (new in formatVersion 2.0, ICU 4.4) - */ - URES_INDEX_16BIT_TOP, - /** [7] checksum of the pool bundle (new in formatVersion 2.0, ICU 4.4) */ - URES_INDEX_POOL_CHECKSUM, - URES_INDEX_TOP -}; - -/* - * Nofallback attribute, attribute bit 0 in indexes[URES_INDEX_ATTRIBUTES]. - * New in formatVersion 1.2 (ICU 3.6). - * - * If set, then this resource bundle is a standalone bundle. - * If not set, then the bundle participates in locale fallback, eventually - * all the way to the root bundle. - * If indexes[] is missing or too short, then the attribute cannot be determined - * reliably. Dependency checking should ignore such bundles, and loading should - * use fallbacks. - */ -#define URES_ATT_NO_FALLBACK 1 - -/* - * Attributes for bundles that are, or use, a pool bundle. - * A pool bundle provides key strings that are shared among several other bundles - * to reduce their total size. - * New in formatVersion 2 (ICU 4.4). - */ -#define URES_ATT_IS_POOL_BUNDLE 2 -#define URES_ATT_USES_POOL_BUNDLE 4 - -/* - * File format for .res resource bundle files - * - * ICU 56: New in formatVersion 3 compared with 2: ------------- - * - * Resource bundles can optionally use shared string-v2 values - * stored in the pool bundle. - * If so, then the indexes[] contain two new values - * in previously-unused bits of existing indexes[] slots: - * - poolStringIndexLimit: - * String-v2 offsets (in 32-bit Resource words) below this limit - * point to pool bundle string-v2 values. - * - poolStringIndex16Limit: - * Resource16 string-v2 offsets below this limit - * point to pool bundle string-v2 values. 
- * Guarantee: poolStringIndex16Limit <= poolStringIndexLimit - * - * The local bundle's poolStringIndexLimit is greater than - * any pool bundle string index used in the local bundle. - * The poolStringIndexLimit should not be greater than - * the maximum possible pool bundle string index. - * - * The maximum possible pool bundle string index is the index to the last non-NUL - * pool string character, due to suffix sharing. - * - * In the pool bundle, there is no structure that lists the strings. - * (The root resource is an empty Table.) - * If the strings need to be enumerated (as genrb --usePoolBundle does), - * then iterate through the pool bundle's 16-bit-units array from the beginning. - * Stop at the end of the array, or when an explicit or implicit string length - * would lead beyond the end of the array, - * or when an apparent string is not NUL-terminated. - * (Future genrb version might terminate the strings with - * what looks like a large explicit string length.) - * - * ICU 4.4: New in formatVersion 2 compared with 1.3: ------------- - * - * Three new resource types -- String-v2, Table16 and Array16 -- have their - * values stored in a new array of 16-bit units between the table key strings - * and the start of the other resources. - * - * genrb eliminates duplicates among Unicode string-v2 values. - * Multiple Unicode strings may use the same offset and string data, - * or a short string may point to the suffix of a longer string. ("Suffix sharing") - * For example, one string "abc" may be reused for another string "bc" by pointing - * to the second character. (Short strings-v2 are NUL-terminated - * and not preceded by an explicit length value.) - * - * It is allowed for all resource types to share values. - * The swapper code (ures_swap()) has been modified so that it swaps each item - * exactly once. - * - * A resource bundle may use a special pool bundle. 
Some or all of the table key strings - * of the using-bundle are omitted, and the key string offsets for such key strings refer - * to offsets in the pool bundle. - * The using-bundle's and the pool-bundle's indexes[URES_INDEX_POOL_CHECKSUM] values - * must match. - * Two bits in indexes[URES_INDEX_ATTRIBUTES] indicate whether a resource bundle - * is or uses a pool bundle. - * - * Table key strings must be compared in ASCII order, even if they are not - * stored in ASCII. - * - * New in formatVersion 1.3 compared with 1.2: ------------- - * - * genrb eliminates duplicates among key strings. - * Multiple table items may share one key string, or one item may point - * to the suffix of another's key string. ("Suffix sharing") - * For example, one key "abc" may be reused for another key "bc" by pointing - * to the second character. (Key strings are NUL-terminated.) - * - * ------------- - * - * An ICU4C resource bundle file (.res) is a binary, memory-mappable file - * with nested, hierarchical data structures. 
- * It physically contains the following: - * - * Resource root; -- 32-bit Resource item, root item for this bundle's tree; - * currently, the root item must be a table or table32 resource item - * int32_t indexes[indexes[0]]; -- array of indexes for friendly - * reading and swapping; see URES_INDEX_* above - * new in formatVersion 1.1 (ICU 2.8) - * char keys[]; -- characters for key strings - * (formatVersion 1.0: up to 65k of characters; 1.1: <2G) - * (minus the space for root and indexes[]), - * which consist of invariant characters (ASCII/EBCDIC) and are NUL-terminated; - * padded to multiple of 4 bytes for 4-alignment of the following data - * uint16_t 16BitUnits[]; -- resources that are stored entirely as sequences of 16-bit units - * (new in formatVersion 2/ICU 4.4) - * data is indexed by the offset values in 16-bit resource types, - * with offset 0 pointing to the beginning of this array; - * there is a 0 at offset 0, for empty resources; - * padded to multiple of 4 bytes for 4-alignment of the following data - * data; -- data directly and indirectly indexed by the root item; - * the structure is determined by walking the tree - * - * Each resource bundle item has a 32-bit Resource handle (see typedef above) - * which contains the item type number in its upper 4 bits (31..28) and either - * an offset or a direct value in its lower 28 bits (27..0). - * The order of items is undefined and only determined by walking the tree. - * Leaves of the tree may be stored first or last or anywhere in between, - * and it is in theory possible to have unreferenced holes in the file. - * - * 16-bit-unit values: - * Starting with formatVersion 2/ICU 4.4, some resources are stored in a special - * array of 16-bit units. Each resource value is a sequence of 16-bit units, - * with no per-resource padding to a 4-byte boundary. - * 16-bit container types (Table16 and Array16) contain Resource16 values - * which are offsets to String-v2 resources in the same 16-bit-units array. 
- * - * Direct values: - * - Empty Unicode strings have an offset value of 0 in the Resource handle itself. - * - Starting with formatVersion 2/ICU 4.4, an offset value of 0 for - * _any_ resource type indicates an empty value. - * - Integer values are 28-bit values stored in the Resource handle itself; - * the interpretation of unsigned vs. signed integers is up to the application. - * - * All other types and values use 28-bit offsets to point to the item's data. - * The offset is an index to the first 32-bit word of the value, relative to the - * start of the resource data (i.e., the root item handle is at offset 0). - * To get byte offsets, the offset is multiplied by 4 (or shifted left by 2 bits). - * All resource item values are 4-aligned. - * - * New in formatVersion 2/ICU 4.4: Some types use offsets into the 16-bit-units array, - * indexing 16-bit units in that array. - * - * The structures (memory layouts) for the values for each item type are listed - * in the table below. - * - * Nested, hierarchical structures: ------------- - * - * Table items contain key-value pairs where the keys are offsets to char * key strings. - * The values of these pairs are either Resource handles or - * offsets into the 16-bit-units array, depending on the table type. - * - * Array items are simple vectors of Resource handles, - * or of offsets into the 16-bit-units array, depending on the array type. - * - * Table key string offsets: ------- - * - * Key string offsets are relative to the start of the resource data (of the root handle), - * i.e., the first string has an offset of 4+sizeof(indexes). - * (After the 4-byte root handle and after the indexes array.) - * - * If the resource bundle uses a pool bundle, then some key strings are stored - * in the pool bundle rather than in the local bundle itself. - * - In a Table or Table16, the 16-bit key string offset is local if it is - * less than indexes[URES_INDEX_KEYS_TOP]<<2. 
- * Otherwise, subtract indexes[URES_INDEX_KEYS_TOP]<<2 to get the offset into - * the pool bundle key strings. - * - In a Table32, the 32-bit key string offset is local if it is non-negative. - * Otherwise, reset bit 31 to get the pool key string offset. - * - * Unlike the local offset, the pool key offset is relative to - * the start of the key strings, not to the start of the bundle. - * - * An alias item is special (and new in ICU 2.4): -------------- - * - * Its memory layout is just like for a UnicodeString, but at runtime it resolves to - * another resource bundle's item according to the path in the string. - * This is used to share items across bundles that are in different lookup/fallback - * chains (e.g., large collation data among zh_TW and zh_HK). - * This saves space (for large items) and maintenance effort (less duplication of data). - * - * -------------------------------------------------------------------------- - * - * Resource types: - * - * Most resources have their values stored at four-byte offsets from the start - * of the resource data. These values are at least 4-aligned. - * Some resource values are stored directly in the offset field of the Resource itself. - * See UResType in unicode/ures.h for enumeration constants for Resource types. - * - * Some resources have their values stored as sequences of 16-bit units, - * at 2-byte offsets from the start of a contiguous 16-bit-unit array between - * the table key strings and the other resources. (new in formatVersion 2/ICU 4.4) - * At offset 0 of that array is a 16-bit zero value for empty 16-bit resources. - * - * Resource16 values in Table16 and Array16 are 16-bit offsets to String-v2 - * resources, with the offsets relative to the start of the 16-bit-units array. 
- * Starting with formatVersion 3/ICU 56, if offset(URES_NONE)) {} - virtual ~ResourceDataValue(); - - void setData(const ResourceData *data) { pResData = data; } - void setResource(Resource r) { res = r; } - - virtual UResType getType() const; - virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const; - virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const; - virtual int32_t getInt(UErrorCode &errorCode) const; - virtual uint32_t getUInt(UErrorCode &errorCode) const; - virtual const int32_t *getIntVector(int32_t &length, UErrorCode &errorCode) const; - virtual const uint8_t *getBinary(int32_t &length, UErrorCode &errorCode) const; - virtual ResourceArray getArray(UErrorCode &errorCode) const; - virtual ResourceTable getTable(UErrorCode &errorCode) const; - virtual UBool isNoInheritanceMarker() const; - virtual int32_t getStringArray(UnicodeString *dest, int32_t capacity, - UErrorCode &errorCode) const; - virtual int32_t getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity, - UErrorCode &errorCode) const; - virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const; - - const ResourceData *pResData; - -private: - Resource res; -}; - -U_NAMESPACE_END - -#endif /* __cplusplus */ - -/** - * Swap an ICU resource bundle. See udataswp.h. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -ures_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -#endif diff --git a/deps/node/deps/icu-small/source/common/uresimp.h b/deps/node/deps/icu-small/source/common/uresimp.h deleted file mode 100644 index e4f75c9f..00000000 --- a/deps/node/deps/icu-small/source/common/uresimp.h +++ /dev/null @@ -1,278 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ - -#ifndef URESIMP_H -#define URESIMP_H - -#include "unicode/ures.h" - -#include "uresdata.h" - -#define kRootLocaleName "root" -#define kPoolBundleName "pool" - -/* - The default minor version and the version separator must be exactly one - character long. -*/ - -#define kDefaultMinorVersion "0" -#define kVersionSeparator "." -#define kVersionTag "Version" - -#define MAGIC1 19700503 -#define MAGIC2 19641227 - -#define URES_MAX_ALIAS_LEVEL 256 -#define URES_MAX_BUFFER_SIZE 256 - -#define EMPTY_SET 0x2205 - -struct UResourceDataEntry; -typedef struct UResourceDataEntry UResourceDataEntry; - -/* - * Note: If we wanted to make this structure smaller, then we could try - * to use one UResourceDataEntry pointer for fAlias and fPool, with a separate - * flag to distinguish whether this struct is for a real bundle with a pool, - * or for an alias entry for which we won't use the pool after loading. - */ -struct UResourceDataEntry { - char *fName; /* name of the locale for bundle - still to decide whether it is original or fallback */ - char *fPath; /* path to bundle - used for distinguishing between resources with the same name */ - UResourceDataEntry *fParent; /*next resource in fallback chain*/ - UResourceDataEntry *fAlias; - UResourceDataEntry *fPool; - ResourceData fData; /* data for low level access */ - char fNameBuffer[3]; /* A small buffer of free space for fName. The free space is due to struct padding. 
*/ - uint32_t fCountExisting; /* how much is this resource used */ - UErrorCode fBogus; - /* int32_t fHashKey;*/ /* for faster access in the hashtable */ -}; - -#define RES_BUFSIZE 64 -#define RES_PATH_SEPARATOR '/' -#define RES_PATH_SEPARATOR_S "/" - -struct UResourceBundle { - const char *fKey; /*tag*/ - UResourceDataEntry *fData; /*for low-level access*/ - char *fVersion; - UResourceDataEntry *fTopLevelData; /* for getting the valid locale */ - char *fResPath; /* full path to the resource: "zh_TW/CollationElements/Sequence" */ - ResourceData fResData; - char fResBuf[RES_BUFSIZE]; - int32_t fResPathLen; - Resource fRes; - UBool fHasFallback; - UBool fIsTopLevel; - uint32_t fMagic1; /* For determining if it's a stack object */ - uint32_t fMagic2; /* For determining if it's a stack object */ - int32_t fIndex; - int32_t fSize; - - /*const UResourceBundle *fParentRes;*/ /* needed to get the actual locale for a child resource */ -}; - -U_CAPI void U_EXPORT2 ures_initStackObject(UResourceBundle* resB); - -/** - * Opens a resource bundle for the locale; - * if there is not even a base language bundle, then loads the root bundle; - * never falls back to the default locale. - * - * This is used for algorithms that have good pan-Unicode default behavior, - * such as case mappings, collation, and segmentation (BreakIterator). - */ -U_CAPI UResourceBundle* U_EXPORT2 -ures_openNoDefault(const char* path, const char* localeID, UErrorCode* status); - -/* Some getters used by the copy constructor */ -U_CFUNC const char* ures_getName(const UResourceBundle* resB); -#ifdef URES_DEBUG -U_CFUNC const char* ures_getPath(const UResourceBundle* resB); -/** - * If anything was in the RB cache, dump it to the screen. 
- * @return TRUE if there was anything into the cache - */ -U_CAPI UBool U_EXPORT2 ures_dumpCacheContents(void); -#endif -/*U_CFUNC void ures_appendResPath(UResourceBundle *resB, const char* toAdd, int32_t lenToAdd);*/ -/*U_CFUNC void ures_setResPath(UResourceBundle *resB, const char* toAdd);*/ -/*U_CFUNC void ures_freeResPath(UResourceBundle *resB);*/ - -/* Candidates for export */ -U_CFUNC UResourceBundle *ures_copyResb(UResourceBundle *r, const UResourceBundle *original, UErrorCode *status); - -/** - * Returns a resource that can be located using the pathToResource argument. One needs optional package, locale - * and path inside the locale, for example: "/myData/en/zoneStrings/3". Keys and indexes are supported. Keys - * need to reference data in named structures, while indexes can reference both named and anonymous resources. - * Features a fill-in parameter. - * - * Note, this function does NOT have a syntax for specifying items within a tree. May want to consider a - * syntax that delineates between package/tree and resource. - * - * @param pathToResource a path that will lead to the requested resource - * @param fillIn if NULL a new UResourceBundle struct is allocated and must be deleted by the caller. - * Alternatively, you can supply a struct to be filled by this function. - * @param status fills in the outgoing error code. - * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it - */ -U_CAPI UResourceBundle* U_EXPORT2 -ures_findResource(const char* pathToResource, - UResourceBundle *fillIn, UErrorCode *status); - -/** - * Returns a sub resource that can be located using the pathToResource argument. One needs a path inside - * the supplied resource, for example, if you have "en_US" resource bundle opened, you might ask for - * "zoneStrings/3". Keys and indexes are supported. Keys - * need to reference data in named structures, while indexes can reference both - * named and anonymous resources. 
- * Features a fill-in parameter. - * - * @param resourceBundle a resource - * @param pathToResource a path that will lead to the requested resource - * @param fillIn if NULL a new UResourceBundle struct is allocated and must be deleted by the caller. - * Alternatively, you can supply a struct to be filled by this function. - * @param status fills in the outgoing error code. - * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it - */ -U_CAPI UResourceBundle* U_EXPORT2 -ures_findSubResource(const UResourceBundle *resB, - char* pathToResource, - UResourceBundle *fillIn, UErrorCode *status); - -/** - * Returns a functionally equivalent locale (considering keywords) for the specified keyword. - * @param result fillin for the equivalent locale - * @param resultCapacity capacity of the fillin buffer - * @param path path to the tree, or NULL for ICU data - * @param resName top level resource. Example: "collations" - * @param keyword locale keyword. Example: "collation" - * @param locid The requested locale - * @param isAvailable If non-null, pointer to fillin parameter that indicates whether the - * requested locale was available. The locale is defined as 'available' if it physically - * exists within the specified tree. - * @param omitDefault if TRUE, omit keyword and value if default. 'de_DE\@collation=standard' -> 'de_DE' - * @param status error code - * @return the actual buffer size needed for the full locale. If it's greater - * than resultCapacity, the returned full name will be truncated and an error code will be returned. - */ -U_CAPI int32_t U_EXPORT2 -ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, - const char *path, const char *resName, const char *keyword, const char *locid, - UBool *isAvailable, UBool omitDefault, UErrorCode *status); - -/** - * Given a tree path and keyword, return a string enumeration of all possible values for that keyword. 
- * @param path path to the tree, or NULL for ICU data - * @param keyword a particular keyword to consider, must match a top level resource name - * within the tree. - * @param status error code - */ -U_CAPI UEnumeration* U_EXPORT2 -ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status); - - -/** - * Get a resource with multi-level fallback. Normally only the top level resources will - * fallback to its parent. This performs fallback on subresources. For example, when a table - * is defined in a resource bundle and a parent resource bundle, normally no fallback occurs - * on the sub-resources because the table is defined in the current resource bundle, but this - * function can perform fallback on the sub-resources of the table. - * @param resB a resource - * @param inKey a key associated with the requested resource - * @param fillIn if NULL a new UResourceBundle struct is allocated and must be deleted by the caller. - * Alternatively, you can supply a struct to be filled by this function. - * @param status: fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * could be a non-failing error - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it - */ -U_CAPI UResourceBundle* U_EXPORT2 -ures_getByKeyWithFallback(const UResourceBundle *resB, - const char* inKey, - UResourceBundle *fillIn, - UErrorCode *status); - - -/** - * Get a String with multi-level fallback. Normally only the top level resources will - * fallback to its parent. This performs fallback on subresources. For example, when a table - * is defined in a resource bundle and a parent resource bundle, normally no fallback occurs - * on the sub-resources because the table is defined in the current resource bundle, but this - * function can perform fallback on the sub-resources of the table. 
- * @param resB a resource - * @param inKey a key associated with the requested resource - * @param status: fills in the outgoing error code - * could be U_MISSING_RESOURCE_ERROR if the key is not found - * could be a non-failing error - * e.g.: U_USING_FALLBACK_WARNING,U_USING_DEFAULT_WARNING - * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it - */ -U_CAPI const UChar* U_EXPORT2 -ures_getStringByKeyWithFallback(const UResourceBundle *resB, - const char* inKey, - int32_t* len, - UErrorCode *status); - -#ifdef __cplusplus - -U_CAPI void U_EXPORT2 -ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path, - icu::ResourceSink &sink, UErrorCode &errorCode); - -#endif /* __cplusplus */ - -/** - * Get a version number by key - * @param resB bundle containing version number - * @param key the key for the version number - * @param ver fillin for the version number - * @param status error code - */ -U_CAPI void U_EXPORT2 -ures_getVersionByKey(const UResourceBundle *resB, - const char *key, - UVersionInfo ver, - UErrorCode *status); - - -/** - * Internal function. - * Return the version number associated with this ResourceBundle as a string. - * - * @param resourceBundle The resource bundle for which the version is checked. - * @return A version number string as specified in the resource bundle or its parent. - * The caller does not own this string. - * @see ures_getVersion - */ -U_CAPI const char* U_EXPORT2 -ures_getVersionNumberInternal(const UResourceBundle *resourceBundle); - -/** - * Return the name of the Locale associated with this ResourceBundle. This API allows - * you to query for the real locale of the resource. For example, if you requested - * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned. - * For subresources, the locale where this resource comes from will be returned. - * If fallback has occured, getLocale will reflect this. 
- * - * This internal version avoids deprecated-warnings in ICU code. - * - * @param resourceBundle resource bundle in question - * @param status just for catching illegal arguments - * @return A Locale name - */ -U_CAPI const char* U_EXPORT2 -ures_getLocaleInternal(const UResourceBundle* resourceBundle, - UErrorCode* status); - -#endif /*URESIMP_H*/ diff --git a/deps/node/deps/icu-small/source/common/ureslocs.h b/deps/node/deps/icu-small/source/common/ureslocs.h deleted file mode 100644 index f7c3344e..00000000 --- a/deps/node/deps/icu-small/source/common/ureslocs.h +++ /dev/null @@ -1,27 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2009-2014 International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ - -#ifndef __URESLOCS_H__ -#define __URESLOCS_H__ - -#include "unicode/utypes.h" -#include "unicode/udata.h" - -U_CDECL_BEGIN - - -#define U_ICUDATA_LANG U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "lang" -#define U_ICUDATA_REGION U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "region" -#define U_ICUDATA_CURR U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "curr" -#define U_ICUDATA_ZONE U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "zone" -#define U_ICUDATA_UNIT U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "unit" - -U_CDECL_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/usc_impl.cpp b/deps/node/deps/icu-small/source/common/usc_impl.cpp deleted file mode 100644 index d6988032..00000000 --- a/deps/node/deps/icu-small/source/common/usc_impl.cpp +++ /dev/null @@ -1,361 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File USC_IMPL.C -* -* Modification History: -* -* Date Name Description -* 07/08/2002 Eric Mader Creation. -****************************************************************************** -*/ - -#include "unicode/uscript.h" -#include "usc_impl.h" -#include "cmemory.h" - -#define PAREN_STACK_DEPTH 32 - -#define MOD(sp) ((sp) % PAREN_STACK_DEPTH) -#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH) -#define INC(sp,count) (MOD((sp) + (count))) -#define INC1(sp) (INC(sp, 1)) -#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count))) -#define DEC1(sp) (DEC(sp, 1)) -#define STACK_IS_EMPTY(scriptRun) ((scriptRun)->pushCount <= 0) -#define STACK_IS_NOT_EMPTY(scriptRun) (! 
STACK_IS_EMPTY(scriptRun)) -#define TOP(scriptRun) ((scriptRun)->parenStack[(scriptRun)->parenSP]) -#define SYNC_FIXUP(scriptRun) ((scriptRun)->fixupCount = 0) - -struct ParenStackEntry -{ - int32_t pairIndex; - UScriptCode scriptCode; -}; - -struct UScriptRun -{ - int32_t textLength; - const UChar *textArray; - - int32_t scriptStart; - int32_t scriptLimit; - UScriptCode scriptCode; - - struct ParenStackEntry parenStack[PAREN_STACK_DEPTH]; - int32_t parenSP; - int32_t pushCount; - int32_t fixupCount; -}; - -static int8_t highBit(int32_t value); - -static const UChar32 pairedChars[] = { - 0x0028, 0x0029, /* ascii paired punctuation */ - 0x003c, 0x003e, - 0x005b, 0x005d, - 0x007b, 0x007d, - 0x00ab, 0x00bb, /* guillemets */ - 0x2018, 0x2019, /* general punctuation */ - 0x201c, 0x201d, - 0x2039, 0x203a, - 0x3008, 0x3009, /* chinese paired punctuation */ - 0x300a, 0x300b, - 0x300c, 0x300d, - 0x300e, 0x300f, - 0x3010, 0x3011, - 0x3014, 0x3015, - 0x3016, 0x3017, - 0x3018, 0x3019, - 0x301a, 0x301b -}; - -static void push(UScriptRun *scriptRun, int32_t pairIndex, UScriptCode scriptCode) -{ - scriptRun->pushCount = LIMIT_INC(scriptRun->pushCount); - scriptRun->fixupCount = LIMIT_INC(scriptRun->fixupCount); - - scriptRun->parenSP = INC1(scriptRun->parenSP); - scriptRun->parenStack[scriptRun->parenSP].pairIndex = pairIndex; - scriptRun->parenStack[scriptRun->parenSP].scriptCode = scriptCode; -} - -static void pop(UScriptRun *scriptRun) -{ - if (STACK_IS_EMPTY(scriptRun)) { - return; - } - - if (scriptRun->fixupCount > 0) { - scriptRun->fixupCount -= 1; - } - - scriptRun->pushCount -= 1; - scriptRun->parenSP = DEC1(scriptRun->parenSP); - - /* If the stack is now empty, reset the stack - pointers to their initial values. 
- */ - if (STACK_IS_EMPTY(scriptRun)) { - scriptRun->parenSP = -1; - } -} - -static void fixup(UScriptRun *scriptRun, UScriptCode scriptCode) -{ - int32_t fixupSP = DEC(scriptRun->parenSP, scriptRun->fixupCount); - - while (scriptRun->fixupCount-- > 0) { - fixupSP = INC1(fixupSP); - scriptRun->parenStack[fixupSP].scriptCode = scriptCode; - } -} - -static int8_t -highBit(int32_t value) -{ - int8_t bit = 0; - - if (value <= 0) { - return -32; - } - - if (value >= 1 << 16) { - value >>= 16; - bit += 16; - } - - if (value >= 1 << 8) { - value >>= 8; - bit += 8; - } - - if (value >= 1 << 4) { - value >>= 4; - bit += 4; - } - - if (value >= 1 << 2) { - value >>= 2; - bit += 2; - } - - if (value >= 1 << 1) { - //value >>= 1; - bit += 1; - } - - return bit; -} - -static int32_t -getPairIndex(UChar32 ch) -{ - int32_t pairedCharCount = UPRV_LENGTHOF(pairedChars); - int32_t pairedCharPower = 1 << highBit(pairedCharCount); - int32_t pairedCharExtra = pairedCharCount - pairedCharPower; - - int32_t probe = pairedCharPower; - int32_t pairIndex = 0; - - if (ch >= pairedChars[pairedCharExtra]) { - pairIndex = pairedCharExtra; - } - - while (probe > (1 << 0)) { - probe >>= 1; - - if (ch >= pairedChars[pairIndex + probe]) { - pairIndex += probe; - } - } - - if (pairedChars[pairIndex] != ch) { - pairIndex = -1; - } - - return pairIndex; -} - -static UBool -sameScript(UScriptCode scriptOne, UScriptCode scriptTwo) -{ - return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo; -} - -U_CAPI UScriptRun * U_EXPORT2 -uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode) -{ - UScriptRun *result = NULL; - - if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } - - result = (UScriptRun *)uprv_malloc(sizeof (UScriptRun)); - - if (result == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - uscript_setRunText(result, src, length, pErrorCode); - - /* Release the UScriptRun if uscript_setRunText() 
returns an error */ - if (U_FAILURE(*pErrorCode)) { - uprv_free(result); - result = NULL; - } - - return result; -} - -U_CAPI void U_EXPORT2 -uscript_closeRun(UScriptRun *scriptRun) -{ - if (scriptRun != NULL) { - uprv_free(scriptRun); - } -} - -U_CAPI void U_EXPORT2 -uscript_resetRun(UScriptRun *scriptRun) -{ - if (scriptRun != NULL) { - scriptRun->scriptStart = 0; - scriptRun->scriptLimit = 0; - scriptRun->scriptCode = USCRIPT_INVALID_CODE; - scriptRun->parenSP = -1; - scriptRun->pushCount = 0; - scriptRun->fixupCount = 0; - } -} - -U_CAPI void U_EXPORT2 -uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode) -{ - if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) { - return; - } - - if (scriptRun == NULL || length < 0 || ((src == NULL) != (length == 0))) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - scriptRun->textArray = src; - scriptRun->textLength = length; - - uscript_resetRun(scriptRun); -} - -U_CAPI UBool U_EXPORT2 -uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript) -{ - UErrorCode error = U_ZERO_ERROR; - - /* if we've fallen off the end of the text, we're done */ - if (scriptRun == NULL || scriptRun->scriptLimit >= scriptRun->textLength) { - return FALSE; - } - - SYNC_FIXUP(scriptRun); - scriptRun->scriptCode = USCRIPT_COMMON; - - for (scriptRun->scriptStart = scriptRun->scriptLimit; scriptRun->scriptLimit < scriptRun->textLength; scriptRun->scriptLimit += 1) { - UChar high = scriptRun->textArray[scriptRun->scriptLimit]; - UChar32 ch = high; - UScriptCode sc; - int32_t pairIndex; - - /* - * if the character is a high surrogate and it's not the last one - * in the text, see if it's followed by a low surrogate - */ - if (high >= 0xD800 && high <= 0xDBFF && scriptRun->scriptLimit < scriptRun->textLength - 1) { - UChar low = scriptRun->textArray[scriptRun->scriptLimit + 1]; - - /* - * if it is followed by a low surrogate, - * consume it and form 
the full character - */ - if (low >= 0xDC00 && low <= 0xDFFF) { - ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000; - scriptRun->scriptLimit += 1; - } - } - - sc = uscript_getScript(ch, &error); - pairIndex = getPairIndex(ch); - - /* - * Paired character handling: - * - * if it's an open character, push it onto the stack. - * if it's a close character, find the matching open on the - * stack, and use that script code. Any non-matching open - * characters above it on the stack will be poped. - */ - if (pairIndex >= 0) { - if ((pairIndex & 1) == 0) { - push(scriptRun, pairIndex, scriptRun->scriptCode); - } else { - int32_t pi = pairIndex & ~1; - - while (STACK_IS_NOT_EMPTY(scriptRun) && TOP(scriptRun).pairIndex != pi) { - pop(scriptRun); - } - - if (STACK_IS_NOT_EMPTY(scriptRun)) { - sc = TOP(scriptRun).scriptCode; - } - } - } - - if (sameScript(scriptRun->scriptCode, sc)) { - if (scriptRun->scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) { - scriptRun->scriptCode = sc; - - fixup(scriptRun, scriptRun->scriptCode); - } - - /* - * if this character is a close paired character, - * pop the matching open character from the stack - */ - if (pairIndex >= 0 && (pairIndex & 1) != 0) { - pop(scriptRun); - } - } else { - /* - * if the run broke on a surrogate pair, - * end it before the high surrogate - */ - if (ch >= 0x10000) { - scriptRun->scriptLimit -= 1; - } - - break; - } - } - - - if (pRunStart != NULL) { - *pRunStart = scriptRun->scriptStart; - } - - if (pRunLimit != NULL) { - *pRunLimit = scriptRun->scriptLimit; - } - - if (pRunScript != NULL) { - *pRunScript = scriptRun->scriptCode; - } - - return TRUE; -} diff --git a/deps/node/deps/icu-small/source/common/usc_impl.h b/deps/node/deps/icu-small/source/common/usc_impl.h deleted file mode 100644 index 44899649..00000000 --- a/deps/node/deps/icu-small/source/common/usc_impl.h +++ /dev/null @@ -1,139 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File USC_IMPL.H -* -* Modification History: -* -* Date Name Description -* 07/08/2002 Eric Mader Creation. -****************************************************************************** -*/ - -#ifndef USC_IMPL_H -#define USC_IMPL_H -#include "unicode/utypes.h" -#include "unicode/uscript.h" - -/** - * UScriptRun is used to find runs of characters in - * the same script. It implements a simple iterator over an array - * of characters. The iterator will resolve script-neutral characters - * like punctuation into the script of the surrounding characters. - * - * The iterator will try to match paired punctuation. If it sees an - * opening punctuation character, it will remember the script that - * was assigned to that character, and assign the same script to the - * matching closing punctuation. - * - * Scripts are chosen based on the UScriptCode enumeration. - * No attempt is made to combine related scripts into a single run. In - * particular, Hiragana, Katakana, and Han characters will appear in seperate - * runs. - - * Here is an example of how to iterate over script runs: - *

- * \code
- * void printScriptRuns(const UChar *text, int32_t length)
- * {
- *     UErrorCode error = U_ZERO_ERROR;
- *     UScriptRun *scriptRun = uscript_openRun(text, testLength, &error);
- *     int32_t start = 0, limit = 0;
- *     UScriptCode code = USCRIPT_INVALID_CODE;
- *
- *     while (uscript_nextRun(&start, &limit, &code)) {
- *         printf("Script '%s' from %d to %d.\n", uscript_getName(code), start, limit);
- *     }
- *
- *     uscript_closeRun(scriptRun);
- *  }
- * 
- */ -struct UScriptRun; - -typedef struct UScriptRun UScriptRun; - -/** - * Create a UScriptRun object for iterating over the given text. This object must - * be freed using uscript_closeRun(). Note that this object does not copy the source text, - * only the pointer to it. You must make sure that the pointer remains valid until you call - * uscript_closeRun() or uscript_setRunText(). - * - * @param src is the address of the array of characters over which to iterate. - * if src == NULL and length == 0, - * an empty UScriptRun object will be returned. - * - * @param length is the number of characters over which to iterate. - * - * @param pErrorCode is a pointer to a valid UErrorCode value. If this value - * indicates a failure on entry, the function will immediately return. - * On exit the value will indicate the success of the operation. - * - * @return the address of UScriptRun object which will iterate over the text, - * or NULL if the operation failed. - */ -U_CAPI UScriptRun * U_EXPORT2 -uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode); - -/** - * Frees the given UScriptRun object and any storage associated with it. - * On return, scriptRun no longer points to a valid UScriptRun object. - * - * @param scriptRun is the UScriptRun object which will be freed. - */ -U_CAPI void U_EXPORT2 -uscript_closeRun(UScriptRun *scriptRun); - -/** - * Reset the UScriptRun object so that it will start iterating from - * the beginning. - * - * @param scriptRun is the address of the UScriptRun object to be reset. - */ -U_CAPI void U_EXPORT2 -uscript_resetRun(UScriptRun *scriptRun); - -/** - * Change the text over which the given UScriptRun object iterates. - * - * @param scriptRun is the UScriptRun object which will be changed. - * - * @param src is the address of the new array of characters over which to iterate. - * If src == NULL and length == 0, - * the UScriptRun object will become empty. 
- * - * @param length is the new number of characters over which to iterate - * - * @param pErrorCode is a pointer to a valid UErrorCode value. If this value - * indicates a failure on entry, the function will immediately return. - * On exit the value will indicate the success of the operation. - */ -U_CAPI void U_EXPORT2 -uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode); - -/** - * Advance the UScriptRun object to the next script run, return the start and limit - * offsets, and the script of the run. - * - * @param scriptRun is the address of the UScriptRun object. - * - * @param pRunStart is a pointer to the variable to receive the starting offset of the next run. - * This pointer can be NULL if the value is not needed. - * - * @param pRunLimit is a pointer to the variable to receive the limit offset of the next run. - * This pointer can be NULL if the value is not needed. - * - * @param pRunScript is a pointer to the variable to receive the UScriptCode for the - * script of the current run. This pointer can be NULL if the value is not needed. - * - * @return true if there was another script run. - */ -U_CAPI UBool U_EXPORT2 -uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript); - -#endif diff --git a/deps/node/deps/icu-small/source/common/uscript.cpp b/deps/node/deps/icu-small/source/common/uscript.cpp deleted file mode 100644 index 83b5f7ef..00000000 --- a/deps/node/deps/icu-small/source/common/uscript.cpp +++ /dev/null @@ -1,144 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1997-2014, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* -* File USCRIPT.C -* -* Modification History: -* -* Date Name Description -* 07/06/2001 Ram Creation. -****************************************************************************** -*/ - -#include "unicode/uchar.h" -#include "unicode/uscript.h" -#include "unicode/uloc.h" -#include "cmemory.h" -#include "cstring.h" - -static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN }; -static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN }; -static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO }; - -static int32_t -setCodes(const UScriptCode *src, int32_t length, - UScriptCode *dest, int32_t capacity, UErrorCode *err) { - int32_t i; - if(U_FAILURE(*err)) { return 0; } - if(length > capacity) { - *err = U_BUFFER_OVERFLOW_ERROR; - return length; - } - for(i = 0; i < length; ++i) { - dest[i] = src[i]; - } - return length; -} - -static int32_t -setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) { - if(U_FAILURE(*err)) { return 0; } - if(1 > capacity) { - *err = U_BUFFER_OVERFLOW_ERROR; - return 1; - } - scripts[0] = script; - return 1; -} - -static int32_t -getCodesFromLocale(const char *locale, - UScriptCode *scripts, int32_t capacity, UErrorCode *err) { - UErrorCode internalErrorCode = U_ZERO_ERROR; - char lang[8]; - char script[8]; - int32_t scriptLength; - if(U_FAILURE(*err)) { return 0; } - // Multi-script languages, equivalent to the LocaleScript data - // that we used to load from locale resource bundles. 
- /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode); - if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) { - return 0; - } - if(0 == uprv_strcmp(lang, "ja")) { - return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err); - } - if(0 == uprv_strcmp(lang, "ko")) { - return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err); - } - scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode); - if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) { - return 0; - } - if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) { - return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err); - } - // Explicit script code. - if(scriptLength != 0) { - UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); - if(scriptCode != USCRIPT_INVALID_CODE) { - if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) { - scriptCode = USCRIPT_HAN; - } - return setOneCode(scriptCode, scripts, capacity, err); - } - } - return 0; -} - -/* TODO: this is a bad API and should be deprecated, ticket #11141 */ -U_CAPI int32_t U_EXPORT2 -uscript_getCode(const char* nameOrAbbrOrLocale, - UScriptCode* fillIn, - int32_t capacity, - UErrorCode* err){ - UBool triedCode; - char likely[ULOC_FULLNAME_CAPACITY]; - UErrorCode internalErrorCode; - int32_t length; - - if(U_FAILURE(*err)) { - return 0; - } - if(nameOrAbbrOrLocale==NULL || - (fillIn == NULL ? 
capacity != 0 : capacity < 0)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - triedCode = FALSE; - if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){ - /* try long and abbreviated script names first */ - UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); - if(code!=USCRIPT_INVALID_CODE) { - return setOneCode(code, fillIn, capacity, err); - } - triedCode = TRUE; - } - internalErrorCode = U_ZERO_ERROR; - length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err); - if(U_FAILURE(*err) || length != 0) { - return length; - } - (void)uloc_addLikelySubtags(nameOrAbbrOrLocale, - likely, UPRV_LENGTHOF(likely), &internalErrorCode); - if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) { - length = getCodesFromLocale(likely, fillIn, capacity, err); - if(U_FAILURE(*err) || length != 0) { - return length; - } - } - if(!triedCode) { - /* still not found .. try long and abbreviated script names again */ - UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); - if(code!=USCRIPT_INVALID_CODE) { - return setOneCode(code, fillIn, capacity, err); - } - } - return 0; -} diff --git a/deps/node/deps/icu-small/source/common/uscript_props.cpp b/deps/node/deps/icu-small/source/common/uscript_props.cpp deleted file mode 100644 index bfdb68c7..00000000 --- a/deps/node/deps/icu-small/source/common/uscript_props.cpp +++ /dev/null @@ -1,294 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2013-2016, International Business Machines -* Corporation and others. All Rights Reserved. 
-******************************************************************************* -* file name: uscript_props.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2013feb16 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/unistr.h" -#include "unicode/uscript.h" -#include "unicode/utf16.h" -#include "ustr_imp.h" -#include "cmemory.h" - -namespace { - -// Script metadata (script properties). -// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt - -// 0 = NOT_ENCODED, no sample character, default false script properties. -// Bits 20.. 0: sample character - -// Bits 23..21: usage -const int32_t UNKNOWN = 1 << 21; -const int32_t EXCLUSION = 2 << 21; -const int32_t LIMITED_USE = 3 << 21; -// st int32_t ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10 -const int32_t RECOMMENDED = 5 << 21; - -// Bits 31..24: Single-bit flags -const int32_t RTL = 1 << 24; -const int32_t LB_LETTERS = 1 << 25; -const int32_t CASED = 1 << 26; - -const int32_t SCRIPT_PROPS[] = { - // Begin copy-paste output from - // tools/trunk/unicode/py/parsescriptmetadata.py - 0x0040 | RECOMMENDED, // Zyyy - 0x0308 | RECOMMENDED, // Zinh - 0x0628 | RECOMMENDED | RTL, // Arab - 0x0531 | RECOMMENDED | CASED, // Armn - 0x0995 | RECOMMENDED, // Beng - 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo - 0x13C4 | LIMITED_USE | CASED, // Cher - 0x03E2 | EXCLUSION | CASED, // Copt - 0x042F | RECOMMENDED | CASED, // Cyrl - 0x10414 | EXCLUSION | CASED, // Dsrt - 0x0905 | RECOMMENDED, // Deva - 0x12A0 | RECOMMENDED, // Ethi - 0x10D3 | RECOMMENDED, // Geor - 0x10330 | EXCLUSION, // Goth - 0x03A9 | RECOMMENDED | CASED, // Grek - 0x0A95 | RECOMMENDED, // Gujr - 0x0A15 | RECOMMENDED, // Guru - 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani - 0xAC00 | RECOMMENDED, // Hang - 0x05D0 | RECOMMENDED | RTL, // Hebr - 0x304B | RECOMMENDED | LB_LETTERS, // Hira - 0x0C95 | RECOMMENDED, // Knda - 0x30AB | RECOMMENDED | LB_LETTERS, 
// Kana - 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr - 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo - 0x004C | RECOMMENDED | CASED, // Latn - 0x0D15 | RECOMMENDED, // Mlym - 0x1826 | EXCLUSION, // Mong - 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr - 0x168F | EXCLUSION, // Ogam - 0x10300 | EXCLUSION, // Ital - 0x0B15 | RECOMMENDED, // Orya - 0x16A0 | EXCLUSION, // Runr - 0x0D85 | RECOMMENDED, // Sinh - 0x0710 | LIMITED_USE | RTL, // Syrc - 0x0B95 | RECOMMENDED, // Taml - 0x0C15 | RECOMMENDED, // Telu - 0x078C | RECOMMENDED | RTL, // Thaa - 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai - 0x0F40 | RECOMMENDED, // Tibt - 0x14C0 | LIMITED_USE, // Cans - 0xA288 | LIMITED_USE | LB_LETTERS, // Yiii - 0x1703 | EXCLUSION, // Tglg - 0x1723 | EXCLUSION, // Hano - 0x1743 | EXCLUSION, // Buhd - 0x1763 | EXCLUSION, // Tagb - 0x280E | UNKNOWN, // Brai - 0x10800 | EXCLUSION | RTL, // Cprt - 0x1900 | LIMITED_USE, // Limb - 0x10000 | EXCLUSION, // Linb - 0x10480 | EXCLUSION, // Osma - 0x10450 | EXCLUSION, // Shaw - 0x1950 | LIMITED_USE | LB_LETTERS, // Tale - 0x10380 | EXCLUSION, // Ugar - 0, - 0x1A00 | EXCLUSION, // Bugi - 0x2C00 | EXCLUSION | CASED, // Glag - 0x10A00 | EXCLUSION | RTL, // Khar - 0xA800 | LIMITED_USE, // Sylo - 0x1980 | LIMITED_USE | LB_LETTERS, // Talu - 0x2D30 | LIMITED_USE, // Tfng - 0x103A0 | EXCLUSION, // Xpeo - 0x1B05 | LIMITED_USE, // Bali - 0x1BC0 | LIMITED_USE, // Batk - 0, - 0x11005 | EXCLUSION, // Brah - 0xAA00 | LIMITED_USE, // Cham - 0, - 0, - 0, - 0, - 0x13153 | EXCLUSION, // Egyp - 0, - 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans - 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant - 0x16B1C | EXCLUSION, // Hmng - 0x10CA1 | EXCLUSION | RTL | CASED, // Hung - 0, - 0xA984 | LIMITED_USE, // Java - 0xA90A | LIMITED_USE, // Kali - 0, - 0, - 0x1C00 | LIMITED_USE, // Lepc - 0x10647 | EXCLUSION, // Lina - 0x0840 | LIMITED_USE | RTL, // Mand - 0, - 0x10980 | EXCLUSION | RTL, // Mero - 0x07CA | LIMITED_USE | RTL, // Nkoo - 0x10C00 | EXCLUSION | RTL, // Orkh - 0x1036B | 
EXCLUSION, // Perm - 0xA840 | EXCLUSION, // Phag - 0x10900 | EXCLUSION | RTL, // Phnx - 0x16F00 | LIMITED_USE, // Plrd - 0, - 0, - 0, - 0, - 0, - 0, - 0xA549 | LIMITED_USE, // Vaii - 0, - 0x12000 | EXCLUSION, // Xsux - 0, - 0xFDD0 | UNKNOWN, // Zzzz - 0x102A0 | EXCLUSION, // Cari - 0x304B | RECOMMENDED | LB_LETTERS, // Jpan - 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana - 0x10280 | EXCLUSION, // Lyci - 0x10920 | EXCLUSION | RTL, // Lydi - 0x1C5A | LIMITED_USE, // Olck - 0xA930 | EXCLUSION, // Rjng - 0xA882 | LIMITED_USE, // Saur - 0x1D850 | EXCLUSION, // Sgnw - 0x1B83 | LIMITED_USE, // Sund - 0, - 0xABC0 | LIMITED_USE, // Mtei - 0x10840 | EXCLUSION | RTL, // Armi - 0x10B00 | EXCLUSION | RTL, // Avst - 0x11103 | LIMITED_USE, // Cakm - 0xAC00 | RECOMMENDED, // Kore - 0x11083 | EXCLUSION, // Kthi - 0x10AD8 | EXCLUSION | RTL, // Mani - 0x10B60 | EXCLUSION | RTL, // Phli - 0x10B8F | EXCLUSION | RTL, // Phlp - 0, - 0x10B40 | EXCLUSION | RTL, // Prti - 0x0800 | EXCLUSION | RTL, // Samr - 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt - 0, - 0, - 0xA6A0 | LIMITED_USE, // Bamu - 0xA4D0 | LIMITED_USE, // Lisu - 0, - 0x10A60 | EXCLUSION | RTL, // Sarb - 0x16AE6 | EXCLUSION, // Bass - 0x1BC20 | EXCLUSION, // Dupl - 0x10500 | EXCLUSION, // Elba - 0x11315 | EXCLUSION, // Gran - 0, - 0, - 0x1E802 | EXCLUSION | RTL, // Mend - 0x109A0 | EXCLUSION | RTL, // Merc - 0x10A95 | EXCLUSION | RTL, // Narb - 0x10896 | EXCLUSION | RTL, // Nbat - 0x10873 | EXCLUSION | RTL, // Palm - 0x112BE | EXCLUSION, // Sind - 0x118B4 | EXCLUSION | CASED, // Wara - 0, - 0, - 0x16A4F | EXCLUSION, // Mroo - 0x1B1C4 | EXCLUSION | LB_LETTERS, // Nshu - 0x11183 | EXCLUSION, // Shrd - 0x110D0 | EXCLUSION, // Sora - 0x11680 | EXCLUSION, // Takr - 0x18229 | EXCLUSION | LB_LETTERS, // Tang - 0, - 0x14400 | EXCLUSION, // Hluw - 0x11208 | EXCLUSION, // Khoj - 0x11484 | EXCLUSION, // Tirh - 0x10537 | EXCLUSION, // Aghb - 0x11152 | EXCLUSION, // Mahj - 0x11717 | EXCLUSION | LB_LETTERS, // Ahom - 0x108F4 | EXCLUSION | RTL, // 
Hatr - 0x1160E | EXCLUSION, // Modi - 0x1128F | EXCLUSION, // Mult - 0x11AC0 | EXCLUSION, // Pauc - 0x1158E | EXCLUSION, // Sidd - 0x1E909 | LIMITED_USE | RTL | CASED, // Adlm - 0x11C0E | EXCLUSION, // Bhks - 0x11C72 | EXCLUSION, // Marc - 0x11412 | LIMITED_USE, // Newa - 0x104B5 | LIMITED_USE | CASED, // Osge - 0x5B57 | RECOMMENDED | LB_LETTERS, // Hanb - 0x1112 | RECOMMENDED, // Jamo - 0, - 0x11D10 | EXCLUSION, // Gonm - 0x11A5C | EXCLUSION, // Soyo - 0x11A0B | EXCLUSION, // Zanb - 0x1180B | EXCLUSION, // Dogr - 0x11D71 | LIMITED_USE, // Gong - 0x11EE5 | EXCLUSION, // Maka - 0x16E40 | EXCLUSION | CASED, // Medf - 0x10D12 | LIMITED_USE | RTL, // Rohg - 0x10F42 | EXCLUSION | RTL, // Sogd - 0x10F19 | EXCLUSION | RTL, // Sogo - // End copy-paste from parsescriptmetadata.py -}; - -int32_t getScriptProps(UScriptCode script) { - if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) { - return SCRIPT_PROPS[script]; - } else { - return 0; - } -} - -} // namespace - -U_CAPI int32_t U_EXPORT2 -uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { return 0; } - if(capacity < 0 || (capacity > 0 && dest == NULL)) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - int32_t sampleChar = getScriptProps(script) & 0x1fffff; - int32_t length; - if(sampleChar == 0) { - length = 0; - } else { - length = U16_LENGTH(sampleChar); - if(length <= capacity) { - int32_t i = 0; - U16_APPEND_UNSAFE(dest, i, sampleChar); - } - } - return u_terminateUChars(dest, capacity, length, pErrorCode); -} - -U_COMMON_API icu::UnicodeString U_EXPORT2 -uscript_getSampleUnicodeString(UScriptCode script) { - icu::UnicodeString sample; - int32_t sampleChar = getScriptProps(script) & 0x1fffff; - if(sampleChar != 0) { - sample.append(sampleChar); - } - return sample; -} - -U_CAPI UScriptUsage U_EXPORT2 -uscript_getUsage(UScriptCode script) { - return (UScriptUsage)((getScriptProps(script) >> 21) & 7); -} - -U_CAPI UBool 
U_EXPORT2 -uscript_isRightToLeft(UScriptCode script) { - return (getScriptProps(script) & RTL) != 0; -} - -U_CAPI UBool U_EXPORT2 -uscript_breaksBetweenLetters(UScriptCode script) { - return (getScriptProps(script) & LB_LETTERS) != 0; -} - -U_CAPI UBool U_EXPORT2 -uscript_isCased(UScriptCode script) { - return (getScriptProps(script) & CASED) != 0; -} diff --git a/deps/node/deps/icu-small/source/common/uset.cpp b/deps/node/deps/icu-small/source/common/uset.cpp deleted file mode 100644 index 75ff5ddf..00000000 --- a/deps/node/deps/icu-small/source/common/uset.cpp +++ /dev/null @@ -1,641 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uset.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002mar07 -* created by: Markus W. Scherer -* -* There are functions to efficiently serialize a USet into an array of uint16_t -* and functions to use such a serialized form efficiently without -* instantiating a new USet. 
-*/ - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/uset.h" -#include "unicode/uniset.h" -#include "cmemory.h" -#include "unicode/ustring.h" -#include "unicode/parsepos.h" - -U_NAMESPACE_USE - -U_CAPI USet* U_EXPORT2 -uset_openEmpty() { - return (USet*) new UnicodeSet(); -} - -U_CAPI USet* U_EXPORT2 -uset_open(UChar32 start, UChar32 end) { - return (USet*) new UnicodeSet(start, end); -} - -U_CAPI void U_EXPORT2 -uset_close(USet* set) { - delete (UnicodeSet*) set; -} - -U_CAPI USet * U_EXPORT2 -uset_clone(const USet *set) { - return (USet*) (((UnicodeSet*) set)->UnicodeSet::clone()); -} - -U_CAPI UBool U_EXPORT2 -uset_isFrozen(const USet *set) { - return ((UnicodeSet*) set)->UnicodeSet::isFrozen(); -} - -U_CAPI void U_EXPORT2 -uset_freeze(USet *set) { - ((UnicodeSet*) set)->UnicodeSet::freeze(); -} - -U_CAPI USet * U_EXPORT2 -uset_cloneAsThawed(const USet *set) { - return (USet*) (((UnicodeSet*) set)->UnicodeSet::cloneAsThawed()); -} - -U_CAPI void U_EXPORT2 -uset_set(USet* set, - UChar32 start, UChar32 end) { - ((UnicodeSet*) set)->UnicodeSet::set(start, end); -} - -U_CAPI void U_EXPORT2 -uset_addAll(USet* set, const USet *additionalSet) { - ((UnicodeSet*) set)->UnicodeSet::addAll(*((const UnicodeSet*)additionalSet)); -} - -U_CAPI void U_EXPORT2 -uset_add(USet* set, UChar32 c) { - ((UnicodeSet*) set)->UnicodeSet::add(c); -} - -U_CAPI void U_EXPORT2 -uset_addRange(USet* set, UChar32 start, UChar32 end) { - ((UnicodeSet*) set)->UnicodeSet::add(start, end); -} - -U_CAPI void U_EXPORT2 -uset_addString(USet* set, const UChar* str, int32_t strLen) { - // UnicodeString handles -1 for strLen - UnicodeString s(strLen<0, str, strLen); - ((UnicodeSet*) set)->UnicodeSet::add(s); -} - -U_CAPI void U_EXPORT2 -uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen) { - // UnicodeString handles -1 for strLen - UnicodeString s(str, strLen); - ((UnicodeSet*) set)->UnicodeSet::addAll(s); -} - -U_CAPI void U_EXPORT2 -uset_remove(USet* set, 
UChar32 c) { - ((UnicodeSet*) set)->UnicodeSet::remove(c); -} - -U_CAPI void U_EXPORT2 -uset_removeRange(USet* set, UChar32 start, UChar32 end) { - ((UnicodeSet*) set)->UnicodeSet::remove(start, end); -} - -U_CAPI void U_EXPORT2 -uset_removeString(USet* set, const UChar* str, int32_t strLen) { - UnicodeString s(strLen==-1, str, strLen); - ((UnicodeSet*) set)->UnicodeSet::remove(s); -} - -U_CAPI void U_EXPORT2 -uset_removeAll(USet* set, const USet* remove) { - ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove); -} - -U_CAPI void U_EXPORT2 -uset_retain(USet* set, UChar32 start, UChar32 end) { - ((UnicodeSet*) set)->UnicodeSet::retain(start, end); -} - -U_CAPI void U_EXPORT2 -uset_retainAll(USet* set, const USet* retain) { - ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain); -} - -U_CAPI void U_EXPORT2 -uset_compact(USet* set) { - ((UnicodeSet*) set)->UnicodeSet::compact(); -} - -U_CAPI void U_EXPORT2 -uset_complement(USet* set) { - ((UnicodeSet*) set)->UnicodeSet::complement(); -} - -U_CAPI void U_EXPORT2 -uset_complementAll(USet* set, const USet* complement) { - ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement); -} - -U_CAPI void U_EXPORT2 -uset_clear(USet* set) { - ((UnicodeSet*) set)->UnicodeSet::clear(); -} - -U_CAPI void U_EXPORT2 -uset_removeAllStrings(USet* set) { - ((UnicodeSet*) set)->UnicodeSet::removeAllStrings(); -} - -U_CAPI UBool U_EXPORT2 -uset_isEmpty(const USet* set) { - return ((const UnicodeSet*) set)->UnicodeSet::isEmpty(); -} - -U_CAPI UBool U_EXPORT2 -uset_contains(const USet* set, UChar32 c) { - return ((const UnicodeSet*) set)->UnicodeSet::contains(c); -} - -U_CAPI UBool U_EXPORT2 -uset_containsRange(const USet* set, UChar32 start, UChar32 end) { - return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end); -} - -U_CAPI UBool U_EXPORT2 -uset_containsString(const USet* set, const UChar* str, int32_t strLen) { - UnicodeString s(strLen==-1, str, strLen); - return 
((const UnicodeSet*) set)->UnicodeSet::contains(s); -} - -U_CAPI UBool U_EXPORT2 -uset_containsAll(const USet* set1, const USet* set2) { - return ((const UnicodeSet*) set1)->UnicodeSet::containsAll(* (const UnicodeSet*) set2); -} - -U_CAPI UBool U_EXPORT2 -uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen) { - // Create a string alias, since nothing is being added to the set. - UnicodeString s(strLen==-1, str, strLen); - return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s); -} - -U_CAPI UBool U_EXPORT2 -uset_containsNone(const USet* set1, const USet* set2) { - return ((const UnicodeSet*) set1)->UnicodeSet::containsNone(* (const UnicodeSet*) set2); -} - -U_CAPI UBool U_EXPORT2 -uset_containsSome(const USet* set1, const USet* set2) { - return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2); -} - -U_CAPI int32_t U_EXPORT2 -uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) { - return ((UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition); -} - -U_CAPI int32_t U_EXPORT2 -uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) { - return ((UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition); -} - -U_CAPI int32_t U_EXPORT2 -uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) { - return ((UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition); -} - -U_CAPI int32_t U_EXPORT2 -uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) { - return ((UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition); -} - -U_CAPI UBool U_EXPORT2 -uset_equals(const USet* set1, const USet* set2) { - return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2; -} - -U_CAPI int32_t U_EXPORT2 -uset_indexOf(const USet* set, UChar32 c) { - return ((UnicodeSet*) set)->UnicodeSet::indexOf(c); -} - -U_CAPI UChar32 
U_EXPORT2 -uset_charAt(const USet* set, int32_t index) { - return ((UnicodeSet*) set)->UnicodeSet::charAt(index); -} - -U_CAPI int32_t U_EXPORT2 -uset_size(const USet* set) { - return ((const UnicodeSet*) set)->UnicodeSet::size(); -} - -U_NAMESPACE_BEGIN -/** - * This class only exists to provide access to the UnicodeSet private - * USet support API. Declaring a class a friend is more portable than - * trying to declare extern "C" functions as friends. - */ -class USetAccess /* not : public UObject because all methods are static */ { -public: - /* Try to have the compiler inline these*/ - inline static int32_t getStringCount(const UnicodeSet& set) { - return set.getStringCount(); - } - inline static const UnicodeString* getString(const UnicodeSet& set, - int32_t i) { - return set.getString(i); - } -private: - /* do not instantiate*/ - USetAccess(); -}; -U_NAMESPACE_END - -U_CAPI int32_t U_EXPORT2 -uset_getItemCount(const USet* uset) { - const UnicodeSet& set = *(const UnicodeSet*)uset; - return set.getRangeCount() + USetAccess::getStringCount(set); -} - -U_CAPI int32_t U_EXPORT2 -uset_getItem(const USet* uset, int32_t itemIndex, - UChar32* start, UChar32* end, - UChar* str, int32_t strCapacity, - UErrorCode* ec) { - if (U_FAILURE(*ec)) return 0; - const UnicodeSet& set = *(const UnicodeSet*)uset; - int32_t rangeCount; - - if (itemIndex < 0) { - *ec = U_ILLEGAL_ARGUMENT_ERROR; - return -1; - } else if (itemIndex < (rangeCount = set.getRangeCount())) { - *start = set.getRangeStart(itemIndex); - *end = set.getRangeEnd(itemIndex); - return 0; - } else { - itemIndex -= rangeCount; - if (itemIndex < USetAccess::getStringCount(set)) { - const UnicodeString* s = USetAccess::getString(set, itemIndex); - return s->extract(str, strCapacity, *ec); - } else { - *ec = U_INDEX_OUTOFBOUNDS_ERROR; - return -1; - } - } -} - -//U_CAPI int32_t U_EXPORT2 -//uset_getRangeCount(const USet* set) { -// return ((const UnicodeSet*) set)->getRangeCount(); -//} -// -//U_CAPI UBool U_EXPORT2 
-//uset_getRange(const USet* set, int32_t rangeIndex, -// UChar32* pStart, UChar32* pEnd) { -// if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) { -// return FALSE; -// } -// const UnicodeSet* us = (const UnicodeSet*) set; -// *pStart = us->getRangeStart(rangeIndex); -// *pEnd = us->getRangeEnd(rangeIndex); -// return TRUE; -//} - -/* - * Serialize a USet into 16-bit units. - * Store BMP code points as themselves with one 16-bit unit each. - * - * Important: the code points in the array are in ascending order, - * therefore all BMP code points precede all supplementary code points. - * - * Store each supplementary code point in 2 16-bit units, - * simply with higher-then-lower 16-bit halfs. - * - * Precede the entire list with the length. - * If there are supplementary code points, then set bit 15 in the length - * and add the bmpLength between it and the array. - * - * In other words: - * - all BMP: (length=bmpLength) BMP, .., BMP - * - some supplementary: (length|0x8000) (bmpLengthUnicodeSet::serialize(dest, destCapacity,* ec); -} - -U_CAPI UBool U_EXPORT2 -uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) { - int32_t length; - - if(fillSet==NULL) { - return FALSE; - } - if(src==NULL || srcLength<=0) { - fillSet->length=fillSet->bmpLength=0; - return FALSE; - } - - length=*src++; - if(length&0x8000) { - /* there are supplementary values */ - length&=0x7fff; - if(srcLength<(2+length)) { - fillSet->length=fillSet->bmpLength=0; - return FALSE; - } - fillSet->bmpLength=*src++; - } else { - /* only BMP values */ - if(srcLength<(1+length)) { - fillSet->length=fillSet->bmpLength=0; - return FALSE; - } - fillSet->bmpLength=length; - } - fillSet->array=src; - fillSet->length=length; - return TRUE; -} - -U_CAPI void U_EXPORT2 -uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) { - if(fillSet==NULL || (uint32_t)c>0x10ffff) { - return; - } - - fillSet->array=fillSet->staticArray; - if(c<0xffff) { - 
fillSet->bmpLength=fillSet->length=2; - fillSet->staticArray[0]=(uint16_t)c; - fillSet->staticArray[1]=(uint16_t)c+1; - } else if(c==0xffff) { - fillSet->bmpLength=1; - fillSet->length=3; - fillSet->staticArray[0]=0xffff; - fillSet->staticArray[1]=1; - fillSet->staticArray[2]=0; - } else if(c<0x10ffff) { - fillSet->bmpLength=0; - fillSet->length=4; - fillSet->staticArray[0]=(uint16_t)(c>>16); - fillSet->staticArray[1]=(uint16_t)c; - ++c; - fillSet->staticArray[2]=(uint16_t)(c>>16); - fillSet->staticArray[3]=(uint16_t)c; - } else /* c==0x10ffff */ { - fillSet->bmpLength=0; - fillSet->length=2; - fillSet->staticArray[0]=0x10; - fillSet->staticArray[1]=0xffff; - } -} - -U_CAPI UBool U_EXPORT2 -uset_serializedContains(const USerializedSet* set, UChar32 c) { - const uint16_t* array; - - if(set==NULL || (uint32_t)c>0x10ffff) { - return FALSE; - } - - array=set->array; - if(c<=0xffff) { - /* find c in the BMP part */ - int32_t lo = 0; - int32_t hi = set->bmpLength-1; - if (c < array[0]) { - hi = 0; - } else if (c < array[hi]) { - for(;;) { - int32_t i = (lo + hi) >> 1; - if (i == lo) { - break; // Done! - } else if (c < array[i]) { - hi = i; - } else { - lo = i; - } - } - } else { - hi += 1; - } - return (UBool)(hi&1); - } else { - /* find c in the supplementary part */ - uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c; - int32_t base = set->bmpLength; - int32_t lo = 0; - int32_t hi = set->length - 2 - base; - if (high < array[base] || (high==array[base] && low> 1) & ~1; // Guarantee even result - int32_t iabs = i + base; - if (i == lo) { - break; // Done! 
- } else if (high < array[iabs] || (high==array[iabs] && lowbmpLength+(set->length-set->bmpLength)/2+1)/2; -} - -U_CAPI UBool U_EXPORT2 -uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, - UChar32* pStart, UChar32* pEnd) { - const uint16_t* array; - int32_t bmpLength, length; - - if(set==NULL || rangeIndex<0 || pStart==NULL || pEnd==NULL) { - return FALSE; - } - - array=set->array; - length=set->length; - bmpLength=set->bmpLength; - - rangeIndex*=2; /* address start/limit pairs */ - if(rangeIndex0) { -// if(c>=array[length-1]) { -// return length; -// } -// -// /* do not check the last range limit again in the loop below */ -// --length; -// } -// -// for(i=0; i=array[i]; ++i) {} -// return i; -// } -// -// static UBool -// addRemove(USet* set, UChar32 c, int32_t doRemove) { -// int32_t i, length, more; -// -// if(set==NULL || (uint32_t)c>0x10ffff) { -// return FALSE; -// } -// -// length=set->length; -// i=findChar(set->array, length, c); -// if((i&1)^doRemove) { -// /* c is already in the set */ -// return TRUE; -// } -// -// /* how many more array items do we need? 
*/ -// if(iarray[i]) { -// /* c is just before the following range, extend that in-place by one */ -// set->array[i]=c; -// if(i>0) { -// --i; -// if(c==set->array[i]) { -// /* the previous range collapsed, remove it */ -// set->length=length-=2; -// if(iarray+i, set->array+i+2, (length-i)*4); -// } -// } -// } -// return TRUE; -// } else if(i>0 && c==set->array[i-1]) { -// /* c is just after the previous range, extend that in-place by one */ -// if(++c<=0x10ffff) { -// set->array[i-1]=c; -// if(iarray[i]) { -// /* the following range collapsed, remove it */ -// --i; -// set->length=length-=2; -// if(iarray+i, set->array+i+2, (length-i)*4); -// } -// } -// } else { -// /* extend the previous range (had limit 0x10ffff) to the end of Unicode */ -// set->length=i-1; -// } -// return TRUE; -// } else if(i==length && c==0x10ffff) { -// /* insert one range limit c */ -// more=1; -// } else { -// /* insert two range limits c, c+1 */ -// more=2; -// } -// -// /* insert range limits */ -// if(length+more>set->capacity) { -// /* reallocate */ -// int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA; -// UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4); -// if(newArray==NULL) { -// return FALSE; -// } -// set->capacity=newCapacity; -// uprv_memcpy(newArray, set->array, length*4); -// -// if(set->array!=set->staticBuffer) { -// uprv_free(set->array); -// } -// set->array=newArray; -// } -// -// if(iarray+i+more, set->array+i, (length-i)*4); -// } -// set->array[i]=c; -// if(more==2) { -// set->array[i+1]=c+1; -// } -// set->length+=more; -// -// return TRUE; -// } -// -// U_CAPI UBool U_EXPORT2 -// uset_add(USet* set, UChar32 c) { -// return addRemove(set, c, 0); -// } -// -// U_CAPI void U_EXPORT2 -// uset_remove(USet* set, UChar32 c) { -// addRemove(set, c, 1); -// } diff --git a/deps/node/deps/icu-small/source/common/uset_imp.h b/deps/node/deps/icu-small/source/common/uset_imp.h deleted file mode 100644 index 5f4a3113..00000000 --- 
a/deps/node/deps/icu-small/source/common/uset_imp.h +++ /dev/null @@ -1,61 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2004-2007, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uset_imp.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004sep07 -* created by: Markus W. Scherer -* -* Internal USet definitions. -*/ - -#ifndef __USET_IMP_H__ -#define __USET_IMP_H__ - -#include "unicode/utypes.h" -#include "unicode/uset.h" - -U_CDECL_BEGIN - -typedef void U_CALLCONV -USetAdd(USet *set, UChar32 c); - -typedef void U_CALLCONV -USetAddRange(USet *set, UChar32 start, UChar32 end); - -typedef void U_CALLCONV -USetAddString(USet *set, const UChar *str, int32_t length); - -typedef void U_CALLCONV -USetRemove(USet *set, UChar32 c); - -typedef void U_CALLCONV -USetRemoveRange(USet *set, UChar32 start, UChar32 end); - -/** - * Interface for adding items to a USet, to keep low-level code from - * statically depending on the USet implementation. - * Calls will look like sa->add(sa->set, c); - */ -struct USetAdder { - USet *set; - USetAdd *add; - USetAddRange *addRange; - USetAddString *addString; - USetRemove *remove; - USetRemoveRange *removeRange; -}; -typedef struct USetAdder USetAdder; - -U_CDECL_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/uset_props.cpp b/deps/node/deps/icu-small/source/common/uset_props.cpp deleted file mode 100644 index b68175c1..00000000 --- a/deps/node/deps/icu-small/source/common/uset_props.cpp +++ /dev/null @@ -1,143 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uset_props.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004aug30 -* created by: Markus W. Scherer -* -* C wrappers around UnicodeSet functions that are implemented in -* uniset_props.cpp, split off for modularization. -*/ - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/uset.h" -#include "unicode/uniset.h" -#include "cmemory.h" -#include "unicode/ustring.h" -#include "unicode/parsepos.h" - -U_NAMESPACE_USE - -U_CAPI USet* U_EXPORT2 -uset_openPattern(const UChar* pattern, int32_t patternLength, - UErrorCode* ec) -{ - UnicodeString pat(patternLength==-1, pattern, patternLength); - UnicodeSet* set = new UnicodeSet(pat, *ec); - /* test for NULL */ - if(set == 0) { - *ec = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - if (U_FAILURE(*ec)) { - delete set; - set = NULL; - } - return (USet*) set; -} - -U_CAPI USet* U_EXPORT2 -uset_openPatternOptions(const UChar* pattern, int32_t patternLength, - uint32_t options, - UErrorCode* ec) -{ - UnicodeString pat(patternLength==-1, pattern, patternLength); - UnicodeSet* set = new UnicodeSet(pat, options, NULL, *ec); - /* test for NULL */ - if(set == 0) { - *ec = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - if (U_FAILURE(*ec)) { - delete set; - set = NULL; - } - return (USet*) set; -} - - -U_CAPI int32_t U_EXPORT2 -uset_applyPattern(USet *set, - const UChar *pattern, int32_t patternLength, - uint32_t options, - UErrorCode *status){ - - // status code needs to be checked since we - // dereference it - if(status == NULL || U_FAILURE(*status)){ - return 0; - } - - // check only the set paramenter - // if pattern 
is NULL or null terminate - // UnicodeString constructor takes care of it - if(set == NULL){ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - UnicodeString pat(pattern, patternLength); - - ParsePosition pos; - - ((UnicodeSet*) set)->applyPattern(pat, pos, options, NULL, *status); - - return pos.getIndex(); -} - -U_CAPI void U_EXPORT2 -uset_applyIntPropertyValue(USet* set, - UProperty prop, int32_t value, UErrorCode* ec) { - ((UnicodeSet*) set)->applyIntPropertyValue(prop, value, *ec); -} - -U_CAPI void U_EXPORT2 -uset_applyPropertyAlias(USet* set, - const UChar *prop, int32_t propLength, - const UChar *value, int32_t valueLength, - UErrorCode* ec) { - - UnicodeString p(prop, propLength); - UnicodeString v(value, valueLength); - - ((UnicodeSet*) set)->applyPropertyAlias(p, v, *ec); -} - -U_CAPI UBool U_EXPORT2 -uset_resemblesPattern(const UChar *pattern, int32_t patternLength, - int32_t pos) { - - UnicodeString pat(pattern, patternLength); - - return ((pos+1) < pat.length() && - pat.charAt(pos) == (UChar)91/*[*/) || - UnicodeSet::resemblesPattern(pat, pos); -} - -U_CAPI int32_t U_EXPORT2 -uset_toPattern(const USet* set, - UChar* result, int32_t resultCapacity, - UBool escapeUnprintable, - UErrorCode* ec) { - UnicodeString pat; - ((const UnicodeSet*) set)->toPattern(pat, escapeUnprintable); - return pat.extract(result, resultCapacity, *ec); -} - -U_CAPI void U_EXPORT2 -uset_closeOver(USet* set, int32_t attributes) { - ((UnicodeSet*) set)->UnicodeSet::closeOver(attributes); -} diff --git a/deps/node/deps/icu-small/source/common/usetiter.cpp b/deps/node/deps/icu-small/source/common/usetiter.cpp deleted file mode 100644 index 5d5d3c4e..00000000 --- a/deps/node/deps/icu-small/source/common/usetiter.cpp +++ /dev/null @@ -1,152 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2002-2006, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ -#include "unicode/usetiter.h" -#include "unicode/uniset.h" -#include "unicode/unistr.h" -#include "uvector.h" - -U_NAMESPACE_BEGIN - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSetIterator) - -/** - * Create an iterator - * @param set set to iterate over - */ -UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) { - cpString = NULL; - reset(uSet); -} - -/** - * Create an iterator. Convenience for when the contents are to be set later. - */ -UnicodeSetIterator::UnicodeSetIterator() { - this->set = NULL; - cpString = NULL; - reset(); -} - -UnicodeSetIterator::~UnicodeSetIterator() { - delete cpString; -} - -/** - * Returns the next element in the set. - * @return true if there was another element in the set. - * if so, if codepoint == IS_STRING, the value is a string in the string field - * else the value is a single code point in the codepoint field. - *
You are guaranteed that the codepoints are in sorted order, and the strings are in sorted order, - * and that all code points are returned before any strings are returned. - *
Note also that the codepointEnd is undefined after calling this method. - */ -UBool UnicodeSetIterator::next() { - if (nextElement <= endElement) { - codepoint = codepointEnd = nextElement++; - string = NULL; - return TRUE; - } - if (range < endRange) { - loadRange(++range); - codepoint = codepointEnd = nextElement++; - string = NULL; - return TRUE; - } - - if (nextString >= stringCount) return FALSE; - codepoint = (UChar32)IS_STRING; // signal that value is actually a string - string = (const UnicodeString*) set->strings->elementAt(nextString++); - return TRUE; -} - -/** - * @return true if there was another element in the set. - * if so, if codepoint == IS_STRING, the value is a string in the string field - * else the value is a range of codepoints in the fields. - *
Note that the codepoints are in sorted order, and the strings are in sorted order, - * and that all code points are returned before any strings are returned. - *
You are guaranteed that the ranges are in sorted order, and the strings are in sorted order, - * and that all ranges are returned before any strings are returned. - *
You are also guaranteed that ranges are disjoint and non-contiguous. - *
Note also that the codepointEnd is undefined after calling this method. - */ -UBool UnicodeSetIterator::nextRange() { - string = NULL; - if (nextElement <= endElement) { - codepointEnd = endElement; - codepoint = nextElement; - nextElement = endElement+1; - return TRUE; - } - if (range < endRange) { - loadRange(++range); - codepointEnd = endElement; - codepoint = nextElement; - nextElement = endElement+1; - return TRUE; - } - - if (nextString >= stringCount) return FALSE; - codepoint = (UChar32)IS_STRING; // signal that value is actually a string - string = (const UnicodeString*) set->strings->elementAt(nextString++); - return TRUE; -} - -/** - *@param set the set to iterate over. This allows reuse of the iterator. - */ -void UnicodeSetIterator::reset(const UnicodeSet& uSet) { - this->set = &uSet; - reset(); -} - -/** - * Resets to the start, to allow the iteration to start over again. - */ -void UnicodeSetIterator::reset() { - if (set == NULL) { - // Set up indices to empty iteration - endRange = -1; - stringCount = 0; - } else { - endRange = set->getRangeCount() - 1; - stringCount = set->strings->size(); - } - range = 0; - endElement = -1; - nextElement = 0; - if (endRange >= 0) { - loadRange(range); - } - nextString = 0; - string = NULL; -} - -void UnicodeSetIterator::loadRange(int32_t iRange) { - nextElement = set->getRangeStart(iRange); - endElement = set->getRangeEnd(iRange); -} - - -const UnicodeString& UnicodeSetIterator::getString() { - if (string==NULL && codepoint!=(UChar32)IS_STRING) { - if (cpString == NULL) { - cpString = new UnicodeString(); - } - if (cpString != NULL) { - cpString->setTo((UChar32)codepoint); - } - string = cpString; - } - return *string; -} - -U_NAMESPACE_END - -//eof diff --git a/deps/node/deps/icu-small/source/common/ushape.cpp b/deps/node/deps/icu-small/source/common/ushape.cpp deleted file mode 100644 index 792de50b..00000000 --- a/deps/node/deps/icu-small/source/common/ushape.cpp +++ /dev/null @@ -1,1728 +0,0 @@ -// © 2016 and 
later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ****************************************************************************** - * - * Copyright (C) 2000-2016, International Business Machines - * Corporation and others. All Rights Reserved. - * - ****************************************************************************** - * file name: ushape.cpp - * encoding: UTF-8 - * tab size: 8 (not used) - * indentation:4 - * - * created on: 2000jun29 - * created by: Markus W. Scherer - * - * Arabic letter shaping implemented by Ayman Roshdy - */ - -#include "unicode/utypes.h" -#include "unicode/uchar.h" -#include "unicode/ustring.h" -#include "unicode/ushape.h" -#include "cmemory.h" -#include "putilimp.h" -#include "ustr_imp.h" -#include "ubidi_props.h" -#include "uassert.h" - -/* - * This implementation is designed for 16-bit Unicode strings. - * The main assumption is that the Arabic characters and their - * presentation forms each fit into a single UChar. - * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII - * characters. - */ - -/* - * ### TODO in general for letter shaping: - * - the letter shaping code is UTF-16-unaware; needs update - * + especially invertBuffer()?! 
- * - needs to handle the "Arabic Tail" that is used in some legacy codepages - * as a glyph fragment of wide-glyph letters - * + IBM Unicode conversion tables map it to U+200B (ZWSP) - * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms - * + Unicode 3.2 added U+FE73 ARABIC TAIL FRAGMENT - */ - -/* definitions for Arabic letter shaping ------------------------------------ */ - -#define IRRELEVANT 4 -#define LAMTYPE 16 -#define ALEFTYPE 32 -#define LINKR 1 -#define LINKL 2 -#define APRESENT 8 -#define SHADDA 64 -#define CSHADDA 128 -#define COMBINE (SHADDA+CSHADDA) - -#define HAMZAFE_CHAR 0xfe80 -#define HAMZA06_CHAR 0x0621 -#define YEH_HAMZA_CHAR 0x0626 -#define YEH_HAMZAFE_CHAR 0xFE89 -#define LAMALEF_SPACE_SUB 0xFFFF -#define TASHKEEL_SPACE_SUB 0xFFFE -#define NEW_TAIL_CHAR 0xFE73 -#define OLD_TAIL_CHAR 0x200B -#define LAM_CHAR 0x0644 -#define SPACE_CHAR 0x0020 -#define SHADDA_CHAR 0xFE7C -#define TATWEEL_CHAR 0x0640 -#define SHADDA_TATWEEL_CHAR 0xFE7D -#define SHADDA06_CHAR 0x0651 - -#define SHAPE_MODE 0 -#define DESHAPE_MODE 1 - -struct uShapeVariables { - UChar tailChar; - uint32_t uShapeLamalefBegin; - uint32_t uShapeLamalefEnd; - uint32_t uShapeTashkeelBegin; - uint32_t uShapeTashkeelEnd; - int spacesRelativeToTextBeginEnd; -}; - -static const uint8_t tailFamilyIsolatedFinal[] = { - /* FEB1 */ 1, - /* FEB2 */ 1, - /* FEB3 */ 0, - /* FEB4 */ 0, - /* FEB5 */ 1, - /* FEB6 */ 1, - /* FEB7 */ 0, - /* FEB8 */ 0, - /* FEB9 */ 1, - /* FEBA */ 1, - /* FEBB */ 0, - /* FEBC */ 0, - /* FEBD */ 1, - /* FEBE */ 1 -}; - -static const uint8_t tashkeelMedial[] = { - /* FE70 */ 0, - /* FE71 */ 1, - /* FE72 */ 0, - /* FE73 */ 0, - /* FE74 */ 0, - /* FE75 */ 0, - /* FE76 */ 0, - /* FE77 */ 1, - /* FE78 */ 0, - /* FE79 */ 1, - /* FE7A */ 0, - /* FE7B */ 1, - /* FE7C */ 0, - /* FE7D */ 1, - /* FE7E */ 0, - /* FE7F */ 1 -}; - -static const UChar yehHamzaToYeh[] = -{ -/* isolated*/ 0xFEEF, -/* final */ 0xFEF0 -}; - -static const uint8_t 
IrrelevantPos[] = { - 0x0, 0x2, 0x4, 0x6, - 0x8, 0xA, 0xC, 0xE -}; - - -static const UChar convertLamAlef[] = -{ -/*FEF5*/ 0x0622, -/*FEF6*/ 0x0622, -/*FEF7*/ 0x0623, -/*FEF8*/ 0x0623, -/*FEF9*/ 0x0625, -/*FEFA*/ 0x0625, -/*FEFB*/ 0x0627, -/*FEFC*/ 0x0627 -}; - -static const UChar araLink[178]= -{ - 1 + 32 + 256 * 0x11,/*0x0622*/ - 1 + 32 + 256 * 0x13,/*0x0623*/ - 1 + 256 * 0x15,/*0x0624*/ - 1 + 32 + 256 * 0x17,/*0x0625*/ - 1 + 2 + 256 * 0x19,/*0x0626*/ - 1 + 32 + 256 * 0x1D,/*0x0627*/ - 1 + 2 + 256 * 0x1F,/*0x0628*/ - 1 + 256 * 0x23,/*0x0629*/ - 1 + 2 + 256 * 0x25,/*0x062A*/ - 1 + 2 + 256 * 0x29,/*0x062B*/ - 1 + 2 + 256 * 0x2D,/*0x062C*/ - 1 + 2 + 256 * 0x31,/*0x062D*/ - 1 + 2 + 256 * 0x35,/*0x062E*/ - 1 + 256 * 0x39,/*0x062F*/ - 1 + 256 * 0x3B,/*0x0630*/ - 1 + 256 * 0x3D,/*0x0631*/ - 1 + 256 * 0x3F,/*0x0632*/ - 1 + 2 + 256 * 0x41,/*0x0633*/ - 1 + 2 + 256 * 0x45,/*0x0634*/ - 1 + 2 + 256 * 0x49,/*0x0635*/ - 1 + 2 + 256 * 0x4D,/*0x0636*/ - 1 + 2 + 256 * 0x51,/*0x0637*/ - 1 + 2 + 256 * 0x55,/*0x0638*/ - 1 + 2 + 256 * 0x59,/*0x0639*/ - 1 + 2 + 256 * 0x5D,/*0x063A*/ - 0, 0, 0, 0, 0, /*0x063B-0x063F*/ - 1 + 2, /*0x0640*/ - 1 + 2 + 256 * 0x61,/*0x0641*/ - 1 + 2 + 256 * 0x65,/*0x0642*/ - 1 + 2 + 256 * 0x69,/*0x0643*/ - 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/ - 1 + 2 + 256 * 0x71,/*0x0645*/ - 1 + 2 + 256 * 0x75,/*0x0646*/ - 1 + 2 + 256 * 0x79,/*0x0647*/ - 1 + 256 * 0x7D,/*0x0648*/ - 1 + 256 * 0x7F,/*0x0649*/ - 1 + 2 + 256 * 0x81,/*0x064A*/ - 4 + 256 * 1, /*0x064B*/ - 4 + 128 + 256 * 1, /*0x064C*/ - 4 + 128 + 256 * 1, /*0x064D*/ - 4 + 128 + 256 * 1, /*0x064E*/ - 4 + 128 + 256 * 1, /*0x064F*/ - 4 + 128 + 256 * 1, /*0x0650*/ - 4 + 64 + 256 * 3, /*0x0651*/ - 4 + 256 * 1, /*0x0652*/ - 4 + 256 * 7, /*0x0653*/ - 4 + 256 * 8, /*0x0654*/ - 4 + 256 * 8, /*0x0655*/ - 4 + 256 * 1, /*0x0656*/ - 0, 0, 0, 0, 0, /*0x0657-0x065B*/ - 1 + 256 * 0x85,/*0x065C*/ - 1 + 256 * 0x87,/*0x065D*/ - 1 + 256 * 0x89,/*0x065E*/ - 1 + 256 * 0x8B,/*0x065F*/ - 0, 0, 0, 0, 0, /*0x0660-0x0664*/ - 0, 0, 0, 0, 0, 
/*0x0665-0x0669*/ - 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/ - 4 + 256 * 6, /*0x0670*/ - 1 + 8 + 256 * 0x00,/*0x0671*/ - 1 + 32, /*0x0672*/ - 1 + 32, /*0x0673*/ - 0, /*0x0674*/ - 1 + 32, /*0x0675*/ - 1, 1, /*0x0676-0x0677*/ - 1 + 2, /*0x0678*/ - 1 + 2 + 8 + 256 * 0x16,/*0x0679*/ - 1 + 2 + 8 + 256 * 0x0E,/*0x067A*/ - 1 + 2 + 8 + 256 * 0x02,/*0x067B*/ - 1+2, 1+2, /*0x67C-0x067D*/ - 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/ - 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/ - 1 + 8 + 256 * 0x38,/*0x0688*/ - 1, 1, 1, /*0x0689-0x068B*/ - 1 + 8 + 256 * 0x34,/*0x068C*/ - 1 + 8 + 256 * 0x32,/*0x068D*/ - 1 + 8 + 256 * 0x36,/*0x068E*/ - 1, 1, /*0x068F-0x0690*/ - 1 + 8 + 256 * 0x3C,/*0x0691*/ - 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/ - 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ - 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ - 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/ - 1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/ - 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ - 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ - 1+2, 1+2, /*0x06B8-0x06B9*/ - 1 + 8 + 256 * 0x4E,/*0x06BA*/ - 1 + 2 + 8 + 256 * 0x50,/*0x06BB*/ - 1+2, 1+2, /*0x06BC-0x06BD*/ - 1 + 2 + 8 + 256 * 0x5A,/*0x06BE*/ - 1+2, /*0x06BF*/ - 1 + 8 + 256 * 0x54,/*0x06C0*/ - 1 + 2 + 8 + 256 * 0x56,/*0x06C1*/ - 1, 1, 1, /*0x06C2-0x06C4*/ - 1 + 8 + 256 * 0x90,/*0x06C5*/ - 1 + 8 + 256 * 0x89,/*0x06C6*/ - 1 + 8 + 256 * 0x87,/*0x06C7*/ - 1 + 8 + 256 * 0x8B,/*0x06C8*/ - 1 + 8 + 256 * 0x92,/*0x06C9*/ - 1, /*0x06CA*/ - 1 + 8 + 256 * 0x8E,/*0x06CB*/ - 1 + 2 + 8 + 256 * 0xAC,/*0x06CC*/ - 1, /*0x06CD*/ - 1+2, 1+2, /*0x06CE-0x06CF*/ - 1 + 2 + 8 + 256 * 0x94,/*0x06D0*/ - 1+2, /*0x06D1*/ - 1 + 8 + 256 * 0x5E,/*0x06D2*/ - 1 + 8 + 256 * 0x60 /*0x06D3*/ -}; - -static const uint8_t presALink[] = { -/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/ -/*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, -/*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, -/*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, -/*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, -/*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, -/*FC0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FC1*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FC2*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FC3*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FC4*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FC5*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, -/*FC6*/ 4, 4, 4 -}; - -static const uint8_t presBLink[]= -{ -/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/ -/*FE7*/1 + 2,1 + 2,1 + 2, 0,1 + 2, 0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2, -/*FE8*/ 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2,1 + 2, 0, 1, 0, -/*FE9*/ 1, 2,1 + 2, 0, 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, -/*FEA*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, -/*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, -/*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, -/*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, -/*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, -/*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0 -}; - -static const UChar convertFBto06[] = -{ -/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ -/*FB5*/ 0x671, 0x671, 0x67B, 0x67B, 0x67B, 0x67B, 0x67E, 0x67E, 0x67E, 0x67E, 0, 0, 0, 0, 0x67A, 0x67A, -/*FB6*/ 0x67A, 0x67A, 0, 0, 0, 0, 0x679, 0x679, 
0x679, 0x679, 0, 0, 0, 0, 0, 0, -/*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x686, 0x686, 0x686, 0x686, 0, 0, -/*FB8*/ 0, 0, 0x68D, 0x68D, 0x68C, 0x68C, 0x68E, 0x68E, 0x688, 0x688, 0x698, 0x698, 0x691, 0x691, 0x6A9, 0x6A9, -/*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0, 0, 0, 0, 0, 0x6BA, 0x6BA, -/*FBA*/ 0x6BB, 0x6BB, 0x6BB, 0x6BB, 0x6C0, 0x6C0, 0x6C1, 0x6C1, 0x6C1, 0x6C1, 0x6BE, 0x6BE, 0x6BE, 0x6BE, 0x6d2, 0x6D2, -/*FBB*/ 0x6D3, 0x6D3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0x6C7, 0x6C7, 0x6C6, 0x6C6, 0x6C8, 0x6C8, 0, 0x6CB, 0x6CB, -/*FBE*/ 0x6C5, 0x6C5, 0x6C9, 0x6C9, 0x6D0, 0x6D0, 0x6D0, 0x6D0, 0, 0, 0, 0, 0, 0, 0, 0, -/*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC -}; - -static const UChar convertFEto06[] = -{ -/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ -/*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652, -/*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628, -/*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C, -/*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632, -/*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636, -/*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A, -/*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644, -/*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649, -/*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 
0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F -}; - -static const uint8_t shapeTable[4][4][4]= -{ - { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} }, - { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }, - { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} }, - { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} } -}; - -/* - * This function shapes European digits to Arabic-Indic digits - * in-place, writing over the input characters. - * Since we know that we are only looking for BMP code points, - * we can safely just work with code units (again, at least UTF-16). - */ -static void -_shapeToArabicDigitsWithContext(UChar *s, int32_t length, - UChar digitBase, - UBool isLogical, UBool lastStrongWasAL) { - int32_t i; - UChar c; - - digitBase-=0x30; - - /* the iteration direction depends on the type of input */ - if(isLogical) { - for(i=0; i0; /* pre-decrement in the body */) { - c=s[--i]; - switch(ubidi_getClass(c)) { - case U_LEFT_TO_RIGHT: /* L */ - case U_RIGHT_TO_LEFT: /* R */ - lastStrongWasAL=FALSE; - break; - case U_RIGHT_TO_LEFT_ARABIC: /* AL */ - lastStrongWasAL=TRUE; - break; - case U_EUROPEAN_NUMBER: /* EN */ - if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { - s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ - } - break; - default : - break; - } - } - } -} - -/* - *Name : invertBuffer - *Function : This function inverts the buffer, it's used - * in case the user specifies the buffer to be - * U_SHAPE_TEXT_DIRECTION_LOGICAL - */ -static void -invertBuffer(UChar *buffer, int32_t size, uint32_t /*options*/, int32_t lowlimit, int32_t highlimit) { - UChar temp; - int32_t i=0,j=0; - for(i=lowlimit,j=size-highlimit-1;i= 0x0622 && ch <= 0x06D3) { - return(araLink[ch-0x0622]); - } else if(ch == 0x200D) { - return(3); - } else if(ch >= 0x206D && ch <= 0x206F) { - return(4); - }else if(ch >= 0xFB50 && ch <= 0xFC62) { - return(presALink[ch-0xFB50]); - } else if(ch >= 0xFE70 && ch <= 0xFEFC) { - return(presBLink[ch-0xFE70]); - }else { - return(0); - } -} - 
-/* - *Name : countSpaces - *Function : Counts the number of spaces - * at each end of the logical buffer - */ -static void -countSpaces(UChar *dest, int32_t size, uint32_t /*options*/, int32_t *spacesCountl, int32_t *spacesCountr) { - int32_t i = 0; - int32_t countl = 0,countr = 0; - while((dest[i] == SPACE_CHAR) && (countl < size)) { - countl++; - i++; - } - if (countl < size) { /* the entire buffer is not all space */ - while(dest[size-1] == SPACE_CHAR) { - countr++; - size--; - } - } - *spacesCountl = countl; - *spacesCountr = countr; -} - -/* - *Name : isTashkeelChar - *Function : Returns 1 for Tashkeel characters in 06 range else return 0 - */ -static inline int32_t -isTashkeelChar(UChar ch) { - return (int32_t)( ch>=0x064B && ch<= 0x0652 ); -} - -/* - *Name : isTashkeelCharFE - *Function : Returns 1 for Tashkeel characters in FE range else return 0 - */ -static inline int32_t -isTashkeelCharFE(UChar ch) { - return (int32_t)( ch>=0xFE70 && ch<= 0xFE7F ); -} - -/* - *Name : isAlefChar - *Function : Returns 1 for Alef characters else return 0 - */ -static inline int32_t -isAlefChar(UChar ch) { - return (int32_t)( (ch==0x0622)||(ch==0x0623)||(ch==0x0625)||(ch==0x0627) ); -} - -/* - *Name : isLamAlefChar - *Function : Returns 1 for LamAlef characters else return 0 - */ -static inline int32_t -isLamAlefChar(UChar ch) { - return (int32_t)((ch>=0xFEF5)&&(ch<=0xFEFC) ); -} - -/*BIDI - *Name : isTailChar - *Function : returns 1 if the character matches one of the tail characters (0xfe73 or 0x200b) otherwise returns 0 - */ - -static inline int32_t -isTailChar(UChar ch) { - if(ch == OLD_TAIL_CHAR || ch == NEW_TAIL_CHAR){ - return 1; - }else{ - return 0; - } -} - -/*BIDI - *Name : isSeenTailFamilyChar - *Function : returns 1 if the character is a seen family isolated character - * in the FE range otherwise returns 0 - */ - -static inline int32_t -isSeenTailFamilyChar(UChar ch) { - if(ch >= 0xfeb1 && ch < 0xfebf){ - return tailFamilyIsolatedFinal [ch - 0xFEB1]; - }else{ - 
return 0; - } -} - - /* Name : isSeenFamilyChar - * Function : returns 1 if the character is a seen family character in the Unicode - * 06 range otherwise returns 0 - */ - -static inline int32_t -isSeenFamilyChar(UChar ch){ - if(ch >= 0x633 && ch <= 0x636){ - return 1; - }else { - return 0; - } -} - -/*Start of BIDI*/ -/* - *Name : isAlefMaksouraChar - *Function : returns 1 if the character is a Alef Maksoura Final or isolated - * otherwise returns 0 - */ -static inline int32_t -isAlefMaksouraChar(UChar ch) { - return (int32_t)( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649)); -} - -/* - * Name : isYehHamzaChar - * Function : returns 1 if the character is a yehHamza isolated or yehhamza - * final is found otherwise returns 0 - */ -static inline int32_t -isYehHamzaChar(UChar ch) { - if((ch==0xFE89)||(ch==0xFE8A)){ - return 1; - }else{ - return 0; - } -} - - /* - * Name: isTashkeelOnTatweelChar - * Function: Checks if the Tashkeel Character is on Tatweel or not,if the - * Tashkeel on tatweel (FE range), it returns 1 else if the - * Tashkeel with shadda on tatweel (FC range)return 2 otherwise - * returns 0 - */ -static inline int32_t -isTashkeelOnTatweelChar(UChar ch){ - if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR) - { - return tashkeelMedial [ch - 0xFE70]; - }else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR)) { - return 2; - }else{ - return 0; - } -} - -/* - * Name: isIsolatedTashkeelChar - * Function: Checks if the Tashkeel Character is in the isolated form - * (i.e. Unicode FE range) returns 1 else if the Tashkeel - * with shadda is in the isolated form (i.e. 
Unicode FC range) - * returns 2 otherwise returns 0 - */ -static inline int32_t -isIsolatedTashkeelChar(UChar ch){ - if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75){ - return (1 - tashkeelMedial [ch - 0xFE70]); - }else if(ch >= 0xfc5e && ch <= 0xfc63){ - return 1; - }else{ - return 0; - } -} - - - - -/* - *Name : calculateSize - *Function : This function calculates the destSize to be used in preflighting - * when the destSize is equal to 0 - * It is used also to calculate the new destsize in case the - * destination buffer will be resized. - */ - -static int32_t -calculateSize(const UChar *source, int32_t sourceLength, -int32_t destSize,uint32_t options) { - int32_t i = 0; - - int lamAlefOption = 0; - int tashkeelOption = 0; - - destSize = sourceLength; - - if (((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE || - ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED )) && - ((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE )){ - lamAlefOption = 1; - } - if((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE && - ((options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ) ){ - tashkeelOption = 1; - } - - if(lamAlefOption || tashkeelOption){ - if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) { - for(i=0;i= 0) { - tempbuffer[i] = 0x0000; - i--; - count--; - } - - u_memcpy(dest, tempbuffer, sourceLength); - destSize = u_strlen(dest); - } - - lamAlefOption = 0; - - if (shapingMode == 0){ - if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR ){ - lamAlefOption = 1; - } - } - - if (lamAlefOption){ - /* Lam+Alef is already shaped into LamAlef + FFFF */ - i = 0; - while(i < sourceLength) { - if(lamAlefOption&&dest[i] == LAMALEF_SPACE_SUB){ - dest[i] = SPACE_CHAR; - } - i++; - } - destSize = sourceLength; - } - lamAlefOption = 0; - tashkeelOption = 0; - - if (shapingMode == 0) { - if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin) || - 
(((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO ) - && (shapeVars.spacesRelativeToTextBeginEnd==1)) ) { - lamAlefOption = 1; - } - if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelBegin ) { - tashkeelOption = 1; - } - } - - if(lamAlefOption || tashkeelOption){ - uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); - - i = j = sourceLength; count = 0; - - while(i >= 0) { - if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || - (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ - j++; - count++; - }else { - tempbuffer[j] = dest[i]; - } - i--; - j--; - } - - for(i=0 ;i < count; i++){ - tempbuffer[i] = SPACE_CHAR; - } - - u_memcpy(dest, tempbuffer, sourceLength); - destSize = sourceLength; - } - - - - lamAlefOption = 0; - tashkeelOption = 0; - - if (shapingMode == 0) { - if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd) || - (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO ) - && (shapeVars.spacesRelativeToTextBeginEnd==0)) ) { - lamAlefOption = 1; - } - if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelEnd ){ - tashkeelOption = 1; - } - } - - if(lamAlefOption || tashkeelOption){ - uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); - - i = j = 0; count = 0; - while(i < sourceLength) { - if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || - (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ - j--; - count++; - }else { - tempbuffer[j] = dest[i]; - } - i++; - j++; - } - - while(count >= 0) { - tempbuffer[i] = SPACE_CHAR; - i--; - count--; - } - - u_memcpy(dest, tempbuffer, sourceLength); - destSize = sourceLength; - } - - - if(tempbuffer){ - uprv_free(tempbuffer); - } - - return destSize; -} - -/* - *Name :expandCompositCharAtBegin - *Function :Expands the LamAlef character to Lam and Alef consuming the required - * space from beginning of the buffer. 
If the text type was visual_LTR - * and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected - * the spaces will be located at end of buffer. - * If there are no spaces to expand the LamAlef, an error - * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h - */ - -static int32_t -expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) { - int32_t i = 0,j = 0; - int32_t countl = 0; - UChar *tempbuffer=NULL; - - tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); - - /* Test for NULL */ - if(tempbuffer == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); - - i = 0; - while(dest[i] == SPACE_CHAR) { - countl++; - i++; - } - - i = j = sourceLength-1; - - while(i >= 0 && j >= 0) { - if( countl>0 && isLamAlefChar(dest[i])) { - tempbuffer[j] = LAM_CHAR; - /* to ensure the array index is within the range */ - U_ASSERT(dest[i] >= 0xFEF5u - && dest[i]-0xFEF5u < UPRV_LENGTHOF(convertLamAlef)); - tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ]; - j--; - countl--; - }else { - if( countl == 0 && isLamAlefChar(dest[i]) ) { - *pErrorCode=U_NO_SPACE_AVAILABLE; - } - tempbuffer[j] = dest[i]; - } - i--; - j--; - } - u_memcpy(dest, tempbuffer, sourceLength); - - uprv_free(tempbuffer); - - destSize = sourceLength; - return destSize; -} - -/* - *Name : expandCompositCharAtEnd - *Function : Expands the LamAlef character to Lam and Alef consuming the - * required space from end of the buffer. If the text type was - * Visual LTR and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END - * was used, the spaces will be consumed from begin of buffer. 
If - * there are no spaces to expand the LamAlef, an error - * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h - */ - -static int32_t -expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) { - int32_t i = 0,j = 0; - - int32_t countr = 0; - int32_t inpsize = sourceLength; - - UChar *tempbuffer=NULL; - tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); - - /* Test for NULL */ - if(tempbuffer == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); - - while(dest[inpsize-1] == SPACE_CHAR) { - countr++; - inpsize--; - } - - i = sourceLength - countr - 1; - j = sourceLength - 1; - - while(i >= 0 && j >= 0) { - if( countr>0 && isLamAlefChar(dest[i]) ) { - tempbuffer[j] = LAM_CHAR; - tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ]; - j--; - countr--; - }else { - if ((countr == 0) && isLamAlefChar(dest[i]) ) { - *pErrorCode=U_NO_SPACE_AVAILABLE; - } - tempbuffer[j] = dest[i]; - } - i--; - j--; - } - - if(countr > 0) { - u_memmove(tempbuffer, tempbuffer+countr, sourceLength); - if(u_strlen(tempbuffer) < sourceLength) { - for(i=sourceLength-1;i>=sourceLength-countr;i--) { - tempbuffer[i] = SPACE_CHAR; - } - } - } - u_memcpy(dest, tempbuffer, sourceLength); - - uprv_free(tempbuffer); - - destSize = sourceLength; - return destSize; -} - -/* - *Name : expandCompositCharAtNear - *Function : Expands the LamAlef character into Lam + Alef, YehHamza character - * into Yeh + Hamza, SeenFamily character into SeenFamily character - * + Tail, while consuming the space next to the character. 
- * If there are no spaces next to the character, an error - * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h - */ - -static int32_t -expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode, - int yehHamzaOption, int seenTailOption, int lamAlefOption, struct uShapeVariables shapeVars) { - int32_t i = 0; - - - UChar lamalefChar, yehhamzaChar; - - for(i = 0 ;i<=sourceLength-1;i++) { - if (seenTailOption && isSeenTailFamilyChar(dest[i])) { - if ((i>0) && (dest[i-1] == SPACE_CHAR) ) { - dest[i-1] = shapeVars.tailChar; - }else { - *pErrorCode=U_NO_SPACE_AVAILABLE; - } - }else if(yehHamzaOption && (isYehHamzaChar(dest[i])) ) { - if ((i>0) && (dest[i-1] == SPACE_CHAR) ) { - yehhamzaChar = dest[i]; - dest[i] = yehHamzaToYeh[yehhamzaChar - YEH_HAMZAFE_CHAR]; - dest[i-1] = HAMZAFE_CHAR; - }else { - - *pErrorCode=U_NO_SPACE_AVAILABLE; - } - }else if(lamAlefOption && isLamAlefChar(dest[i+1])) { - if(dest[i] == SPACE_CHAR){ - lamalefChar = dest[i+1]; - dest[i+1] = LAM_CHAR; - dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ]; - }else { - *pErrorCode=U_NO_SPACE_AVAILABLE; - } - } - } - destSize = sourceLength; - return destSize; -} - /* - * Name : expandCompositChar - * Function : LamAlef, need special handling, since it expands from one - * character into two characters while shaping or deshaping. - * In order to expand it, near or far spaces according to the - * options user specifies. Also buffer size can be increased. - * - * For SeenFamily characters and YehHamza only the near option is - * supported, while for LamAlef we can take spaces from begin, end, - * near or even increase the buffer size. - * There is also the Auto option for LamAlef only, which will first - * search for a space at end, begin then near, respectively. 
- * If there are no spaces to expand these characters, an error will be set to - * U_NO_SPACE_AVAILABLE as defined in utypes.h - */ - -static int32_t -expandCompositChar(UChar *dest, int32_t sourceLength, - int32_t destSize,uint32_t options, - UErrorCode *pErrorCode, int shapingMode,struct uShapeVariables shapeVars) { - - int32_t i = 0,j = 0; - - UChar *tempbuffer=NULL; - int yehHamzaOption = 0; - int seenTailOption = 0; - int lamAlefOption = 0; - - if (shapingMode == 1){ - if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO){ - - if(shapeVars.spacesRelativeToTextBeginEnd == 0) { - destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); - - if(*pErrorCode == U_NO_SPACE_AVAILABLE) { - *pErrorCode = U_ZERO_ERROR; - destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); - } - }else { - destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); - - if(*pErrorCode == U_NO_SPACE_AVAILABLE) { - *pErrorCode = U_ZERO_ERROR; - destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); - } - } - - if(*pErrorCode == U_NO_SPACE_AVAILABLE) { - *pErrorCode = U_ZERO_ERROR; - destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption, - seenTailOption, 1,shapeVars); - } - } - } - - if (shapingMode == 1){ - if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd){ - destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); - } - } - - if (shapingMode == 1){ - if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin){ - destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); - } - } - - if (shapingMode == 0){ - if ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR){ - yehHamzaOption = 1; - } - if ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR){ - seenTailOption = 1; - } - } - if (shapingMode == 1) { - if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR) { - 
lamAlefOption = 1; - } - } - - - if (yehHamzaOption || seenTailOption || lamAlefOption){ - destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption, - seenTailOption,lamAlefOption,shapeVars); - } - - - if (shapingMode == 1){ - if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ - destSize = calculateSize(dest,sourceLength,destSize,options); - tempbuffer = (UChar *)uprv_malloc((destSize+1)*U_SIZEOF_UCHAR); - - /* Test for NULL */ - if(tempbuffer == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR); - - i = j = 0; - while(i < destSize && j < destSize) { - if(isLamAlefChar(dest[i]) ) { - tempbuffer[j] = convertLamAlef[ dest[i] - 0xFEF5 ]; - tempbuffer[j+1] = LAM_CHAR; - j++; - }else { - tempbuffer[j] = dest[i]; - } - i++; - j++; - } - - u_memcpy(dest, tempbuffer, destSize); - } - } - - if(tempbuffer) { - uprv_free(tempbuffer); - } - return destSize; -} - -/* - *Name : shapeUnicode - *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped - * arabic Unicode buffer in FExx Range - */ -static int32_t -shapeUnicode(UChar *dest, int32_t sourceLength, - int32_t destSize,uint32_t options, - UErrorCode *pErrorCode, - int tashkeelFlag, struct uShapeVariables shapeVars) { - - int32_t i, iend; - int32_t step; - int32_t lastPos,Nx, Nw; - unsigned int Shape; - int32_t lamalef_found = 0; - int32_t seenfamFound = 0, yehhamzaFound =0, tashkeelFound = 0; - UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0; - UChar wLamalef; - - /* - * Converts the input buffer from FExx Range into 06xx Range - * to make sure that all characters are in the 06xx range - * even the lamalef is converted to the special region in - * the 06xx range - */ - if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK) == U_SHAPE_PRESERVE_PRESENTATION_NOOP) { - for (i = 0; i < sourceLength; i++) { - UChar inputChar = dest[i]; - if ( (inputChar >= 0xFB50) && (inputChar <= 
0xFBFF)) { - UChar c = convertFBto06 [ (inputChar - 0xFB50) ]; - if (c != 0) - dest[i] = c; - } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) { - dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ; - } else { - dest[i] = inputChar ; - } - } - } - - - /* sets the index to the end of the buffer, together with the step point to -1 */ - i = sourceLength - 1; - iend = -1; - step = -1; - - /* - * This function resolves the link between the characters . - * Arabic characters have four forms : - * Isolated Form, Initial Form, Middle Form and Final Form - */ - currLink = getLink(dest[i]); - - lastPos = i; - Nx = -2, Nw = 0; - - while (i != iend) { - /* If high byte of currLink > 0 then more than one shape */ - if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT) != 0) { - Nw = i + step; - while (Nx < 0) { /* we need to know about next char */ - if(Nw == iend) { - nextLink = 0; - Nx = 3000; - } else { - nextLink = getLink(dest[Nw]); - if((nextLink & IRRELEVANT) == 0) { - Nx = Nw; - } else { - Nw = Nw + step; - } - } - } - - if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) { - lamalef_found = 1; - wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */ - if ( wLamalef != 0) { - dest[i] = LAMALEF_SPACE_SUB; /* The default case is to drop the Alef and replace */ - dest[lastPos] =wLamalef; /* it by LAMALEF_SPACE_SUB which is the last character in the */ - i=lastPos; /* unicode private use area, this is done to make */ - } /* sure that removeLamAlefSpaces() handles only the */ - lastLink = prevLink; /* spaces generated during lamalef generation. 
*/ - currLink = getLink(wLamalef); /* LAMALEF_SPACE_SUB is added here and is replaced by spaces */ - } /* in removeLamAlefSpaces() */ - - if ((i > 0) && (dest[i-1] == SPACE_CHAR)){ - if ( isSeenFamilyChar(dest[i])) { - seenfamFound = 1; - } else if (dest[i] == YEH_HAMZA_CHAR) { - yehhamzaFound = 1; - } - } - else if(i==0){ - if ( isSeenFamilyChar(dest[i])){ - seenfamFound = 1; - } else if (dest[i] == YEH_HAMZA_CHAR) { - yehhamzaFound = 1; - } - } - - /* - * get the proper shape according to link ability of neighbors - * and of character; depends on the order of the shapes - * (isolated, initial, middle, final) in the compatibility area - */ - Shape = shapeTable[nextLink & (LINKR + LINKL)] - [lastLink & (LINKR + LINKL)] - [currLink & (LINKR + LINKL)]; - - if ((currLink & (LINKR+LINKL)) == 1) { - Shape &= 1; - } else if(isTashkeelChar(dest[i])) { - if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) && - dest[i] != 0x064C && dest[i] != 0x064D ) - { - Shape = 1; - if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) { - Shape = 0; - } - } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){ - Shape = 1; - } else { - Shape = 0; - } - } - if ((dest[i] ^ 0x0600) < 0x100) { - if ( isTashkeelChar(dest[i]) ){ - if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR){ - dest[i] = TASHKEEL_SPACE_SUB; - tashkeelFound = 1; - } else { - /* to ensure the array index is within the range */ - U_ASSERT(dest[i] >= 0x064Bu - && dest[i]-0x064Bu < UPRV_LENGTHOF(IrrelevantPos)); - dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + static_cast(Shape); - } - }else if ((currLink & APRESENT) > 0) { - dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape); - }else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT) == 0) { - dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape); - } - } - } - - /* move one notch forward */ - if ((currLink & IRRELEVANT) == 0) { - prevLink = lastLink; - lastLink = currLink; - lastPos = i; - } - - i = i + step; - if (i == Nx) { - 
currLink = nextLink; - Nx = -2; - } else if(i != iend) { - currLink = getLink(dest[i]); - } - } - destSize = sourceLength; - if ( (lamalef_found != 0 ) || (tashkeelFound != 0) ){ - destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode, shapeVars); - } - - if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) { - destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE,shapeVars); - } - return destSize; -} - -/* - *Name : deShapeUnicode - *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped - * arabic Unicode buffer in 06xx Range - */ -static int32_t -deShapeUnicode(UChar *dest, int32_t sourceLength, - int32_t destSize,uint32_t options, - UErrorCode *pErrorCode, struct uShapeVariables shapeVars) { - int32_t i = 0; - int32_t lamalef_found = 0; - int32_t yehHamzaComposeEnabled = 0; - int32_t seenComposeEnabled = 0; - - yehHamzaComposeEnabled = ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR) ? 1 : 0; - seenComposeEnabled = ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR)? 
1 : 0; - - /* - *This for loop changes the buffer from the Unicode FE range to - *the Unicode 06 range - */ - - for(i = 0; i < sourceLength; i++) { - UChar inputChar = dest[i]; - if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */ - UChar c = convertFBto06 [ (inputChar - 0xFB50) ]; - if (c != 0) - dest[i] = c; - } else if( (yehHamzaComposeEnabled == 1) && ((inputChar == HAMZA06_CHAR) || (inputChar == HAMZAFE_CHAR)) - && (i < (sourceLength - 1)) && isAlefMaksouraChar(dest[i+1] )) { - dest[i] = SPACE_CHAR; - dest[i+1] = YEH_HAMZA_CHAR; - } else if ( (seenComposeEnabled == 1) && (isTailChar(inputChar)) && (i< (sourceLength - 1)) - && (isSeenTailFamilyChar(dest[i+1])) ) { - dest[i] = SPACE_CHAR; - } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */ - dest[i] = convertFEto06 [ (inputChar - 0xFE70) ]; - } else { - dest[i] = inputChar ; - } - - if( isLamAlefChar(dest[i]) ) - lamalef_found = 1; - } - - destSize = sourceLength; - if (lamalef_found != 0){ - destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE, shapeVars); - } - return destSize; -} - -/* - **************************************** - * u_shapeArabic - **************************************** - */ - -U_CAPI int32_t U_EXPORT2 -u_shapeArabic(const UChar *source, int32_t sourceLength, - UChar *dest, int32_t destCapacity, - uint32_t options, - UErrorCode *pErrorCode) { - - int32_t destLength; - struct uShapeVariables shapeVars = { OLD_TAIL_CHAR,U_SHAPE_LAMALEF_BEGIN,U_SHAPE_LAMALEF_END,U_SHAPE_TASHKEEL_BEGIN,U_SHAPE_TASHKEEL_END,0}; - - /* usual error checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */ - if( source==NULL || sourceLength<-1 || (dest==NULL && destCapacity!=0) || destCapacity<0 || - (((options&U_SHAPE_TASHKEEL_MASK) > 0) && - ((options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) == 
U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) ) || - (((options&U_SHAPE_TASHKEEL_MASK) > 0) && - ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE)) || - (options&U_SHAPE_DIGIT_TYPE_RESERVED)==U_SHAPE_DIGIT_TYPE_RESERVED || - (options&U_SHAPE_DIGITS_MASK)==U_SHAPE_DIGITS_RESERVED || - ((options&U_SHAPE_LAMALEF_MASK) != U_SHAPE_LAMALEF_RESIZE && - (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) != 0) || - ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) == U_SHAPE_AGGREGATE_TASHKEEL && - (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) - ) - { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - /* Validate lamalef options */ - if(((options&U_SHAPE_LAMALEF_MASK) > 0)&& - !(((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_BEGIN) || - ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_END ) || - ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE )|| - ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_AUTO) || - ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_NEAR))) - { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - /* Validate Tashkeel options */ - if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&& - !(((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_BEGIN) || - ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_END ) - ||((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE )|| - ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL))) - { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - /* determine the source length */ - if(sourceLength==-1) { - sourceLength=u_strlen(source); - } - if(sourceLength<=0) { - return u_terminateUChars(dest, destCapacity, 0, pErrorCode); - } - - /* check that source and destination do not overlap */ - if( dest!=NULL && - ((source<=dest && dest0) { - int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL; - int32_t aggregate_tashkeel = - 
(options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)) == - (U_SHAPE_AGGREGATE_TASHKEEL+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED); - int step=logical_order?1:-1; - int j=logical_order?-1:2*sourceLength; - int i=logical_order?-1:sourceLength; - int end=logical_order?sourceLength:-1; - int aggregation_possible = 1; - UChar prev = 0; - UChar prevLink, currLink = 0; - int newSourceLength = 0; - tempsource = (UChar *)uprv_malloc(2*sourceLength*U_SIZEOF_UCHAR); - if(tempsource == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - while ((i+=step) != end) { - prevLink = currLink; - currLink = getLink(source[i]); - if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE) == COMBINE && aggregation_possible) { - aggregation_possible = 0; - tempsource[j] = (prevdestCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - if (tempsource != NULL) uprv_free(tempsource); - return outputSize; - } - - /* - * need a temporary buffer of size max(outputSize, sourceLength) - * because at first we copy source->temp - */ - if(sourceLength>outputSize) { - outputSize=sourceLength; - } - - /* Start of Arabic letter shaping part */ - if(outputSize<=UPRV_LENGTHOF(buffer)) { - outputSize=UPRV_LENGTHOF(buffer); - tempbuffer=buffer; - } else { - tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR); - - /*Test for NULL*/ - if(tempbuffer == NULL) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - if (tempsource != NULL) uprv_free(tempsource); - return 0; - } - } - u_memcpy(tempbuffer, source, sourceLength); - if (tempsource != NULL){ - uprv_free(tempsource); - } - - if(sourceLength 0 - && ((options&U_SHAPE_TASHKEEL_MASK) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)) { - /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */ - destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2,shapeVars); - }else { - /* default Call the shaping function with tashkeel flag == 1 */ - destLength = 
shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1,shapeVars); - - /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/ - if( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL){ - destLength = handleTashkeelWithTatweel(tempbuffer,destLength,destCapacity,options,pErrorCode); - } - } - break; - case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED : - /* Call the shaping function with tashkeel flag == 0 */ - destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0,shapeVars); - break; - - case U_SHAPE_LETTERS_UNSHAPE : - /* Call the deshaping function */ - destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,shapeVars); - break; - default : - /* will never occur because of validity checks above */ - destLength = 0; - break; - } - - /* - * TODO: (markus 2002aug01) - * For as long as we always preflight the outputSize above - * we should U_ASSERT(outputSize==destLength) - * except for the adjustment above before the tempbuffer allocation - */ - - if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) { - countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr); - invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr); - } - u_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity)); - - if(tempbuffer!=buffer) { - uprv_free(tempbuffer); - } - - if(destLength>destCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - return destLength; - } - - /* End of Arabic letter shaping part */ - } else { - /* - * No letter shaping: - * just make sure the destination is large enough and copy the string. 
- */ - if(destCapacitysize>=20 && - pInfo->isBigEndian==U_IS_BIG_ENDIAN && - pInfo->charsetFamily==U_CHARSET_FAMILY && - pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ - pInfo->dataFormat[1]==0x50 && - pInfo->dataFormat[2]==0x52 && - pInfo->dataFormat[3]==0x50 && - pInfo->formatVersion[0]==3 && - pInfo->formatVersion[2]==UTRIE_SHIFT && - pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT - ) { - //uprv_memcpy(formatVersion, pInfo->formatVersion, 4); - uprv_memcpy(dataVersion, pInfo->dataVersion, 4); - return TRUE; - } else { - return FALSE; - } -} - -static int32_t U_CALLCONV -getSPrepFoldingOffset(uint32_t data) { - - return (int32_t)data; - -} - -/* hashes an entry */ -static int32_t U_CALLCONV -hashEntry(const UHashTok parm) { - UStringPrepKey *b = (UStringPrepKey *)parm.pointer; - UHashTok namekey, pathkey; - namekey.pointer = b->name; - pathkey.pointer = b->path; - uint32_t unsignedHash = static_cast(uhash_hashChars(namekey)) + - 37u * static_cast(uhash_hashChars(pathkey)); - return static_cast(unsignedHash); -} - -/* compares two entries */ -static UBool U_CALLCONV -compareEntries(const UHashTok p1, const UHashTok p2) { - UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer; - UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer; - UHashTok name1, name2, path1, path2; - name1.pointer = b1->name; - name2.pointer = b2->name; - path1.pointer = b1->path; - path2.pointer = b2->path; - return ((UBool)(uhash_compareChars(name1, name2) & - uhash_compareChars(path1, path2))); -} - -static void -usprep_unload(UStringPrepProfile* data){ - udata_close(data->sprepData); -} - -static int32_t -usprep_internal_flushCache(UBool noRefCount){ - UStringPrepProfile *profile = NULL; - UStringPrepKey *key = NULL; - int32_t pos = UHASH_FIRST; - int32_t deletedNum = 0; - const UHashElement *e; - - /* - * if shared data hasn't even been lazy evaluated yet - * return 0 - */ - umtx_lock(&usprepMutex); - if (SHARED_DATA_HASHTABLE == NULL) { - umtx_unlock(&usprepMutex); - return 0; - } - - 
/*creates an enumeration to iterate through every element in the table */ - while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL) - { - profile = (UStringPrepProfile *) e->value.pointer; - key = (UStringPrepKey *) e->key.pointer; - - if ((noRefCount== FALSE && profile->refCount == 0) || - noRefCount== TRUE) { - deletedNum++; - uhash_removeElement(SHARED_DATA_HASHTABLE, e); - - /* unload the data */ - usprep_unload(profile); - - if(key->name != NULL) { - uprv_free(key->name); - key->name=NULL; - } - if(key->path != NULL) { - uprv_free(key->path); - key->path=NULL; - } - uprv_free(profile); - uprv_free(key); - } - - } - umtx_unlock(&usprepMutex); - - return deletedNum; -} - -/* Works just like ucnv_flushCache() -static int32_t -usprep_flushCache(){ - return usprep_internal_flushCache(FALSE); -} -*/ - -static UBool U_CALLCONV usprep_cleanup(void){ - if (SHARED_DATA_HASHTABLE != NULL) { - usprep_internal_flushCache(TRUE); - if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { - uhash_close(SHARED_DATA_HASHTABLE); - SHARED_DATA_HASHTABLE = NULL; - } - } - gSharedDataInitOnce.reset(); - return (SHARED_DATA_HASHTABLE == NULL); -} -U_CDECL_END - - -/** Initializes the cache for resources */ -static void U_CALLCONV -createCache(UErrorCode &status) { - SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status); - if (U_FAILURE(status)) { - SHARED_DATA_HASHTABLE = NULL; - } - ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup); -} - -static void -initCache(UErrorCode *status) { - umtx_initOnce(gSharedDataInitOnce, &createCache, *status); -} - -static UBool U_CALLCONV -loadData(UStringPrepProfile* profile, - const char* path, - const char* name, - const char* type, - UErrorCode* errorCode) { - /* load Unicode SPREP data from file */ - UTrie _sprepTrie={ 0,0,0,0,0,0,0 }; - UDataMemory *dataMemory; - const int32_t *p=NULL; - const uint8_t *pb; - UVersionInfo normUnicodeVersion; - int32_t normUniVer, 
sprepUniVer, normCorrVer; - - if(errorCode==NULL || U_FAILURE(*errorCode)) { - return 0; - } - - /* open the data outside the mutex block */ - //TODO: change the path - dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode); - if(U_FAILURE(*errorCode)) { - return FALSE; - } - - p=(const int32_t *)udata_getMemory(dataMemory); - pb=(const uint8_t *)(p+_SPREP_INDEX_TOP); - utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode); - _sprepTrie.getFoldingOffset=getSPrepFoldingOffset; - - - if(U_FAILURE(*errorCode)) { - udata_close(dataMemory); - return FALSE; - } - - /* in the mutex block, set the data for this process */ - umtx_lock(&usprepMutex); - if(profile->sprepData==NULL) { - profile->sprepData=dataMemory; - dataMemory=NULL; - uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes)); - uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie)); - } else { - p=(const int32_t *)udata_getMemory(profile->sprepData); - } - umtx_unlock(&usprepMutex); - /* initialize some variables */ - profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]); - - u_getUnicodeVersion(normUnicodeVersion); - normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + - (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]); - sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + - (dataVersion[2] << 8 ) + (dataVersion[3]); - normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]; - - if(U_FAILURE(*errorCode)){ - udata_close(dataMemory); - return FALSE; - } - if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */ - normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */ - ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/ - 
){ - *errorCode = U_INVALID_FORMAT_ERROR; - udata_close(dataMemory); - return FALSE; - } - profile->isDataLoaded = TRUE; - - /* if a different thread set it first, then close the extra data */ - if(dataMemory!=NULL) { - udata_close(dataMemory); /* NULL if it was set correctly */ - } - - - return profile->isDataLoaded; -} - -static UStringPrepProfile* -usprep_getProfile(const char* path, - const char* name, - UErrorCode *status){ - - UStringPrepProfile* profile = NULL; - - initCache(status); - - if(U_FAILURE(*status)){ - return NULL; - } - - UStringPrepKey stackKey; - /* - * const is cast way to save malloc, strcpy and free calls - * we use the passed in pointers for fetching the data from the - * hash table which is safe - */ - stackKey.name = (char*) name; - stackKey.path = (char*) path; - - /* fetch the data from the cache */ - umtx_lock(&usprepMutex); - profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); - if(profile != NULL) { - profile->refCount++; - } - umtx_unlock(&usprepMutex); - - if(profile == NULL) { - /* else load the data and put the data in the cache */ - LocalMemory newProfile; - if(newProfile.allocateInsteadAndReset() == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - /* load the data */ - if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){ - return NULL; - } - - /* get the options */ - newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); - newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); - - LocalMemory key; - LocalMemory keyName; - LocalMemory keyPath; - if( key.allocateInsteadAndReset() == NULL || - keyName.allocateInsteadAndCopy(static_cast(uprv_strlen(name)+1)) == NULL || - (path != NULL && - keyPath.allocateInsteadAndCopy(static_cast(uprv_strlen(path)+1)) == NULL) - ) { - *status = U_MEMORY_ALLOCATION_ERROR; - usprep_unload(newProfile.getAlias()); - return 
NULL; - } - - umtx_lock(&usprepMutex); - // If another thread already inserted the same key/value, refcount and cleanup our thread data - profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); - if(profile != NULL) { - profile->refCount++; - usprep_unload(newProfile.getAlias()); - } - else { - /* initialize the key members */ - key->name = keyName.orphan(); - uprv_strcpy(key->name, name); - if(path != NULL){ - key->path = keyPath.orphan(); - uprv_strcpy(key->path, path); - } - profile = newProfile.orphan(); - - /* add the data object to the cache */ - profile->refCount = 1; - uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); - } - umtx_unlock(&usprepMutex); - } - - return profile; -} - -U_CAPI UStringPrepProfile* U_EXPORT2 -usprep_open(const char* path, - const char* name, - UErrorCode* status){ - - if(status == NULL || U_FAILURE(*status)){ - return NULL; - } - - /* initialize the profile struct members */ - return usprep_getProfile(path,name,status); -} - -U_CAPI UStringPrepProfile* U_EXPORT2 -usprep_openByType(UStringPrepProfileType type, - UErrorCode* status) { - if(status == NULL || U_FAILURE(*status)){ - return NULL; - } - int32_t index = (int32_t)type; - if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - return usprep_open(NULL, PROFILE_NAMES[index], status); -} - -U_CAPI void U_EXPORT2 -usprep_close(UStringPrepProfile* profile){ - if(profile==NULL){ - return; - } - - umtx_lock(&usprepMutex); - /* decrement the ref count*/ - if(profile->refCount > 0){ - profile->refCount--; - } - umtx_unlock(&usprepMutex); - -} - -U_CFUNC void -uprv_syntaxError(const UChar* rules, - int32_t pos, - int32_t rulesLen, - UParseError* parseError){ - if(parseError == NULL){ - return; - } - parseError->offset = pos; - parseError->line = 0 ; // we are not using line numbers - - // for pre-context - int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 
0 : (pos - (U_PARSE_CONTEXT_LEN-1)); - int32_t limit = pos; - - u_memcpy(parseError->preContext,rules+start,limit-start); - //null terminate the buffer - parseError->preContext[limit-start] = 0; - - // for post-context; include error rules[pos] - start = pos; - limit = start + (U_PARSE_CONTEXT_LEN-1); - if (limit > rulesLen) { - limit = rulesLen; - } - if (start < rulesLen) { - u_memcpy(parseError->postContext,rules+start,limit-start); - } - //null terminate the buffer - parseError->postContext[limit-start]= 0; -} - - -static inline UStringPrepType -getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){ - - UStringPrepType type; - if(trieWord == 0){ - /* - * Initial value stored in the mapping table - * just return USPREP_TYPE_LIMIT .. so that - * the source codepoint is copied to the destination - */ - type = USPREP_TYPE_LIMIT; - isIndex =FALSE; - value = 0; - }else if(trieWord >= _SPREP_TYPE_THRESHOLD){ - type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD); - isIndex =FALSE; - value = 0; - }else{ - /* get the type */ - type = USPREP_MAP; - /* ascertain if the value is index or delta */ - if(trieWord & 0x02){ - isIndex = TRUE; - value = trieWord >> 2; //mask off the lower 2 bits and shift - }else{ - isIndex = FALSE; - value = (int16_t)trieWord; - value = (value >> 2); - } - - if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ - type = USPREP_DELETE; - isIndex =FALSE; - value = 0; - } - } - return type; -} - -// TODO: change to writing to UnicodeString not UChar * -static int32_t -usprep_map( const UStringPrepProfile* profile, - const UChar* src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UParseError* parseError, - UErrorCode* status ){ - - uint16_t result; - int32_t destIndex=0; - int32_t srcIndex; - UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0); - UStringPrepType type; - int16_t value; - UBool isIndex; - const int32_t* indexes = profile->indexes; - - // no error checking the caller check for 
error and arguments - // no string length check the caller finds out the string length - - for(srcIndex=0;srcIndexsprepTrie,ch,result); - - type = getValues(result, value, isIndex); - - // check if the source codepoint is unassigned - if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){ - - uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError); - *status = U_STRINGPREP_UNASSIGNED_ERROR; - return 0; - - }else if(type == USPREP_MAP){ - - int32_t index, length; - - if(isIndex){ - index = value; - if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && - index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ - length = 1; - }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && - index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ - length = 2; - }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && - index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ - length = 3; - }else{ - length = profile->mappingData[index++]; - - } - - /* copy mapping to destination */ - for(int32_t i=0; i< length; i++){ - if(destIndex < destCapacity ){ - dest[destIndex] = profile->mappingData[index+i]; - } - destIndex++; /* for pre-flighting */ - } - continue; - }else{ - // subtract the delta to arrive at the code point - ch -= value; - } - - }else if(type==USPREP_DELETE){ - // just consume the codepoint and contine - continue; - } - //copy the code point into destination - if(ch <= 0xFFFF){ - if(destIndex < destCapacity ){ - dest[destIndex] = (UChar)ch; - } - destIndex++; - }else{ - if(destIndex+1 < destCapacity ){ - dest[destIndex] = U16_LEAD(ch); - dest[destIndex+1] = U16_TRAIL(ch); - } - destIndex +=2; - } - - } - - return u_terminateUChars(dest, destCapacity, destIndex, status); -} - -/* - 1) Map -- For each character in the input, check if it has a mapping - and, if so, replace it with its mapping. - - 2) Normalize -- Possibly normalize the result of step 1 using Unicode - normalization. 
- - 3) Prohibit -- Check for any characters that are not allowed in the - output. If any are found, return an error. - - 4) Check bidi -- Possibly check for right-to-left characters, and if - any are found, make sure that the whole string satisfies the - requirements for bidirectional strings. If the string does not - satisfy the requirements for bidirectional strings, return an - error. - [Unicode3.2] defines several bidirectional categories; each character - has one bidirectional category assigned to it. For the purposes of - the requirements below, an "RandALCat character" is a character that - has Unicode bidirectional categories "R" or "AL"; an "LCat character" - is a character that has Unicode bidirectional category "L". Note - - - that there are many characters which fall in neither of the above - definitions; Latin digits ( through ) are examples of - this because they have bidirectional category "EN". - - In any profile that specifies bidirectional character handling, all - three of the following requirements MUST be met: - - 1) The characters in section 5.8 MUST be prohibited. - - 2) If a string contains any RandALCat character, the string MUST NOT - contain any LCat character. - - 3) If a string contains any RandALCat character, a RandALCat - character MUST be the first character of the string, and a - RandALCat character MUST be the last character of the string. -*/ -U_CAPI int32_t U_EXPORT2 -usprep_prepare( const UStringPrepProfile* profile, - const UChar* src, int32_t srcLength, - UChar* dest, int32_t destCapacity, - int32_t options, - UParseError* parseError, - UErrorCode* status ){ - - // check error status - if(U_FAILURE(*status)){ - return 0; - } - - //check arguments - if(profile==NULL || - (src==NULL ? srcLength!=0 : srcLength<-1) || - (dest==NULL ? 
destCapacity!=0 : destCapacity<0)) { - *status=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - //get the string length - if(srcLength < 0){ - srcLength = u_strlen(src); - } - // map - UnicodeString s1; - UChar *b1 = s1.getBuffer(srcLength); - if(b1==NULL){ - *status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - int32_t b1Len = usprep_map(profile, src, srcLength, - b1, s1.getCapacity(), options, parseError, status); - s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); - - if(*status == U_BUFFER_OVERFLOW_ERROR){ - // redo processing of string - /* we do not have enough room so grow the buffer*/ - b1 = s1.getBuffer(b1Len); - if(b1==NULL){ - *status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - *status = U_ZERO_ERROR; // reset error - b1Len = usprep_map(profile, src, srcLength, - b1, s1.getCapacity(), options, parseError, status); - s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); - } - if(U_FAILURE(*status)){ - return 0; - } - - // normalize - UnicodeString s2; - if(profile->doNFKC){ - const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status); - FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status)); - if(U_FAILURE(*status)){ - return 0; - } - fn2.normalize(s1, s2, *status); - }else{ - s2.fastCopyFrom(s1); - } - if(U_FAILURE(*status)){ - return 0; - } - - // Prohibit and checkBiDi in one pass - const UChar *b2 = s2.getBuffer(); - int32_t b2Len = s2.length(); - UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; - UBool leftToRight=FALSE, rightToLeft=FALSE; - int32_t rtlPos =-1, ltrPos =-1; - - for(int32_t b2Index=0; b2IndexsprepTrie,ch,result); - - int16_t value; - UBool isIndex; - UStringPrepType type = getValues(result, value, isIndex); - - if( type == USPREP_PROHIBITED || - ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) - ){ - *status = U_STRINGPREP_PROHIBITED_ERROR; - uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError); - return 0; - } - - 
if(profile->checkBiDi) { - direction = ubidi_getClass(ch); - if(firstCharDir == U_CHAR_DIRECTION_COUNT){ - firstCharDir = direction; - } - if(direction == U_LEFT_TO_RIGHT){ - leftToRight = TRUE; - ltrPos = b2Index-1; - } - if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ - rightToLeft = TRUE; - rtlPos = b2Index-1; - } - } - } - if(profile->checkBiDi == TRUE){ - // satisfy 2 - if( leftToRight == TRUE && rightToLeft == TRUE){ - *status = U_STRINGPREP_CHECK_BIDI_ERROR; - uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); - return 0; - } - - //satisfy 3 - if( rightToLeft == TRUE && - !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && - (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) - ){ - *status = U_STRINGPREP_CHECK_BIDI_ERROR; - uprv_syntaxError(b2, rtlPos, b2Len, parseError); - return FALSE; - } - } - return s2.extract(dest, destCapacity, *status); -} - - -/* data swapping ------------------------------------------------------------ */ - -U_CAPI int32_t U_EXPORT2 -usprep_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t headerSize; - - const uint8_t *inBytes; - uint8_t *outBytes; - - const int32_t *inIndexes; - int32_t indexes[16]; - - int32_t i, offset, count, size; - - /* udata_swapDataHeader checks the arguments */ - headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - /* check data format and format version */ - pInfo=(const UDataInfo *)((const char *)inData+4); - if(!( - pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ - pInfo->dataFormat[1]==0x50 && - pInfo->dataFormat[2]==0x52 && - pInfo->dataFormat[3]==0x50 && - pInfo->formatVersion[0]==3 - )) { - udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep 
.spp data\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], - pInfo->dataFormat[2], pInfo->dataFormat[3], - pInfo->formatVersion[0]); - *pErrorCode=U_UNSUPPORTED_ERROR; - return 0; - } - - inBytes=(const uint8_t *)inData+headerSize; - outBytes=(uint8_t *)outData+headerSize; - - inIndexes=(const int32_t *)inBytes; - - if(length>=0) { - length-=headerSize; - if(length<16*4) { - udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n", - length); - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - - /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */ - for(i=0; i<16; ++i) { - indexes[i]=udata_readInt32(ds, inIndexes[i]); - } - - /* calculate the total length of the data */ - size= - 16*4+ /* size of indexes[] */ - indexes[_SPREP_INDEX_TRIE_SIZE]+ - indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; - - if(length>=0) { - if(lengthswapArray32(ds, inBytes, count, outBytes, pErrorCode); - offset+=count; - - /* swap the UTrie */ - count=indexes[_SPREP_INDEX_TRIE_SIZE]; - utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); - offset+=count; - - /* swap the uint16_t mappingTable[] */ - count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; - ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); - //offset+=count; - } - - return headerSize+size; -} - -#endif /* #if !UCONFIG_NO_IDNA */ diff --git a/deps/node/deps/icu-small/source/common/ustack.cpp b/deps/node/deps/icu-small/source/common/ustack.cpp deleted file mode 100644 index fb314b0e..00000000 --- a/deps/node/deps/icu-small/source/common/ustack.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2003-2011, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -*/ - -#include "uvector.h" - -U_NAMESPACE_BEGIN - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UStack) - -UStack::UStack(UErrorCode &status) : - UVector(status) -{ -} - -UStack::UStack(int32_t initialCapacity, UErrorCode &status) : - UVector(initialCapacity, status) -{ -} - -UStack::UStack(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status) : - UVector(d, c, status) -{ -} - -UStack::UStack(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status) : - UVector(d, c, initialCapacity, status) -{ -} - -UStack::~UStack() {} - -void* UStack::pop(void) { - int32_t n = size() - 1; - void* result = 0; - if (n >= 0) { - result = elementAt(n); - removeElementAt(n); - } - return result; -} - -int32_t UStack::popi(void) { - int32_t n = size() - 1; - int32_t result = 0; - if (n >= 0) { - result = elementAti(n); - removeElementAt(n); - } - return result; -} - -int32_t UStack::search(void* obj) const { - int32_t i = indexOf(obj); - return (i >= 0) ? size() - i : i; -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/ustr_cnv.cpp b/deps/node/deps/icu-small/source/common/ustr_cnv.cpp deleted file mode 100644 index eb37232c..00000000 --- a/deps/node/deps/icu-small/source/common/ustr_cnv.cpp +++ /dev/null @@ -1,256 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 1998-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ustr_cnv.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004aug24 -* created by: Markus W. 
Scherer -* -* Character conversion functions moved here from ustring.c -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/ustring.h" -#include "unicode/ucnv.h" -#include "cstring.h" -#include "cmemory.h" -#include "umutex.h" -#include "ustr_cnv.h" -#include "ucnv_bld.h" - -/* mutexed access to a shared default converter ----------------------------- */ - -static UConverter *gDefaultConverter = NULL; - -U_CAPI UConverter* U_EXPORT2 -u_getDefaultConverter(UErrorCode *status) -{ - UConverter *converter = NULL; - - if (gDefaultConverter != NULL) { - umtx_lock(NULL); - - /* need to check to make sure it wasn't taken out from under us */ - if (gDefaultConverter != NULL) { - converter = gDefaultConverter; - gDefaultConverter = NULL; - } - umtx_unlock(NULL); - } - - /* if the cache was empty, create a converter */ - if(converter == NULL) { - converter = ucnv_open(NULL, status); - if(U_FAILURE(*status)) { - ucnv_close(converter); - converter = NULL; - } - } - - return converter; -} - -U_CAPI void U_EXPORT2 -u_releaseDefaultConverter(UConverter *converter) -{ - if(gDefaultConverter == NULL) { - if (converter != NULL) { - ucnv_reset(converter); - } - ucnv_enableCleanup(); - umtx_lock(NULL); - if(gDefaultConverter == NULL) { - gDefaultConverter = converter; - converter = NULL; - } - umtx_unlock(NULL); - } - - if(converter != NULL) { - ucnv_close(converter); - } -} - -U_CAPI void U_EXPORT2 -u_flushDefaultConverter() -{ - UConverter *converter = NULL; - - if (gDefaultConverter != NULL) { - umtx_lock(NULL); - - /* need to check to make sure it wasn't taken out from under us */ - if (gDefaultConverter != NULL) { - converter = gDefaultConverter; - gDefaultConverter = NULL; - } - umtx_unlock(NULL); - } - - /* if the cache was populated, flush it */ - if(converter != NULL) { - ucnv_close(converter); - } -} - - -/* conversions between char* and UChar* ------------------------------------- */ - -/* maximum string length for u_uastrcpy() and 
u_austrcpy() implementations */ -#define MAX_STRLEN 0x0FFFFFFF - -/* - returns the minimum of (the length of the null-terminated string) and n. -*/ -static int32_t u_astrnlen(const char *s1, int32_t n) -{ - int32_t len = 0; - - if (s1) - { - while (n-- && *(s1++)) - { - len++; - } - } - return len; -} - -U_CAPI UChar* U_EXPORT2 -u_uastrncpy(UChar *ucs1, - const char *s2, - int32_t n) -{ - UChar *target = ucs1; - UErrorCode err = U_ZERO_ERROR; - UConverter *cnv = u_getDefaultConverter(&err); - if(U_SUCCESS(err) && cnv != NULL) { - ucnv_reset(cnv); - ucnv_toUnicode(cnv, - &target, - ucs1+n, - &s2, - s2+u_astrnlen(s2, n), - NULL, - TRUE, - &err); - ucnv_reset(cnv); /* be good citizens */ - u_releaseDefaultConverter(cnv); - if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) { - *ucs1 = 0; /* failure */ - } - if(target < (ucs1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */ - *target = 0; /* terminate */ - } - } else { - *ucs1 = 0; - } - return ucs1; -} - -U_CAPI UChar* U_EXPORT2 -u_uastrcpy(UChar *ucs1, - const char *s2 ) -{ - UErrorCode err = U_ZERO_ERROR; - UConverter *cnv = u_getDefaultConverter(&err); - if(U_SUCCESS(err) && cnv != NULL) { - ucnv_toUChars(cnv, - ucs1, - MAX_STRLEN, - s2, - (int32_t)uprv_strlen(s2), - &err); - u_releaseDefaultConverter(cnv); - if(U_FAILURE(err)) { - *ucs1 = 0; - } - } else { - *ucs1 = 0; - } - return ucs1; -} - -/* - returns the minimum of (the length of the null-terminated string) and n. 
-*/ -static int32_t u_ustrnlen(const UChar *ucs1, int32_t n) -{ - int32_t len = 0; - - if (ucs1) - { - while (n-- && *(ucs1++)) - { - len++; - } - } - return len; -} - -U_CAPI char* U_EXPORT2 -u_austrncpy(char *s1, - const UChar *ucs2, - int32_t n) -{ - char *target = s1; - UErrorCode err = U_ZERO_ERROR; - UConverter *cnv = u_getDefaultConverter(&err); - if(U_SUCCESS(err) && cnv != NULL) { - ucnv_reset(cnv); - ucnv_fromUnicode(cnv, - &target, - s1+n, - &ucs2, - ucs2+u_ustrnlen(ucs2, n), - NULL, - TRUE, - &err); - ucnv_reset(cnv); /* be good citizens */ - u_releaseDefaultConverter(cnv); - if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) { - *s1 = 0; /* failure */ - } - if(target < (s1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */ - *target = 0; /* terminate */ - } - } else { - *s1 = 0; - } - return s1; -} - -U_CAPI char* U_EXPORT2 -u_austrcpy(char *s1, - const UChar *ucs2 ) -{ - UErrorCode err = U_ZERO_ERROR; - UConverter *cnv = u_getDefaultConverter(&err); - if(U_SUCCESS(err) && cnv != NULL) { - int32_t len = ucnv_fromUChars(cnv, - s1, - MAX_STRLEN, - ucs2, - -1, - &err); - u_releaseDefaultConverter(cnv); - s1[len] = 0; - } else { - *s1 = 0; - } - return s1; -} - -#endif diff --git a/deps/node/deps/icu-small/source/common/ustr_cnv.h b/deps/node/deps/icu-small/source/common/ustr_cnv.h deleted file mode 100644 index 12e86ea0..00000000 --- a/deps/node/deps/icu-small/source/common/ustr_cnv.h +++ /dev/null @@ -1,51 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2010, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -* file name: ustr_cnv.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004Aug27 -* created by: George Rhoten -*/ - -#ifndef USTR_CNV_IMP_H -#define USTR_CNV_IMP_H - -#include "unicode/utypes.h" -#include "unicode/ucnv.h" - -#if !UCONFIG_NO_CONVERSION - -/** - * Get the default converter. This is a commonly used converter - * that is used for the ustring and UnicodeString API. - * Remember to use the u_releaseDefaultConverter when you are done. - * @internal - */ -U_CAPI UConverter* U_EXPORT2 -u_getDefaultConverter(UErrorCode *status); - - -/** - * Release the default converter to the converter cache. - * @internal - */ -U_CAPI void U_EXPORT2 -u_releaseDefaultConverter(UConverter *converter); - -/** - * Flush the default converter, if cached. - * @internal - */ -U_CAPI void U_EXPORT2 -u_flushDefaultConverter(void); - -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/ustr_imp.h b/deps/node/deps/icu-small/source/common/ustr_imp.h deleted file mode 100644 index 943824fa..00000000 --- a/deps/node/deps/icu-small/source/common/ustr_imp.h +++ /dev/null @@ -1,143 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2015, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ustr_imp.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001jan30 -* created by: Markus W. Scherer -*/ - -#ifndef __USTR_IMP_H__ -#define __USTR_IMP_H__ - -#include "unicode/utypes.h" -#include "unicode/utf8.h" - -/** - * Internal option for unorm_cmpEquivFold() for strncmp style. - * If set, checks for both string length and terminating NUL. 
- */ -#define _STRNCMP_STYLE 0x1000 - -/** - * Compare two strings in code point order or code unit order. - * Works in strcmp style (both lengths -1), - * strncmp style (lengths equal and >=0, flag TRUE), - * and memcmp/UnicodeString style (at least one length >=0). - */ -U_CFUNC int32_t U_EXPORT2 -uprv_strCompare(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - UBool strncmpStyle, UBool codePointOrder); - -U_CAPI int32_t U_EXPORT2 -ustr_hashUCharsN(const UChar *str, int32_t length); - -U_CAPI int32_t U_EXPORT2 -ustr_hashCharsN(const char *str, int32_t length); - -U_CAPI int32_t U_EXPORT2 -ustr_hashICharsN(const char *str, int32_t length); - -/** - * NUL-terminate a UChar * string if possible. - * If length < destCapacity then NUL-terminate. - * If length == destCapacity then do not terminate but set U_STRING_NOT_TERMINATED_WARNING. - * If length > destCapacity then do not terminate but set U_BUFFER_OVERFLOW_ERROR. - * - * @param dest Destination buffer, can be NULL if destCapacity==0. - * @param destCapacity Number of UChars available at dest. - * @param length Number of UChars that were (to be) written to dest. - * @param pErrorCode ICU error code. - * @return length - */ -U_CAPI int32_t U_EXPORT2 -u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode); - -/** - * NUL-terminate a char * string if possible. - * Same as u_terminateUChars() but for a different string type. - */ -U_CAPI int32_t U_EXPORT2 -u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode); - -/** - * NUL-terminate a UChar32 * string if possible. - * Same as u_terminateUChars() but for a different string type. - */ -U_CAPI int32_t U_EXPORT2 -u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode); - -/** - * NUL-terminate a wchar_t * string if possible. - * Same as u_terminateUChars() but for a different string type. 
- */ -U_CAPI int32_t U_EXPORT2 -u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode); - -/** - * Counts the bytes of any whole valid sequence for a UTF-8 lead byte. - * Returns 1 for ASCII 0..0x7f. - * Returns 0 for 0x80..0xc1 as well as for 0xf5..0xff. - * leadByte might be evaluated multiple times. - * - * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. - * @return 0..4 - */ -#define U8_COUNT_BYTES(leadByte) \ - (U8_IS_SINGLE(leadByte) ? 1 : U8_COUNT_BYTES_NON_ASCII(leadByte)) - -/** - * Counts the bytes of any whole valid sequence for a UTF-8 lead byte. - * Returns 0 for 0x00..0xc1 as well as for 0xf5..0xff. - * leadByte might be evaluated multiple times. - * - * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. - * @return 0 or 2..4 - */ -#define U8_COUNT_BYTES_NON_ASCII(leadByte) \ - (U8_IS_LEAD(leadByte) ? ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+2 : 0) - -#ifdef __cplusplus - -U_NAMESPACE_BEGIN - -class UTF8 { -public: - UTF8() = delete; // all static - - /** - * Is t a valid UTF-8 trail byte? - * - * @param prev Must be the preceding lead byte if i==1 and length>=3; - * otherwise ignored. - * @param t The i-th byte following the lead byte. - * @param i The index (1..3) of byte t in the byte sequence. 0 1) { - return U8_IS_TRAIL(t); - } else if (length == 3) { - return U8_IS_VALID_LEAD3_AND_T1(prev, t); - } else { // length == 4 - return U8_IS_VALID_LEAD4_AND_T1(prev, t); - } - } -}; - -U_NAMESPACE_END - -#endif // __cplusplus - -#endif diff --git a/deps/node/deps/icu-small/source/common/ustr_titlecase_brkiter.cpp b/deps/node/deps/icu-small/source/common/ustr_titlecase_brkiter.cpp deleted file mode 100644 index 89888cf3..00000000 --- a/deps/node/deps/icu-small/source/common/ustr_titlecase_brkiter.cpp +++ /dev/null @@ -1,245 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: ustr_titlecase_brkiter.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011may30 -* created by: Markus W. Scherer -* -* Titlecasing functions that are based on BreakIterator -* were moved here to break dependency cycles among parts of the common library. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/brkiter.h" -#include "unicode/casemap.h" -#include "unicode/chariter.h" -#include "unicode/localpointer.h" -#include "unicode/ubrk.h" -#include "unicode/ucasemap.h" -#include "unicode/utext.h" -#include "cmemory.h" -#include "uassert.h" -#include "ucase.h" -#include "ucasemap_imp.h" - -U_NAMESPACE_BEGIN - -/** - * Whole-string BreakIterator. - * Titlecasing only calls setText(), first(), and next(). - * We implement the rest only to satisfy the abstract interface. 
- */ -class WholeStringBreakIterator : public BreakIterator { -public: - WholeStringBreakIterator() : BreakIterator(), length(0) {} - ~WholeStringBreakIterator() U_OVERRIDE; - UBool operator==(const BreakIterator&) const U_OVERRIDE; - BreakIterator *clone() const U_OVERRIDE; - static UClassID U_EXPORT2 getStaticClassID(); - UClassID getDynamicClassID() const U_OVERRIDE; - CharacterIterator &getText() const U_OVERRIDE; - UText *getUText(UText *fillIn, UErrorCode &errorCode) const U_OVERRIDE; - void setText(const UnicodeString &text) U_OVERRIDE; - void setText(UText *text, UErrorCode &errorCode) U_OVERRIDE; - void adoptText(CharacterIterator* it) U_OVERRIDE; - int32_t first() U_OVERRIDE; - int32_t last() U_OVERRIDE; - int32_t previous() U_OVERRIDE; - int32_t next() U_OVERRIDE; - int32_t current() const U_OVERRIDE; - int32_t following(int32_t offset) U_OVERRIDE; - int32_t preceding(int32_t offset) U_OVERRIDE; - UBool isBoundary(int32_t offset) U_OVERRIDE; - int32_t next(int32_t n) U_OVERRIDE; - BreakIterator *createBufferClone(void *stackBuffer, int32_t &BufferSize, - UErrorCode &errorCode) U_OVERRIDE; - BreakIterator &refreshInputText(UText *input, UErrorCode &errorCode) U_OVERRIDE; - -private: - int32_t length; -}; - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(WholeStringBreakIterator) - -WholeStringBreakIterator::~WholeStringBreakIterator() {} -UBool WholeStringBreakIterator::operator==(const BreakIterator&) const { return FALSE; } -BreakIterator *WholeStringBreakIterator::clone() const { return nullptr; } - -CharacterIterator &WholeStringBreakIterator::getText() const { - U_ASSERT(FALSE); // really should not be called - // Returns a null reference. - // Otherwise we would have to define a dummy CharacterIterator, - // and either have it as a field and const_cast it to a non-const reference, - // or have it via a pointer and return a reference to that. 
- CharacterIterator *none = nullptr; - return *none; -} -UText *WholeStringBreakIterator::getUText(UText * /*fillIn*/, UErrorCode &errorCode) const { - if (U_SUCCESS(errorCode)) { - errorCode = U_UNSUPPORTED_ERROR; - } - return nullptr; -} - -void WholeStringBreakIterator::setText(const UnicodeString &text) { - length = text.length(); -} -void WholeStringBreakIterator::setText(UText *text, UErrorCode &errorCode) { - if (U_SUCCESS(errorCode)) { - int64_t length64 = utext_nativeLength(text); - if (length64 <= INT32_MAX) { - length = (int32_t)length64; - } else { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - } - } -} -void WholeStringBreakIterator::adoptText(CharacterIterator* it) { - U_ASSERT(FALSE); // should not be called - length = it->getLength(); - delete it; -} - -int32_t WholeStringBreakIterator::first() { return 0; } -int32_t WholeStringBreakIterator::last() { return length; } -int32_t WholeStringBreakIterator::previous() { return 0; } -int32_t WholeStringBreakIterator::next() { return length; } -int32_t WholeStringBreakIterator::current() const { return 0; } -int32_t WholeStringBreakIterator::following(int32_t /*offset*/) { return length; } -int32_t WholeStringBreakIterator::preceding(int32_t /*offset*/) { return 0; } -UBool WholeStringBreakIterator::isBoundary(int32_t /*offset*/) { return FALSE; } -int32_t WholeStringBreakIterator::next(int32_t /*n*/) { return length; } - -BreakIterator *WholeStringBreakIterator::createBufferClone( - void * /*stackBuffer*/, int32_t & /*BufferSize*/, UErrorCode &errorCode) { - if (U_SUCCESS(errorCode)) { - errorCode = U_UNSUPPORTED_ERROR; - } - return nullptr; -} -BreakIterator &WholeStringBreakIterator::refreshInputText( - UText * /*input*/, UErrorCode &errorCode) { - if (U_SUCCESS(errorCode)) { - errorCode = U_UNSUPPORTED_ERROR; - } - return *this; -} - -U_CFUNC -BreakIterator *ustrcase_getTitleBreakIterator( - const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter, - LocalPointer &ownedIter, 
UErrorCode &errorCode) { - if (U_FAILURE(errorCode)) { return nullptr; } - options &= U_TITLECASE_ITERATOR_MASK; - if (options != 0 && iter != nullptr) { - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - if (iter == nullptr) { - switch (options) { - case 0: - iter = BreakIterator::createWordInstance( - locale != nullptr ? *locale : Locale(locID), errorCode); - break; - case U_TITLECASE_WHOLE_STRING: - iter = new WholeStringBreakIterator(); - if (iter == nullptr) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - } - break; - case U_TITLECASE_SENTENCES: - iter = BreakIterator::createSentenceInstance( - locale != nullptr ? *locale : Locale(locID), errorCode); - break; - default: - errorCode = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - ownedIter.adoptInstead(iter); - } - return iter; -} - -int32_t CaseMap::toTitle( - const char *locale, uint32_t options, BreakIterator *iter, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode) { - LocalPointer ownedIter; - iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode); - if(iter==NULL) { - return 0; - } - UnicodeString s(srcLength<0, src, srcLength); - iter->setText(s); - return ustrcase_map( - ustrcase_getCaseLocale(locale), options, iter, - dest, destCapacity, - src, srcLength, - ustrcase_internalToTitle, edits, errorCode); -} - -U_NAMESPACE_END - -U_NAMESPACE_USE - -U_CAPI int32_t U_EXPORT2 -u_strToTitle(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UBreakIterator *titleIter, - const char *locale, - UErrorCode *pErrorCode) { - LocalPointer ownedIter; - BreakIterator *iter = ustrcase_getTitleBreakIterator( - nullptr, locale, 0, reinterpret_cast(titleIter), - ownedIter, *pErrorCode); - if (iter == nullptr) { - return 0; - } - UnicodeString s(srcLength<0, src, srcLength); - iter->setText(s); - return ustrcase_mapWithOverlap( - ustrcase_getCaseLocale(locale), 0, iter, - dest, destCapacity, - 
src, srcLength, - ustrcase_internalToTitle, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -ucasemap_toTitle(UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - if (csm->iter == NULL) { - LocalPointer ownedIter; - BreakIterator *iter = ustrcase_getTitleBreakIterator( - nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode); - if (iter == nullptr) { - return 0; - } - csm->iter = ownedIter.orphan(); - } - UnicodeString s(srcLength<0, src, srcLength); - csm->iter->setText(s); - return ustrcase_map( - csm->caseLocale, csm->options, csm->iter, - dest, destCapacity, - src, srcLength, - ustrcase_internalToTitle, NULL, *pErrorCode); -} - -#endif // !UCONFIG_NO_BREAK_ITERATION diff --git a/deps/node/deps/icu-small/source/common/ustr_wcs.cpp b/deps/node/deps/icu-small/source/common/ustr_wcs.cpp deleted file mode 100644 index 0372824f..00000000 --- a/deps/node/deps/icu-small/source/common/ustr_wcs.cpp +++ /dev/null @@ -1,535 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2001-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ustr_wcs.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2004sep07 -* created by: Markus W. Scherer -* -* u_strToWCS() and u_strFromWCS() functions -* moved here from ustrtrns.c for better modularization. 
-*/ - -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "cstring.h" -#include "cwchar.h" -#include "cmemory.h" -#include "ustr_imp.h" -#include "ustr_cnv.h" - -#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION - -#define _STACK_BUFFER_CAPACITY 1000 -#define _BUFFER_CAPACITY_MULTIPLIER 2 - -#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) -// TODO: We should use CharString for char buffers and UnicodeString for UChar buffers. -// Then we could change this to work only with wchar_t buffers. -static inline UBool -u_growAnyBufferFromStatic(void *context, - void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, - int32_t length, int32_t size) { - // Use char* not void* to avoid the compiler's strict-aliasing assumptions - // and related warnings. - char *newBuffer=(char *)uprv_malloc(reqCapacity*size); - if(newBuffer!=NULL) { - if(length>0) { - uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size); - } - *pCapacity=reqCapacity; - } else { - *pCapacity=0; - } - - /* release the old pBuffer if it was not statically allocated */ - if(*pBuffer!=(char *)context) { - uprv_free(*pBuffer); - } - - *pBuffer=newBuffer; - return (UBool)(newBuffer!=NULL); -} - -/* helper function */ -static wchar_t* -_strToWCS(wchar_t *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UErrorCode *pErrorCode){ - - char stackBuffer [_STACK_BUFFER_CAPACITY]; - char* tempBuf = stackBuffer; - int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; - char* tempBufLimit = stackBuffer + tempBufCapacity; - UConverter* conv = NULL; - char* saveBuf = tempBuf; - wchar_t* intTarget=NULL; - int32_t intTargetCapacity=0; - int count=0,retVal=0; - - const UChar *pSrcLimit =NULL; - const UChar *pSrc = src; - - conv = u_getDefaultConverter(pErrorCode); - - if(U_FAILURE(*pErrorCode)){ - return NULL; - } - - if(srcLength == -1){ - srcLength = u_strlen(pSrc); - } - - pSrcLimit = pSrc + srcLength; - - for(;;) { - 
/* reset the error state */ - *pErrorCode = U_ZERO_ERROR; - - /* convert to chars using default converter */ - ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode); - count =(tempBuf - saveBuf); - - /* This should rarely occur */ - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ - tempBuf = saveBuf; - - /* we dont have enough room on the stack grow the buffer */ - int32_t newCapacity = 2 * srcLength; - if(newCapacity <= tempBufCapacity) { - newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; - } - if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, - newCapacity, count, 1)) { - goto cleanup; - } - - saveBuf = tempBuf; - tempBufLimit = tempBuf + tempBufCapacity; - tempBuf = tempBuf + count; - - } else { - break; - } - } - - if(U_FAILURE(*pErrorCode)){ - goto cleanup; - } - - /* done with conversion null terminate the char buffer */ - if(count>=tempBufCapacity){ - tempBuf = saveBuf; - /* we dont have enough room on the stack grow the buffer */ - if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, - count+1, count, 1)) { - goto cleanup; - } - saveBuf = tempBuf; - } - - saveBuf[count]=0; - - - /* allocate more space than required - * here we assume that every char requires - * no more than 2 wchar_ts - */ - intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; - intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); - - if(intTarget){ - - int32_t nulLen = 0; - int32_t remaining = intTargetCapacity; - wchar_t* pIntTarget=intTarget; - tempBuf = saveBuf; - - /* now convert the mbs to wcs */ - for(;;){ - - /* we can call the system API since we are sure that - * there is atleast 1 null in the input - */ - retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); - - if(retVal==-1){ - *pErrorCode = U_INVALID_CHAR_FOUND; - break; - }else if(retVal== remaining){/* should never occur */ - int numWritten = 
(pIntTarget-intTarget); - u_growAnyBufferFromStatic(NULL,(void**) &intTarget, - &intTargetCapacity, - intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, - numWritten, - sizeof(wchar_t)); - pIntTarget = intTarget; - remaining=intTargetCapacity; - - if(nulLen!=count){ /*there are embedded nulls*/ - pIntTarget+=numWritten; - remaining-=numWritten; - } - - }else{ - int32_t nulVal; - /*scan for nulls */ - /* we donot check for limit since tempBuf is null terminated */ - while(tempBuf[nulLen++] != 0){ - } - nulVal = (nulLen < srcLength) ? 1 : 0; - pIntTarget = pIntTarget + retVal+nulVal; - remaining -=(retVal+nulVal); - - /* check if we have reached the source limit*/ - if(nulLen>=(count)){ - break; - } - } - } - count = (int32_t)(pIntTarget-intTarget); - - if(0 < count && count <= destCapacity){ - uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t)); - } - - if(pDestLength){ - *pDestLength = count; - } - - /* free the allocated memory */ - uprv_free(intTarget); - - }else{ - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - } -cleanup: - /* are we still using stack buffer */ - if(stackBuffer != saveBuf){ - uprv_free(saveBuf); - } - u_terminateWChars(dest,destCapacity,count,pErrorCode); - - u_releaseDefaultConverter(conv); - - return dest; -} -#endif - -U_CAPI wchar_t* U_EXPORT2 -u_strToWCS(wchar_t *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UErrorCode *pErrorCode){ - - /* args check */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ - return NULL; - } - - if( (src==NULL && srcLength!=0) || srcLength < -1 || - (destCapacity<0) || (dest == NULL && destCapacity > 0) - ) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - -#ifdef U_WCHAR_IS_UTF16 - /* wchar_t is UTF-16 just do a memcpy */ - if(srcLength == -1){ - srcLength = u_strlen(src); - } - if(0 < srcLength && srcLength <= destCapacity){ - u_memcpy((UChar *)dest, src, srcLength); - } - if(pDestLength){ - *pDestLength = srcLength; - } - - 
u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode); - - return dest; - -#elif defined U_WCHAR_IS_UTF32 - - return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, - src, srcLength, pErrorCode); - -#else - - return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); - -#endif - -} - -#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) -/* helper function */ -static UChar* -_strFromWCS( UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const wchar_t *src, - int32_t srcLength, - UErrorCode *pErrorCode) -{ - int32_t retVal =0, count =0 ; - UConverter* conv = NULL; - UChar* pTarget = NULL; - UChar* pTargetLimit = NULL; - UChar* target = NULL; - - UChar uStack [_STACK_BUFFER_CAPACITY]; - - wchar_t wStack[_STACK_BUFFER_CAPACITY]; - wchar_t* pWStack = wStack; - - - char cStack[_STACK_BUFFER_CAPACITY]; - int32_t cStackCap = _STACK_BUFFER_CAPACITY; - char* pCSrc=cStack; - char* pCSave=pCSrc; - char* pCSrcLimit=NULL; - - const wchar_t* pSrc = src; - const wchar_t* pSrcLimit = NULL; - - if(srcLength ==-1){ - /* if the wchar_t source is null terminated we can safely - * assume that there are no embedded nulls, this is a fast - * path for null terminated strings. 
- */ - for(;;){ - /* convert wchars to chars */ - retVal = uprv_wcstombs(pCSrc,src, cStackCap); - - if(retVal == -1){ - *pErrorCode = U_ILLEGAL_CHAR_FOUND; - goto cleanup; - }else if(retVal >= (cStackCap-1)){ - /* Should rarely occur */ - u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, - cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); - pCSave = pCSrc; - }else{ - /* converted every thing */ - pCSrc = pCSrc+retVal; - break; - } - } - - }else{ - /* here the source is not null terminated - * so it may have nulls embeded and we need to - * do some extra processing - */ - int32_t remaining =cStackCap; - - pSrcLimit = src + srcLength; - - for(;;){ - int32_t nulLen = 0; - - /* find nulls in the string */ - while(nulLen= _STACK_BUFFER_CAPACITY){ - /* Should rarely occcur */ - /* allocate new buffer buffer */ - pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); - if(pWStack==NULL){ - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - } - if(nulLen>0){ - /* copy the contents to tempStack */ - uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t)); - } - - /* null terminate the tempBuffer */ - pWStack[nulLen] =0 ; - - if(remaining < (nulLen * MB_CUR_MAX)){ - /* Should rarely occur */ - int32_t len = (pCSrc-pCSave); - pCSrc = pCSave; - /* we do not have enough room so grow the buffer*/ - u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, - cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); - - pCSave = pCSrc; - pCSrc = pCSave+len; - remaining = cStackCap-(pCSrc - pCSave); - } - /* convert to chars */ - retVal = uprv_wcstombs(pCSrc,pWStack,remaining); - - pCSrc += retVal; - pSrc += nulLen; - srcLength-=nulLen; /* decrement the srcLength */ - break; - } - } - } - - /* OK..now we have converted from wchar_ts to chars now - * convert chars to UChars - */ - pCSrcLimit = pCSrc; - pCSrc = pCSave; - pTarget = target= dest; - pTargetLimit = dest + destCapacity; - - conv= u_getDefaultConverter(pErrorCode); - - 
if(U_FAILURE(*pErrorCode)|| conv==NULL){ - goto cleanup; - } - - for(;;) { - - *pErrorCode = U_ZERO_ERROR; - - /* convert to stack buffer*/ - ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode); - - /* increment count to number written to stack */ - count+= pTarget - target; - - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ - target = uStack; - pTarget = uStack; - pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; - } else { - break; - } - - } - - if(pDestLength){ - *pDestLength =count; - } - - u_terminateUChars(dest,destCapacity,count,pErrorCode); - -cleanup: - - if(cStack != pCSave){ - uprv_free(pCSave); - } - - if(wStack != pWStack){ - uprv_free(pWStack); - } - - u_releaseDefaultConverter(conv); - - return dest; -} -#endif - -U_CAPI UChar* U_EXPORT2 -u_strFromWCS(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const wchar_t *src, - int32_t srcLength, - UErrorCode *pErrorCode) -{ - - /* args check */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ - return NULL; - } - - if( (src==NULL && srcLength!=0) || srcLength < -1 || - (destCapacity<0) || (dest == NULL && destCapacity > 0) - ) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - -#ifdef U_WCHAR_IS_UTF16 - /* wchar_t is UTF-16 just do a memcpy */ - if(srcLength == -1){ - srcLength = u_strlen((const UChar *)src); - } - if(0 < srcLength && srcLength <= destCapacity){ - u_memcpy(dest, (const UChar *)src, srcLength); - } - if(pDestLength){ - *pDestLength = srcLength; - } - - u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); - - return dest; - -#elif defined U_WCHAR_IS_UTF32 - - return u_strFromUTF32(dest, destCapacity, pDestLength, - (UChar32*)src, srcLength, pErrorCode); - -#else - - return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); - -#endif - -} - -#endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */ diff --git 
a/deps/node/deps/icu-small/source/common/ustrcase.cpp b/deps/node/deps/icu-small/source/common/ustrcase.cpp deleted file mode 100644 index 618e847c..00000000 --- a/deps/node/deps/icu-small/source/common/ustrcase.cpp +++ /dev/null @@ -1,1818 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2001-2015, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: ustrcase.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002feb20 -* created by: Markus W. Scherer -* -* Implementation file for string casing C API functions. -* Uses functions from uchar.c for basic functionality that requires access -* to the Unicode Character Database (uprops.dat). -*/ - -#include "unicode/utypes.h" -#include "unicode/brkiter.h" -#include "unicode/casemap.h" -#include "unicode/edits.h" -#include "unicode/stringoptions.h" -#include "unicode/ustring.h" -#include "unicode/ucasemap.h" -#include "unicode/ubrk.h" -#include "unicode/utf.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "ucase.h" -#include "ucasemap_imp.h" -#include "ustr_imp.h" -#include "uassert.h" - -U_NAMESPACE_BEGIN - -namespace { - -int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity, - Edits *edits, UErrorCode &errorCode) { - if (U_SUCCESS(errorCode)) { - if (destIndex > destCapacity) { - errorCode = U_BUFFER_OVERFLOW_ERROR; - } else if (edits != NULL) { - edits->copyErrorTo(errorCode); - } - } - return destIndex; -} - -/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. 
*/ -inline int32_t -appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, - int32_t result, const UChar *s, - int32_t cpLength, uint32_t options, icu::Edits *edits) { - UChar32 c; - int32_t length; - - /* decode the result */ - if(result<0) { - /* (not) original code point */ - if(edits!=NULL) { - edits->addUnchanged(cpLength); - } - if(options & U_OMIT_UNCHANGED_TEXT) { - return destIndex; - } - c=~result; - if(destIndexaddReplace(cpLength, 1); - } - return destIndex; - } else { - c=result; - length=U16_LENGTH(c); - } - if(edits!=NULL) { - edits->addReplace(cpLength, length); - } - } - if(length>(INT32_MAX-destIndex)) { - return -1; // integer overflow - } - - if(destIndex=0) { - /* code point */ - UBool isError=FALSE; - U16_APPEND(dest, destIndex, destCapacity, c, isError); - if(isError) { - /* overflow, nothing written */ - destIndex+=length; - } - } else { - /* string */ - if((destIndex+length)<=destCapacity) { - while(length>0) { - dest[destIndex++]=*s++; - --length; - } - } else { - /* overflow */ - destIndex+=length; - } - } - } else { - /* preflight */ - destIndex+=length; - } - return destIndex; -} - -inline int32_t -appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) { - if(destIndexaddUnchanged(length); - } - if(options & U_OMIT_UNCHANGED_TEXT) { - return destIndex; - } - if(length>(INT32_MAX-destIndex)) { - return -1; // integer overflow - } - if((destIndex+length)<=destCapacity) { - u_memcpy(dest+destIndex, s, length); - } - return destIndex + length; -} - -inline int32_t -appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity, - const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) { - if (length <= 0) { - return destIndex; - } - return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits); -} - -UChar32 U_CALLCONV -utf16_caseContextIterator(void *context, int8_t dir) { - UCaseContext *csc=(UCaseContext *)context; - UChar32 c; - - if(dir<0) { - /* reset for 
backward iteration */ - csc->index=csc->cpStart; - csc->dir=dir; - } else if(dir>0) { - /* reset for forward iteration */ - csc->index=csc->cpLimit; - csc->dir=dir; - } else { - /* continue current iteration direction */ - dir=csc->dir; - } - - if(dir<0) { - if(csc->startindex) { - U16_PREV((const UChar *)csc->p, csc->start, csc->index, c); - return c; - } - } else { - if(csc->indexlimit) { - U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c); - return c; - } - } - return U_SENTINEL; -} - -/** - * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account. - * caseLocale < 0: Case-folds [srcStart..srcLimit[. - */ -int32_t toLower(int32_t caseLocale, uint32_t options, - UChar *dest, int32_t destCapacity, - const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, - icu::Edits *edits, UErrorCode &errorCode) { - const int8_t *latinToLower; - if (caseLocale == UCASE_LOC_ROOT || - (caseLocale >= 0 ? - !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) : - (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) { - latinToLower = LatinCase::TO_LOWER_NORMAL; - } else { - latinToLower = LatinCase::TO_LOWER_TR_LT; - } - const UTrie2 *trie = ucase_getTrie(); - int32_t destIndex = 0; - int32_t prev = srcStart; - int32_t srcIndex = srcStart; - for (;;) { - // fast path for simple cases - UChar lead = 0; - while (srcIndex < srcLimit) { - lead = src[srcIndex]; - int32_t delta; - if (lead < LatinCase::LONG_S) { - int8_t d = latinToLower[lead]; - if (d == LatinCase::EXC) { break; } - ++srcIndex; - if (d == 0) { continue; } - delta = d; - } else if (lead >= 0xd800) { - break; // surrogate or higher - } else { - uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead); - if (UCASE_HAS_EXCEPTION(props)) { break; } - ++srcIndex; - if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) { - continue; - } - } - lead += static_cast(delta); - destIndex = appendUnchanged(dest, 
destIndex, destCapacity, - src + prev, srcIndex - 1 - prev, options, edits); - if (destIndex >= 0) { - destIndex = appendUChar(dest, destIndex, destCapacity, lead); - if (edits != nullptr) { - edits->addReplace(1, 1); - } - } - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - prev = srcIndex; - } - if (srcIndex >= srcLimit) { - break; - } - // slow path - int32_t cpStart = srcIndex++; - UChar trail; - UChar32 c; - if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) { - c = U16_GET_SUPPLEMENTARY(lead, trail); - ++srcIndex; - } else { - c = lead; - } - const UChar *s; - if (caseLocale >= 0) { - csc->cpStart = cpStart; - csc->cpLimit = srcIndex; - c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale); - } else { - c = ucase_toFullFolding(c, &s, options); - } - if (c >= 0) { - destIndex = appendUnchanged(dest, destIndex, destCapacity, - src + prev, cpStart - prev, options, edits); - if (destIndex >= 0) { - destIndex = appendResult(dest, destIndex, destCapacity, c, s, - srcIndex - cpStart, options, edits); - } - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - prev = srcIndex; - } - } - destIndex = appendUnchanged(dest, destIndex, destCapacity, - src + prev, srcIndex - prev, options, edits); - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - return destIndex; -} - -int32_t toUpper(int32_t caseLocale, uint32_t options, - UChar *dest, int32_t destCapacity, - const UChar *src, UCaseContext *csc, int32_t srcLength, - icu::Edits *edits, UErrorCode &errorCode) { - const int8_t *latinToUpper; - if (caseLocale == UCASE_LOC_TURKISH) { - latinToUpper = LatinCase::TO_UPPER_TR; - } else { - latinToUpper = LatinCase::TO_UPPER_NORMAL; - } - const UTrie2 *trie = ucase_getTrie(); - int32_t destIndex = 0; - int32_t prev = 0; - int32_t srcIndex = 0; - for (;;) { - // fast path for simple cases - UChar lead = 0; - while (srcIndex < srcLength) { - 
lead = src[srcIndex]; - int32_t delta; - if (lead < LatinCase::LONG_S) { - int8_t d = latinToUpper[lead]; - if (d == LatinCase::EXC) { break; } - ++srcIndex; - if (d == 0) { continue; } - delta = d; - } else if (lead >= 0xd800) { - break; // surrogate or higher - } else { - uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead); - if (UCASE_HAS_EXCEPTION(props)) { break; } - ++srcIndex; - if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) { - continue; - } - } - lead += static_cast(delta); - destIndex = appendUnchanged(dest, destIndex, destCapacity, - src + prev, srcIndex - 1 - prev, options, edits); - if (destIndex >= 0) { - destIndex = appendUChar(dest, destIndex, destCapacity, lead); - if (edits != nullptr) { - edits->addReplace(1, 1); - } - } - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - prev = srcIndex; - } - if (srcIndex >= srcLength) { - break; - } - // slow path - int32_t cpStart; - csc->cpStart = cpStart = srcIndex++; - UChar trail; - UChar32 c; - if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) { - c = U16_GET_SUPPLEMENTARY(lead, trail); - ++srcIndex; - } else { - c = lead; - } - csc->cpLimit = srcIndex; - const UChar *s; - c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale); - if (c >= 0) { - destIndex = appendUnchanged(dest, destIndex, destCapacity, - src + prev, cpStart - prev, options, edits); - if (destIndex >= 0) { - destIndex = appendResult(dest, destIndex, destCapacity, c, s, - srcIndex - cpStart, options, edits); - } - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - prev = srcIndex; - } - } - destIndex = appendUnchanged(dest, destIndex, destCapacity, - src + prev, srcIndex - prev, options, edits); - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - return destIndex; -} - -} // namespace - -U_NAMESPACE_END - -U_NAMESPACE_USE - -#if 
!UCONFIG_NO_BREAK_ITERATION - -U_CFUNC int32_t U_CALLCONV -ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode) { - if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) { - return 0; - } - - /* set up local variables */ - UCaseContext csc=UCASECONTEXT_INITIALIZER; - csc.p=(void *)src; - csc.limit=srcLength; - int32_t destIndex=0; - int32_t prev=0; - UBool isFirstIndex=TRUE; - - /* titlecasing loop */ - while(prevfirst(); - } else { - index=iter->next(); - } - if(index==UBRK_DONE || index>srcLength) { - index=srcLength; - } - - /* - * Segment [prev..index[ into 3 parts: - * a) skipped characters (copy as-is) [prev..titleStart[ - * b) first letter (titlecase) [titleStart..titleLimit[ - * c) subsequent characters (lowercase) [titleLimit..index[ - */ - if(prevaddReplace(1, 1); - } - titleLimit++; - } else if (src[titleStart+1] == 0x004A) { - // Keep the capital J from getting lowercased. - destIndex=appendUnchanged(dest, destIndex, destCapacity, - src+titleStart+1, 1, options, edits); - if(destIndex<0) { - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - titleLimit++; - } - } - - /* lowercase [titleLimit..index[ */ - if(titleLimit 0) { - uint32_t upper = data & UPPER_MASK; - // Add a dialytika to this iota or ypsilon vowel - // if we removed a tonos from the previous vowel, - // and that previous vowel did not also have (or gain) a dialytika. - // Adding one only to the final vowel in a longer sequence - // (which does not occur in normal writing) would require lookahead. - // Set the same flag as for preserving an existing dialytika. - if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 && - (upper == 0x399 || upper == 0x3A5)) { - data |= HAS_DIALYTIKA; - } - int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota. 
- if ((data & HAS_YPOGEGRAMMENI) != 0) { - numYpogegrammeni = 1; - } - // Skip combining diacritics after this Greek letter. - while (nextIndex < srcLength) { - uint32_t diacriticData = getDiacriticData(src[nextIndex]); - if (diacriticData != 0) { - data |= diacriticData; - if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) { - ++numYpogegrammeni; - } - ++nextIndex; - } else { - break; // not a Greek diacritic - } - } - if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { - nextState |= AFTER_VOWEL_WITH_ACCENT; - } - // Map according to Greek rules. - UBool addTonos = FALSE; - if (upper == 0x397 && - (data & HAS_ACCENT) != 0 && - numYpogegrammeni == 0 && - (state & AFTER_CASED) == 0 && - !isFollowedByCasedLetter(src, nextIndex, srcLength)) { - // Keep disjunctive "or" with (only) a tonos. - // We use the same "word boundary" conditions as for the Final_Sigma test. - if (i == nextIndex) { - upper = 0x389; // Preserve the precomposed form. - } else { - addTonos = TRUE; - } - } else if ((data & HAS_DIALYTIKA) != 0) { - // Preserve a vowel with dialytika in precomposed form if it exists. - if (upper == 0x399) { - upper = 0x3AA; - data &= ~HAS_EITHER_DIALYTIKA; - } else if (upper == 0x3A5) { - upper = 0x3AB; - data &= ~HAS_EITHER_DIALYTIKA; - } - } - - UBool change; - if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) { - change = TRUE; // common, simple usage - } else { - // Find out first whether we are changing the text. 
- change = src[i] != upper || numYpogegrammeni > 0; - int32_t i2 = i + 1; - if ((data & HAS_EITHER_DIALYTIKA) != 0) { - change |= i2 >= nextIndex || src[i2] != 0x308; - ++i2; - } - if (addTonos) { - change |= i2 >= nextIndex || src[i2] != 0x301; - ++i2; - } - int32_t oldLength = nextIndex - i; - int32_t newLength = (i2 - i) + numYpogegrammeni; - change |= oldLength != newLength; - if (change) { - if (edits != NULL) { - edits->addReplace(oldLength, newLength); - } - } else { - if (edits != NULL) { - edits->addUnchanged(oldLength); - } - // Write unchanged text? - change = (options & U_OMIT_UNCHANGED_TEXT) == 0; - } - } - - if (change) { - destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper); - if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) { - destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika - } - if (destIndex >= 0 && addTonos) { - destIndex=appendUChar(dest, destIndex, destCapacity, 0x301); - } - while (destIndex >= 0 && numYpogegrammeni > 0) { - destIndex=appendUChar(dest, destIndex, destCapacity, 0x399); - --numYpogegrammeni; - } - if(destIndex<0) { - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - } else { - const UChar *s; - c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK); - destIndex = appendResult(dest, destIndex, destCapacity, c, s, - nextIndex - i, options, edits); - if (destIndex < 0) { - errorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - i = nextIndex; - state = nextState; - } - - return destIndex; -} - -} // namespace GreekUpper -U_NAMESPACE_END - -/* functions available in the common library (for unistr_case.cpp) */ - -U_CFUNC int32_t U_CALLCONV -ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode) { - UCaseContext csc=UCASECONTEXT_INITIALIZER; - csc.p=(void *)src; - csc.limit=srcLength; - int32_t 
destIndex = toLower( - caseLocale, options, - dest, destCapacity, - src, &csc, 0, srcLength, - edits, errorCode); - return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); -} - -U_CFUNC int32_t U_CALLCONV -ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode) { - int32_t destIndex; - if (caseLocale == UCASE_LOC_GREEK) { - destIndex = GreekUpper::toUpper(options, dest, destCapacity, - src, srcLength, edits, errorCode); - } else { - UCaseContext csc=UCASECONTEXT_INITIALIZER; - csc.p=(void *)src; - csc.limit=srcLength; - destIndex = toUpper( - caseLocale, options, - dest, destCapacity, - src, &csc, srcLength, - edits, errorCode); - } - return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); -} - -U_CFUNC int32_t U_CALLCONV -ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode) { - int32_t destIndex = toLower( - -1, options, - dest, destCapacity, - src, nullptr, 0, srcLength, - edits, errorCode); - return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); -} - -U_CFUNC int32_t -ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UStringCaseMapper *stringCaseMapper, - icu::Edits *edits, - UErrorCode &errorCode) { - int32_t destLength; - - /* check argument values */ - if(U_FAILURE(errorCode)) { - return 0; - } - if( destCapacity<0 || - (dest==NULL && destCapacity>0) || - src==NULL || - srcLength<-1 - ) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* get the string length */ - if(srcLength==-1) { - srcLength=u_strlen(src); - } - - /* check for overlapping source and 
destination */ - if( dest!=NULL && - ((src>=dest && src<(dest+destCapacity)) || - (dest>=src && dest<(src+srcLength))) - ) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { - edits->reset(); - } - destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR - dest, destCapacity, src, srcLength, edits, errorCode); - return u_terminateUChars(dest, destCapacity, destLength, &errorCode); -} - -U_CFUNC int32_t -ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UStringCaseMapper *stringCaseMapper, - UErrorCode &errorCode) { - UChar buffer[300]; - UChar *temp; - - int32_t destLength; - - /* check argument values */ - if(U_FAILURE(errorCode)) { - return 0; - } - if( destCapacity<0 || - (dest==NULL && destCapacity>0) || - src==NULL || - srcLength<-1 - ) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* get the string length */ - if(srcLength==-1) { - srcLength=u_strlen(src); - } - - /* check for overlapping source and destination */ - if( dest!=NULL && - ((src>=dest && src<(dest+destCapacity)) || - (dest>=src && dest<(src+srcLength))) - ) { - /* overlap: provide a temporary destination buffer and later copy the result */ - if(destCapacity<=UPRV_LENGTHOF(buffer)) { - /* the stack buffer is large enough */ - temp=buffer; - } else { - /* allocate a buffer */ - temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); - if(temp==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return 0; - } - } - } else { - temp=dest; - } - - destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR - temp, destCapacity, src, srcLength, NULL, errorCode); - if(temp!=dest) { - /* copy the result string to the destination buffer */ - if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) { - u_memmove(dest, temp, destLength); - } - if(temp!=buffer) { - 
uprv_free(temp); - } - } - - return u_terminateUChars(dest, destCapacity, destLength, &errorCode); -} - -/* public API functions */ - -U_CAPI int32_t U_EXPORT2 -u_strFoldCase(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - uint32_t options, - UErrorCode *pErrorCode) { - return ustrcase_mapWithOverlap( - UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ustrcase_internalFold, *pErrorCode); -} - -U_NAMESPACE_BEGIN - -int32_t CaseMap::fold( - uint32_t options, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode) { - return ustrcase_map( - UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ustrcase_internalFold, edits, errorCode); -} - -U_NAMESPACE_END - -/* case-insensitive string comparisons -------------------------------------- */ - -/* - * This function is a copy of unorm_cmpEquivFold() minus the parts for - * canonical equivalence. - * Keep the functions in sync, and see there for how this works. - * The duplication is for modularization: - * It makes caseless (but not canonical caseless) matches independent of - * the normalization code. - */ - -/* stack element for previous-level source/decomposition pointers */ -struct CmpEquivLevel { - const UChar *start, *s, *limit; -}; -typedef struct CmpEquivLevel CmpEquivLevel; - -/** - * Internal implementation code comparing string with case fold. - * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch(). 
- * - * @param s1 input string 1 - * @param length1 length of string 1, or -1 (NULL terminated) - * @param s2 input string 2 - * @param length2 length of string 2, or -1 (NULL terminated) - * @param options compare options - * @param matchLen1 (output) length of partial prefix match in s1 - * @param matchLen2 (output) length of partial prefix match in s2 - * @param pErrorCode receives error status - * @return The result of comparison - */ -static int32_t _cmpFold( - const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - int32_t *matchLen1, int32_t *matchLen2, - UErrorCode *pErrorCode) { - int32_t cmpRes = 0; - - /* current-level start/limit - s1/s2 as current */ - const UChar *start1, *start2, *limit1, *limit2; - - /* points to the original start address */ - const UChar *org1, *org2; - - /* points to the end of match + 1 */ - const UChar *m1, *m2; - - /* case folding variables */ - const UChar *p; - int32_t length; - - /* stacks of previous-level start/current/limit */ - CmpEquivLevel stack1[2], stack2[2]; - - /* case folding buffers, only use current-level start/limit */ - UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; - - /* track which is the current level per string */ - int32_t level1, level2; - - /* current code units, and code points for lookups */ - UChar32 c1, c2, cp1, cp2; - - /* no argument error checking because this itself is not an API */ - - /* - * assume that at least the option U_COMPARE_IGNORE_CASE is set - * otherwise this function would have to behave exactly as uprv_strCompare() - */ - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* initialize */ - if(matchLen1) { - U_ASSERT(matchLen2 !=NULL); - *matchLen1=0; - *matchLen2=0; - } - - start1=m1=org1=s1; - if(length1==-1) { - limit1=NULL; - } else { - limit1=s1+length1; - } - - start2=m2=org2=s2; - if(length2==-1) { - limit2=NULL; - } else { - limit2=s2+length2; - } - - level1=level2=0; - c1=c2=-1; - - /* comparison loop */ - 
for(;;) { - /* - * here a code unit value of -1 means "get another code unit" - * below it will mean "this source is finished" - */ - - if(c1<0) { - /* get next code unit from string 1, post-increment */ - for(;;) { - if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) { - if(level1==0) { - c1=-1; - break; - } - } else { - ++s1; - break; - } - - /* reached end of level buffer, pop one level */ - do { - --level1; - start1=stack1[level1].start; /*Not uninitialized*/ - } while(start1==NULL); - s1=stack1[level1].s; /*Not uninitialized*/ - limit1=stack1[level1].limit; /*Not uninitialized*/ - } - } - - if(c2<0) { - /* get next code unit from string 2, post-increment */ - for(;;) { - if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) { - if(level2==0) { - c2=-1; - break; - } - } else { - ++s2; - break; - } - - /* reached end of level buffer, pop one level */ - do { - --level2; - start2=stack2[level2].start; /*Not uninitialized*/ - } while(start2==NULL); - s2=stack2[level2].s; /*Not uninitialized*/ - limit2=stack2[level2].limit; /*Not uninitialized*/ - } - } - - /* - * compare c1 and c2 - * either variable c1, c2 is -1 only if the corresponding string is finished - */ - if(c1==c2) { - const UChar *next1, *next2; - - if(c1<0) { - cmpRes=0; /* c1==c2==-1 indicating end of strings */ - break; - } - - /* - * Note: Move the match positions in both strings at the same time - * only when corresponding code point(s) in the original strings - * are fully consumed. For example, when comparing s1="Fust" and - * s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches - * the first code point in the case-folded data. But the second "s" - * has no matching code point in s1, so this implementation returns - * 2 as the prefix match length ("Fu"). - */ - next1=next2=NULL; - if(level1==0) { - next1=s1; - } else if(s1==limit1) { - /* Note: This implementation only use a single level of stack. 
- * If this code needs to be changed to use multiple levels - * of stacks, the code above should check if the current - * code is at the end of all stacks. - */ - U_ASSERT(level1==1); - - /* is s1 at the end of the current stack? */ - next1=stack1[0].s; - } - - if (next1!=NULL) { - if(level2==0) { - next2=s2; - } else if(s2==limit2) { - U_ASSERT(level2==1); - - /* is s2 at the end of the current stack? */ - next2=stack2[0].s; - } - if(next2!=NULL) { - m1=next1; - m2=next2; - } - } - c1=c2=-1; /* make us fetch new code units */ - continue; - } else if(c1<0) { - cmpRes=-1; /* string 1 ends before string 2 */ - break; - } else if(c2<0) { - cmpRes=1; /* string 2 ends before string 1 */ - break; - } - /* c1!=c2 && c1>=0 && c2>=0 */ - - /* get complete code points for c1, c2 for lookups if either is a surrogate */ - cp1=c1; - if(U_IS_SURROGATE(c1)) { - UChar c; - - if(U_IS_SURROGATE_LEAD(c1)) { - if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) { - /* advance ++s1; only below if cp1 decomposes/case-folds */ - cp1=U16_GET_SUPPLEMENTARY(c1, c); - } - } else /* isTrail(c1) */ { - if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { - cp1=U16_GET_SUPPLEMENTARY(c, c1); - } - } - } - - cp2=c2; - if(U_IS_SURROGATE(c2)) { - UChar c; - - if(U_IS_SURROGATE_LEAD(c2)) { - if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) { - /* advance ++s2; only below if cp2 decomposes/case-folds */ - cp2=U16_GET_SUPPLEMENTARY(c2, c); - } - } else /* isTrail(c2) */ { - if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) { - cp2=U16_GET_SUPPLEMENTARY(c, c2); - } - } - } - - /* - * go down one level for each string - * continue with the main loop as soon as there is a real change - */ - - if( level1==0 && - (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0 - ) { - /* cp1 case-folds to the code point "length" or to p[length] */ - if(U_IS_SURROGATE(c1)) { - if(U_IS_SURROGATE_LEAD(c1)) { - /* advance beyond source surrogate pair if it case-folds */ - ++s1; - } else /* isTrail(c1) */ { - /* - * we got a supplementary code point 
when hitting its trail surrogate, - * therefore the lead surrogate must have been the same as in the other string; - * compare this decomposition with the lead surrogate in the other string - * remember that this simulates bulk text replacement: - * the decomposition would replace the entire code point - */ - --s2; - --m2; - c2=*(s2-1); - } - } - - /* push current level pointers */ - stack1[0].start=start1; - stack1[0].s=s1; - stack1[0].limit=limit1; - ++level1; - - /* copy the folding result to fold1[] */ - if(length<=UCASE_MAX_STRING_LENGTH) { - u_memcpy(fold1, p, length); - } else { - int32_t i=0; - U16_APPEND_UNSAFE(fold1, i, length); - length=i; - } - - /* set next level pointers to case folding */ - start1=s1=fold1; - limit1=fold1+length; - - /* get ready to read from decomposition, continue with loop */ - c1=-1; - continue; - } - - if( level2==0 && - (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0 - ) { - /* cp2 case-folds to the code point "length" or to p[length] */ - if(U_IS_SURROGATE(c2)) { - if(U_IS_SURROGATE_LEAD(c2)) { - /* advance beyond source surrogate pair if it case-folds */ - ++s2; - } else /* isTrail(c2) */ { - /* - * we got a supplementary code point when hitting its trail surrogate, - * therefore the lead surrogate must have been the same as in the other string; - * compare this decomposition with the lead surrogate in the other string - * remember that this simulates bulk text replacement: - * the decomposition would replace the entire code point - */ - --s1; - --m2; - c1=*(s1-1); - } - } - - /* push current level pointers */ - stack2[0].start=start2; - stack2[0].s=s2; - stack2[0].limit=limit2; - ++level2; - - /* copy the folding result to fold2[] */ - if(length<=UCASE_MAX_STRING_LENGTH) { - u_memcpy(fold2, p, length); - } else { - int32_t i=0; - U16_APPEND_UNSAFE(fold2, i, length); - length=i; - } - - /* set next level pointers to case folding */ - start2=s2=fold2; - limit2=fold2+length; - - /* get ready to read from 
decomposition, continue with loop */ - c2=-1; - continue; - } - - /* - * no decomposition/case folding, max level for both sides: - * return difference result - * - * code point order comparison must not just return cp1-cp2 - * because when single surrogates are present then the surrogate pairs - * that formed cp1 and cp2 may be from different string indexes - * - * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units - * c1=d800 cp1=10001 c2=dc00 cp2=10000 - * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 } - * - * therefore, use same fix-up as in ustring.c/uprv_strCompare() - * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++ - * so we have slightly different pointer/start/limit comparisons here - */ - - if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) { - /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ - if( - (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) || - (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2))) - ) { - /* part of a surrogate pair, leave >=d800 */ - } else { - /* BMP code point - may be surrogate code point - make =d800 */ - } else { - /* BMP code point - may be surrogate code point - make (m1-org1); - *matchLen2=static_cast(m2-org2); - } - return cmpRes; -} - -/* internal function */ -U_CFUNC int32_t -u_strcmpFold(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode) { - return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode); -} - -/* public API functions */ - -U_CAPI int32_t U_EXPORT2 -u_strCaseCompare(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode) { - /* argument checking */ - if(pErrorCode==0 || U_FAILURE(*pErrorCode)) { - return 0; - } - if(s1==NULL || length1<-1 || s2==NULL || length2<-1) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } 
- return u_strcmpFold(s1, length1, s2, length2, - options|U_COMPARE_IGNORE_CASE, - pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) { - UErrorCode errorCode=U_ZERO_ERROR; - return u_strcmpFold(s1, -1, s2, -1, - options|U_COMPARE_IGNORE_CASE, - &errorCode); -} - -U_CAPI int32_t U_EXPORT2 -u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) { - UErrorCode errorCode=U_ZERO_ERROR; - return u_strcmpFold(s1, length, s2, length, - options|U_COMPARE_IGNORE_CASE, - &errorCode); -} - -U_CAPI int32_t U_EXPORT2 -u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) { - UErrorCode errorCode=U_ZERO_ERROR; - return u_strcmpFold(s1, n, s2, n, - options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE), - &errorCode); -} - -/* internal API - detect length of shared prefix */ -U_CAPI void -u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - int32_t *matchLen1, int32_t *matchLen2, - UErrorCode *pErrorCode) { - _cmpFold(s1, length1, s2, length2, options, - matchLen1, matchLen2, pErrorCode); -} diff --git a/deps/node/deps/icu-small/source/common/ustrcase_locale.cpp b/deps/node/deps/icu-small/source/common/ustrcase_locale.cpp deleted file mode 100644 index 2ecd24f0..00000000 --- a/deps/node/deps/icu-small/source/common/ustrcase_locale.cpp +++ /dev/null @@ -1,94 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2011, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: ustrcase_locale.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2011may31 -* created by: Markus W. 
Scherer -* -* Locale-sensitive case mapping functions (ones that call uloc_getDefault()) -* were moved here to break dependency cycles among parts of the common library. -*/ - -#include "unicode/utypes.h" -#include "uassert.h" -#include "unicode/brkiter.h" -#include "unicode/casemap.h" -#include "unicode/ucasemap.h" -#include "unicode/uloc.h" -#include "unicode/ustring.h" -#include "ucase.h" -#include "ucasemap_imp.h" - -U_CFUNC int32_t -ustrcase_getCaseLocale(const char *locale) { - if (locale == NULL) { - locale = uloc_getDefault(); - } - if (*locale == 0) { - return UCASE_LOC_ROOT; - } else { - return ucase_getCaseLocale(locale); - } -} - -/* public API functions */ - -U_CAPI int32_t U_EXPORT2 -u_strToLower(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode) { - return ustrcase_mapWithOverlap( - ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ustrcase_internalToLower, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -u_strToUpper(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode) { - return ustrcase_mapWithOverlap( - ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ustrcase_internalToUpper, *pErrorCode); -} - -U_NAMESPACE_BEGIN - -int32_t CaseMap::toLower( - const char *locale, uint32_t options, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode) { - return ustrcase_map( - ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ustrcase_internalToLower, edits, errorCode); -} - -int32_t CaseMap::toUpper( - const char *locale, uint32_t options, - const UChar *src, int32_t srcLength, - UChar *dest, int32_t destCapacity, Edits *edits, - UErrorCode &errorCode) { - return ustrcase_map( - 
ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL - dest, destCapacity, - src, srcLength, - ustrcase_internalToUpper, edits, errorCode); -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/ustrenum.cpp b/deps/node/deps/icu-small/source/common/ustrenum.cpp deleted file mode 100644 index 8be79c98..00000000 --- a/deps/node/deps/icu-small/source/common/ustrenum.cpp +++ /dev/null @@ -1,398 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2002-2014, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Author: Alan Liu -* Created: November 11 2002 -* Since: ICU 2.4 -********************************************************************** -*/ -#include "utypeinfo.h" // for 'typeid' to work - -#include "unicode/ustring.h" -#include "unicode/strenum.h" -#include "unicode/putil.h" -#include "uenumimp.h" -#include "ustrenum.h" -#include "cstring.h" -#include "cmemory.h" -#include "uassert.h" - -U_NAMESPACE_BEGIN -// StringEnumeration implementation ---------------------------------------- *** - -StringEnumeration::StringEnumeration() - : chars(charsBuffer), charsCapacity(sizeof(charsBuffer)) { -} - -StringEnumeration::~StringEnumeration() { - if (chars != NULL && chars != charsBuffer) { - uprv_free(chars); - } -} - -// StringEnumeration base class clone() default implementation, does not clone -StringEnumeration * -StringEnumeration::clone() const { - return NULL; -} - -const char * -StringEnumeration::next(int32_t *resultLength, UErrorCode &status) { - const UnicodeString *s=snext(status); - if(U_SUCCESS(status) && s!=NULL) { - unistr=*s; - ensureCharsCapacity(unistr.length()+1, status); - if(U_SUCCESS(status)) { - if(resultLength!=NULL) { - *resultLength=unistr.length(); - 
} - unistr.extract(0, INT32_MAX, chars, charsCapacity, US_INV); - return chars; - } - } - - return NULL; -} - -const UChar * -StringEnumeration::unext(int32_t *resultLength, UErrorCode &status) { - const UnicodeString *s=snext(status); - if(U_SUCCESS(status) && s!=NULL) { - unistr=*s; - if(resultLength!=NULL) { - *resultLength=unistr.length(); - } - return unistr.getTerminatedBuffer(); - } - - return NULL; -} - -const UnicodeString * -StringEnumeration::snext(UErrorCode &status) { - int32_t length; - const char *s=next(&length, status); - return setChars(s, length, status); -} - -void -StringEnumeration::ensureCharsCapacity(int32_t capacity, UErrorCode &status) { - if(U_SUCCESS(status) && capacity>charsCapacity) { - if(capacity<(charsCapacity+charsCapacity/2)) { - // avoid allocation thrashing - capacity=charsCapacity+charsCapacity/2; - } - if(chars!=charsBuffer) { - uprv_free(chars); - } - chars=(char *)uprv_malloc(capacity); - if(chars==NULL) { - chars=charsBuffer; - charsCapacity=sizeof(charsBuffer); - status=U_MEMORY_ALLOCATION_ERROR; - } else { - charsCapacity=capacity; - } - } -} - -UnicodeString * -StringEnumeration::setChars(const char *s, int32_t length, UErrorCode &status) { - if(U_SUCCESS(status) && s!=NULL) { - if(length<0) { - length=(int32_t)uprv_strlen(s); - } - - UChar *buffer=unistr.getBuffer(length+1); - if(buffer!=NULL) { - u_charsToUChars(s, buffer, length); - buffer[length]=0; - unistr.releaseBuffer(length); - return &unistr; - } else { - status=U_MEMORY_ALLOCATION_ERROR; - } - } - - return NULL; -} -UBool -StringEnumeration::operator==(const StringEnumeration& that)const { - return typeid(*this) == typeid(that); -} - -UBool -StringEnumeration::operator!=(const StringEnumeration& that)const { - return !operator==(that); -} - -// UStringEnumeration implementation --------------------------------------- *** - -UStringEnumeration * U_EXPORT2 -UStringEnumeration::fromUEnumeration( - UEnumeration *uenumToAdopt, UErrorCode &status) { - if 
(U_FAILURE(status)) { - uenum_close(uenumToAdopt); - return NULL; - } - UStringEnumeration *result = new UStringEnumeration(uenumToAdopt); - if (result == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - uenum_close(uenumToAdopt); - return NULL; - } - return result; -} - -UStringEnumeration::UStringEnumeration(UEnumeration* _uenum) : - uenum(_uenum) { - U_ASSERT(_uenum != 0); -} - -UStringEnumeration::~UStringEnumeration() { - uenum_close(uenum); -} - -int32_t UStringEnumeration::count(UErrorCode& status) const { - return uenum_count(uenum, &status); -} - -const char *UStringEnumeration::next(int32_t *resultLength, UErrorCode &status) { - return uenum_next(uenum, resultLength, &status); -} - -const UnicodeString* UStringEnumeration::snext(UErrorCode& status) { - int32_t length; - const UChar* str = uenum_unext(uenum, &length, &status); - if (str == 0 || U_FAILURE(status)) { - return 0; - } - return &unistr.setTo(str, length); -} - -void UStringEnumeration::reset(UErrorCode& status) { - uenum_reset(uenum, &status); -} - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UStringEnumeration) -U_NAMESPACE_END - -// C wrapper --------------------------------------------------------------- *** - -#define THIS(en) ((icu::StringEnumeration*)(en->context)) - -U_CDECL_BEGIN - -/** - * Wrapper API to make StringEnumeration look like UEnumeration. - */ -static void U_CALLCONV -ustrenum_close(UEnumeration* en) { - delete THIS(en); - uprv_free(en); -} - -/** - * Wrapper API to make StringEnumeration look like UEnumeration. - */ -static int32_t U_CALLCONV -ustrenum_count(UEnumeration* en, - UErrorCode* ec) -{ - return THIS(en)->count(*ec); -} - -/** - * Wrapper API to make StringEnumeration look like UEnumeration. - */ -static const UChar* U_CALLCONV -ustrenum_unext(UEnumeration* en, - int32_t* resultLength, - UErrorCode* ec) -{ - return THIS(en)->unext(resultLength, *ec); -} - -/** - * Wrapper API to make StringEnumeration look like UEnumeration. 
- */ -static const char* U_CALLCONV -ustrenum_next(UEnumeration* en, - int32_t* resultLength, - UErrorCode* ec) -{ - return THIS(en)->next(resultLength, *ec); -} - -/** - * Wrapper API to make StringEnumeration look like UEnumeration. - */ -static void U_CALLCONV -ustrenum_reset(UEnumeration* en, - UErrorCode* ec) -{ - THIS(en)->reset(*ec); -} - -/** - * Pseudo-vtable for UEnumeration wrapper around StringEnumeration. - * The StringEnumeration pointer will be stored in 'context'. - */ -static const UEnumeration USTRENUM_VT = { - NULL, - NULL, // store StringEnumeration pointer here - ustrenum_close, - ustrenum_count, - ustrenum_unext, - ustrenum_next, - ustrenum_reset -}; - -U_CDECL_END - -/** - * Given a StringEnumeration, wrap it in a UEnumeration. The - * StringEnumeration is adopted; after this call, the caller must not - * delete it (regardless of error status). - */ -U_CAPI UEnumeration* U_EXPORT2 -uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec) { - UEnumeration* result = NULL; - if (U_SUCCESS(*ec) && adopted != NULL) { - result = (UEnumeration*) uprv_malloc(sizeof(UEnumeration)); - if (result == NULL) { - *ec = U_MEMORY_ALLOCATION_ERROR; - } else { - uprv_memcpy(result, &USTRENUM_VT, sizeof(USTRENUM_VT)); - result->context = adopted; - } - } - if (result == NULL) { - delete adopted; - } - return result; -} - -// C wrapper --------------------------------------------------------------- *** - -U_CDECL_BEGIN - -typedef struct UCharStringEnumeration { - UEnumeration uenum; - int32_t index, count; -} UCharStringEnumeration; - -static void U_CALLCONV -ucharstrenum_close(UEnumeration* en) { - uprv_free(en); -} - -static int32_t U_CALLCONV -ucharstrenum_count(UEnumeration* en, - UErrorCode* /*ec*/) { - return ((UCharStringEnumeration*)en)->count; -} - -static const UChar* U_CALLCONV -ucharstrenum_unext(UEnumeration* en, - int32_t* resultLength, - UErrorCode* /*ec*/) { - UCharStringEnumeration *e = (UCharStringEnumeration*) en; - if 
(e->index >= e->count) { - return NULL; - } - const UChar* result = ((const UChar**)e->uenum.context)[e->index++]; - if (resultLength) { - *resultLength = (int32_t)u_strlen(result); - } - return result; -} - - -static const char* U_CALLCONV -ucharstrenum_next(UEnumeration* en, - int32_t* resultLength, - UErrorCode* /*ec*/) { - UCharStringEnumeration *e = (UCharStringEnumeration*) en; - if (e->index >= e->count) { - return NULL; - } - const char* result = ((const char**)e->uenum.context)[e->index++]; - if (resultLength) { - *resultLength = (int32_t)uprv_strlen(result); - } - return result; -} - -static void U_CALLCONV -ucharstrenum_reset(UEnumeration* en, - UErrorCode* /*ec*/) { - ((UCharStringEnumeration*)en)->index = 0; -} - -static const UEnumeration UCHARSTRENUM_VT = { - NULL, - NULL, // store StringEnumeration pointer here - ucharstrenum_close, - ucharstrenum_count, - uenum_unextDefault, - ucharstrenum_next, - ucharstrenum_reset -}; - -static const UEnumeration UCHARSTRENUM_U_VT = { - NULL, - NULL, // store StringEnumeration pointer here - ucharstrenum_close, - ucharstrenum_count, - ucharstrenum_unext, - uenum_nextDefault, - ucharstrenum_reset -}; - -U_CDECL_END - -U_CAPI UEnumeration* U_EXPORT2 -uenum_openCharStringsEnumeration(const char* const strings[], int32_t count, - UErrorCode* ec) { - UCharStringEnumeration* result = NULL; - if (U_SUCCESS(*ec) && count >= 0 && (count == 0 || strings != 0)) { - result = (UCharStringEnumeration*) uprv_malloc(sizeof(UCharStringEnumeration)); - if (result == NULL) { - *ec = U_MEMORY_ALLOCATION_ERROR; - } else { - U_ASSERT((char*)result==(char*)(&result->uenum)); - uprv_memcpy(result, &UCHARSTRENUM_VT, sizeof(UCHARSTRENUM_VT)); - result->uenum.context = (void*)strings; - result->index = 0; - result->count = count; - } - } - return (UEnumeration*) result; -} - -U_CAPI UEnumeration* U_EXPORT2 -uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count, - UErrorCode* ec) { - UCharStringEnumeration* result = 
NULL; - if (U_SUCCESS(*ec) && count >= 0 && (count == 0 || strings != 0)) { - result = (UCharStringEnumeration*) uprv_malloc(sizeof(UCharStringEnumeration)); - if (result == NULL) { - *ec = U_MEMORY_ALLOCATION_ERROR; - } else { - U_ASSERT((char*)result==(char*)(&result->uenum)); - uprv_memcpy(result, &UCHARSTRENUM_U_VT, sizeof(UCHARSTRENUM_U_VT)); - result->uenum.context = (void*)strings; - result->index = 0; - result->count = count; - } - } - return (UEnumeration*) result; -} - - -// end C Wrapper diff --git a/deps/node/deps/icu-small/source/common/ustrenum.h b/deps/node/deps/icu-small/source/common/ustrenum.h deleted file mode 100644 index 582727cd..00000000 --- a/deps/node/deps/icu-small/source/common/ustrenum.h +++ /dev/null @@ -1,86 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2002-2014, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Author: Alan Liu -* Created: November 11 2002 -* Since: ICU 2.4 -********************************************************************** -*/ -#ifndef _USTRENUM_H_ -#define _USTRENUM_H_ - -#include "unicode/uenum.h" -#include "unicode/strenum.h" - -//---------------------------------------------------------------------- -U_NAMESPACE_BEGIN - -/** - * A wrapper to make a UEnumeration into a StringEnumeration. The - * wrapper adopts the UEnumeration is wraps. - */ -class U_COMMON_API UStringEnumeration : public StringEnumeration { - -public: - /** - * Constructor. This constructor adopts its UEnumeration - * argument. - * @param uenum a UEnumeration object. This object takes - * ownership of 'uenum' and will close it in its destructor. The - * caller must not call uenum_close on 'uenum' after calling this - * constructor. 
- */ - UStringEnumeration(UEnumeration* uenum); - - /** - * Destructor. This closes the UEnumeration passed in to the - * constructor. - */ - virtual ~UStringEnumeration(); - - /** - * Return the number of elements that the iterator traverses. - * @param status the error code. - * @return number of elements in the iterator. - */ - virtual int32_t count(UErrorCode& status) const; - - virtual const char* next(int32_t *resultLength, UErrorCode& status); - - /** - * Returns the next element a UnicodeString*. If there are no - * more elements, returns NULL. - * @param status the error code. - * @return a pointer to the string, or NULL. - */ - virtual const UnicodeString* snext(UErrorCode& status); - - /** - * Resets the iterator. - * @param status the error code. - */ - virtual void reset(UErrorCode& status); - - /** - * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class. - */ - virtual UClassID getDynamicClassID() const; - - /** - * ICU4C "poor man's RTTI", returns a UClassID for this ICU class. - */ - static UClassID U_EXPORT2 getStaticClassID(); - - static UStringEnumeration * U_EXPORT2 fromUEnumeration( - UEnumeration *enumToAdopt, UErrorCode &status); -private: - UEnumeration *uenum; // owned -}; - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/ustrfmt.cpp b/deps/node/deps/icu-small/source/common/ustrfmt.cpp deleted file mode 100644 index 1a9b15a5..00000000 --- a/deps/node/deps/icu-small/source/common/ustrfmt.cpp +++ /dev/null @@ -1,59 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2001-2006, International Business Machines -* Corporation and others. All Rights Reserved. 
-********************************************************************** -*/ - -#include "cstring.h" -#include "ustrfmt.h" - - -/*** - * Fills in a UChar* string with the radix-based representation of a - * uint32_t number padded with zeroes to minwidth. The result - * will be null terminated if there is room. - * - * @param buffer UChar buffer to receive result - * @param capacity capacity of buffer - * @param i the unsigned number to be formatted - * @param radix the radix from 2..36 - * @param minwidth the minimum width. If the result is narrower than - * this, '0's will be added on the left. Must be <= - * capacity. - * @return the length of the result, not including any terminating - * null - */ -U_CAPI int32_t U_EXPORT2 -uprv_itou (UChar * buffer, int32_t capacity, - uint32_t i, uint32_t radix, int32_t minwidth) -{ - int32_t length = 0; - int digit; - int32_t j; - UChar temp; - - do{ - digit = (int)(i % radix); - buffer[length++]=(UChar)(digit<=9?(0x0030+digit):(0x0030+digit+7)); - i=i/radix; - } while(i && length= 0) { - return (UChar *)string + idx; - } else { - return NULL; - } -} - -/* Search for a codepoint in a string that matches one of the matchSet codepoints. */ -U_CAPI int32_t U_EXPORT2 -u_strcspn(const UChar *string, const UChar *matchSet) -{ - int32_t idx = _matchFromSet(string, matchSet, TRUE); - if(idx >= 0) { - return idx; - } else { - return -idx - 1; /* == u_strlen(string) */ - } -} - -/* Search for a codepoint in a string that does not match one of the matchSet codepoints. 
*/ -U_CAPI int32_t U_EXPORT2 -u_strspn(const UChar *string, const UChar *matchSet) -{ - int32_t idx = _matchFromSet(string, matchSet, FALSE); - if(idx >= 0) { - return idx; - } else { - return -idx - 1; /* == u_strlen(string) */ - } -} - -/* ----- Text manipulation functions --- */ - -U_CAPI UChar* U_EXPORT2 -u_strtok_r(UChar *src, - const UChar *delim, - UChar **saveState) -{ - UChar *tokSource; - UChar *nextToken; - uint32_t nonDelimIdx; - - /* If saveState is NULL, the user messed up. */ - if (src != NULL) { - tokSource = src; - *saveState = src; /* Set to "src" in case there are no delimiters */ - } - else if (*saveState) { - tokSource = *saveState; - } - else { - /* src == NULL && *saveState == NULL */ - /* This shouldn't happen. We already finished tokenizing. */ - return NULL; - } - - /* Skip initial delimiters */ - nonDelimIdx = u_strspn(tokSource, delim); - tokSource = &tokSource[nonDelimIdx]; - - if (*tokSource) { - nextToken = u_strpbrk(tokSource, delim); - if (nextToken != NULL) { - /* Create a token */ - *(nextToken++) = 0; - *saveState = nextToken; - return tokSource; - } - else if (*saveState) { - /* Return the last token */ - *saveState = NULL; - return tokSource; - } - } - else { - /* No tokens were found. Only delimiters were left. 
*/ - *saveState = NULL; - } - return NULL; -} - -/* Miscellaneous functions -------------------------------------------------- */ - -U_CAPI UChar* U_EXPORT2 -u_strcat(UChar *dst, - const UChar *src) -{ - UChar *anchor = dst; /* save a pointer to start of dst */ - - while(*dst != 0) { /* To end of first string */ - ++dst; - } - while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */ - } - - return anchor; -} - -U_CAPI UChar* U_EXPORT2 -u_strncat(UChar *dst, - const UChar *src, - int32_t n ) -{ - if(n > 0) { - UChar *anchor = dst; /* save a pointer to start of dst */ - - while(*dst != 0) { /* To end of first string */ - ++dst; - } - while((*dst = *src) != 0) { /* copy string 2 over */ - ++dst; - if(--n == 0) { - *dst = 0; - break; - } - ++src; - } - - return anchor; - } else { - return dst; - } -} - -/* ----- Text property functions --- */ - -U_CAPI int32_t U_EXPORT2 -u_strcmp(const UChar *s1, - const UChar *s2) -{ - UChar c1, c2; - - for(;;) { - c1=*s1++; - c2=*s2++; - if (c1 != c2 || c1 == 0) { - break; - } - } - return (int32_t)c1 - (int32_t)c2; -} - -U_CFUNC int32_t U_EXPORT2 -uprv_strCompare(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - UBool strncmpStyle, UBool codePointOrder) { - const UChar *start1, *start2, *limit1, *limit2; - UChar c1, c2; - - /* setup for fix-up */ - start1=s1; - start2=s2; - - /* compare identical prefixes - they do not need to be fixed up */ - if(length1<0 && length2<0) { - /* strcmp style, both NUL-terminated */ - if(s1==s2) { - return 0; - } - - for(;;) { - c1=*s1; - c2=*s2; - if(c1!=c2) { - break; - } - if(c1==0) { - return 0; - } - ++s1; - ++s2; - } - - /* setup for fix-up */ - limit1=limit2=NULL; - } else if(strncmpStyle) { - /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */ - if(s1==s2) { - return 0; - } - - limit1=start1+length1; - - for(;;) { - /* both lengths are same, check only one limit */ - if(s1==limit1) { - return 0; - } - - c1=*s1; - c2=*s2; - if(c1!=c2) { 
- break; - } - if(c1==0) { - return 0; - } - ++s1; - ++s2; - } - - /* setup for fix-up */ - limit2=start2+length1; /* use length1 here, too, to enforce assumption */ - } else { - /* memcmp/UnicodeString style, both length-specified */ - int32_t lengthResult; - - if(length1<0) { - length1=u_strlen(s1); - } - if(length2<0) { - length2=u_strlen(s2); - } - - /* limit1=start1+min(lenght1, length2) */ - if(length1length2 */ { - lengthResult=1; - limit1=start1+length2; - } - - if(s1==s2) { - return lengthResult; - } - - for(;;) { - /* check pseudo-limit */ - if(s1==limit1) { - return lengthResult; - } - - c1=*s1; - c2=*s2; - if(c1!=c2) { - break; - } - ++s1; - ++s2; - } - - /* setup for fix-up */ - limit1=start1+length1; - limit2=start2+length2; - } - - /* if both values are in or above the surrogate range, fix them up */ - if(c1>=0xd800 && c2>=0xd800 && codePointOrder) { - /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ - if( - (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) || - (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1))) - ) { - /* part of a surrogate pair, leave >=d800 */ - } else { - /* BMP code point - may be surrogate code point - make =d800 */ - } else { - /* BMP code point - may be surrogate code point - make move(iter1, 0, UITER_START); - iter2->move(iter2, 0, UITER_START); - - /* compare identical prefixes - they do not need to be fixed up */ - for(;;) { - c1=iter1->next(iter1); - c2=iter2->next(iter2); - if(c1!=c2) { - break; - } - if(c1==-1) { - return 0; - } - } - - /* if both values are in or above the surrogate range, fix them up */ - if(c1>=0xd800 && c2>=0xd800 && codePointOrder) { - /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ - if( - (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) || - (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1)))) - ) { - /* part of a surrogate pair, leave >=d800 */ - } else { - /* BMP code 
point - may be surrogate code point - make current(iter2))) || - (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2)))) - ) { - /* part of a surrogate pair, leave >=d800 */ - } else { - /* BMP code point - may be surrogate code point - make =0. - * - * Consistently leaving them _behind_ the different units is not an option - * because the current "unit" is the end of the string if that is reached, - * and in such a case the iterator does not move. - * For example, when comparing "ab" with "abc", both iterators rest _on_ the end - * of their strings. Calling previous() on each does not move them to where - * the comparison fails. - * - * So the simplest semantics is to not define where the iterators end up. - * - * The following fragment is part of what would need to be done for backing up. - */ -void fragment { - /* iff a surrogate is part of a surrogate pair, leave >=d800 */ - if(c1<=0xdbff) { - if(!U16_IS_TRAIL(iter1->current(iter1))) { - /* lead surrogate code point - make getIndex(iter1, UITER_CURRENT); - iter1->previous(iter1); /* ==c1 */ - if(!U16_IS_LEAD(iter1->previous(iter1))) { - /* trail surrogate code point - make move(iter1, idx, UITER_ZERO); - } else /* 0xe000<=c1<=0xffff */ { - /* BMP code point - make 0) { - int32_t rc; - for(;;) { - rc = (int32_t)*s1 - (int32_t)*s2; - if(rc != 0 || *s1 == 0 || --n == 0) { - return rc; - } - ++s1; - ++s2; - } - } else { - return 0; - } -} - -U_CAPI int32_t U_EXPORT2 -u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) { - return uprv_strCompare(s1, n, s2, n, TRUE, TRUE); -} - -U_CAPI UChar* U_EXPORT2 -u_strcpy(UChar *dst, - const UChar *src) -{ - UChar *anchor = dst; /* save a pointer to start of dst */ - - while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */ - } - - return anchor; -} - -U_CAPI UChar* U_EXPORT2 -u_strncpy(UChar *dst, - const UChar *src, - int32_t n) -{ - UChar *anchor = dst; /* save a pointer to start of dst */ - - /* copy string 2 over */ - 
while(n > 0 && (*(dst++) = *(src++)) != 0) { - --n; - } - - return anchor; -} - -U_CAPI int32_t U_EXPORT2 -u_strlen(const UChar *s) -{ -#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR - return (int32_t)uprv_wcslen((const wchar_t *)s); -#else - const UChar *t = s; - while(*t != 0) { - ++t; - } - return t - s; -#endif -} - -U_CAPI int32_t U_EXPORT2 -u_countChar32(const UChar *s, int32_t length) { - int32_t count; - - if(s==NULL || length<-1) { - return 0; - } - - count=0; - if(length>=0) { - while(length>0) { - ++count; - if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) { - s+=2; - length-=2; - } else { - ++s; - --length; - } - } - } else /* length==-1 */ { - UChar c; - - for(;;) { - if((c=*s++)==0) { - break; - } - ++count; - - /* - * sufficient to look ahead one because of UTF-16; - * safe to look ahead one because at worst that would be the terminating NUL - */ - if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) { - ++s; - } - } - } - return count; -} - -U_CAPI UBool U_EXPORT2 -u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) { - - if(number<0) { - return TRUE; - } - if(s==NULL || length<-1) { - return FALSE; - } - - if(length==-1) { - /* s is NUL-terminated */ - UChar c; - - /* count code points until they exceed */ - for(;;) { - if((c=*s++)==0) { - return FALSE; - } - if(number==0) { - return TRUE; - } - if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) { - ++s; - } - --number; - } - } else { - /* length>=0 known */ - const UChar *limit; - int32_t maxSupplementary; - - /* s contains at least (length+1)/2 code points: <=2 UChars per cp */ - if(((length+1)/2)>number) { - return TRUE; - } - - /* check if s does not even contain enough UChars */ - maxSupplementary=length-number; - if(maxSupplementary<=0) { - return FALSE; - } - /* there are maxSupplementary=length-number more UChars than asked-for code points */ - - /* - * count code points until they exceed and also check that there are - * no more than maxSupplementary supplementary code points (UChar pairs) - */ - 
limit=s+length; - for(;;) { - if(s==limit) { - return FALSE; - } - if(number==0) { - return TRUE; - } - if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) { - ++s; - if(--maxSupplementary<=0) { - /* too many pairs - too few code points */ - return FALSE; - } - } - --number; - } - } -} - -U_CAPI UChar * U_EXPORT2 -u_memcpy(UChar *dest, const UChar *src, int32_t count) { - if(count > 0) { - uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR); - } - return dest; -} - -U_CAPI UChar * U_EXPORT2 -u_memmove(UChar *dest, const UChar *src, int32_t count) { - if(count > 0) { - uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR); - } - return dest; -} - -U_CAPI UChar * U_EXPORT2 -u_memset(UChar *dest, UChar c, int32_t count) { - if(count > 0) { - UChar *ptr = dest; - UChar *limit = dest + count; - - while (ptr < limit) { - *(ptr++) = c; - } - } - return dest; -} - -U_CAPI int32_t U_EXPORT2 -u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) { - if(count > 0) { - const UChar *limit = buf1 + count; - int32_t result; - - while (buf1 < limit) { - result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2; - if (result != 0) { - return result; - } - buf1++; - buf2++; - } - } - return 0; -} - -U_CAPI int32_t U_EXPORT2 -u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) { - return uprv_strCompare(s1, count, s2, count, FALSE, TRUE); -} - -/* u_unescape & support fns ------------------------------------------------- */ - -/* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */ -static const UChar UNESCAPE_MAP[] = { - /*" 0x22, 0x22 */ - /*' 0x27, 0x27 */ - /*? 
0x3F, 0x3F */ - /*\ 0x5C, 0x5C */ - /*a*/ 0x61, 0x07, - /*b*/ 0x62, 0x08, - /*e*/ 0x65, 0x1b, - /*f*/ 0x66, 0x0c, - /*n*/ 0x6E, 0x0a, - /*r*/ 0x72, 0x0d, - /*t*/ 0x74, 0x09, - /*v*/ 0x76, 0x0b -}; -enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) }; - -/* Convert one octal digit to a numeric value 0..7, or -1 on failure */ -static int8_t _digit8(UChar c) { - if (c >= 0x0030 && c <= 0x0037) { - return (int8_t)(c - 0x0030); - } - return -1; -} - -/* Convert one hex digit to a numeric value 0..F, or -1 on failure */ -static int8_t _digit16(UChar c) { - if (c >= 0x0030 && c <= 0x0039) { - return (int8_t)(c - 0x0030); - } - if (c >= 0x0041 && c <= 0x0046) { - return (int8_t)(c - (0x0041 - 10)); - } - if (c >= 0x0061 && c <= 0x0066) { - return (int8_t)(c - (0x0061 - 10)); - } - return -1; -} - -/* Parse a single escape sequence. Although this method deals in - * UChars, it does not use C++ or UnicodeString. This allows it to - * be used from C contexts. */ -U_CAPI UChar32 U_EXPORT2 -u_unescapeAt(UNESCAPE_CHAR_AT charAt, - int32_t *offset, - int32_t length, - void *context) { - - int32_t start = *offset; - UChar c; - UChar32 result = 0; - int8_t n = 0; - int8_t minDig = 0; - int8_t maxDig = 0; - int8_t bitsPerDigit = 4; - int8_t dig; - int32_t i; - UBool braces = FALSE; - - /* Check that offset is in range */ - if (*offset < 0 || *offset >= length) { - goto err; - } - - /* Fetch first UChar after '\\' */ - c = charAt((*offset)++, context); - - /* Convert hexadecimal and octal escapes */ - switch (c) { - case 0x0075 /*'u'*/: - minDig = maxDig = 4; - break; - case 0x0055 /*'U'*/: - minDig = maxDig = 8; - break; - case 0x0078 /*'x'*/: - minDig = 1; - if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) { - ++(*offset); - braces = TRUE; - maxDig = 8; - } else { - maxDig = 2; - } - break; - default: - dig = _digit8(c); - if (dig >= 0) { - minDig = 1; - maxDig = 3; - n = 1; /* Already have first octal digit */ - bitsPerDigit = 3; - result = dig; - } - break; 
- } - if (minDig != 0) { - while (*offset < length && n < maxDig) { - c = charAt(*offset, context); - dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c)); - if (dig < 0) { - break; - } - result = (result << bitsPerDigit) | dig; - ++(*offset); - ++n; - } - if (n < minDig) { - goto err; - } - if (braces) { - if (c != 0x7D /*}*/) { - goto err; - } - ++(*offset); - } - if (result < 0 || result >= 0x110000) { - goto err; - } - /* If an escape sequence specifies a lead surrogate, see if - * there is a trail surrogate after it, either as an escape or - * as a literal. If so, join them up into a supplementary. - */ - if (*offset < length && U16_IS_LEAD(result)) { - int32_t ahead = *offset + 1; - c = charAt(*offset, context); - if (c == 0x5C /*'\\'*/ && ahead < length) { - c = (UChar) u_unescapeAt(charAt, &ahead, length, context); - } - if (U16_IS_TRAIL(c)) { - *offset = ahead; - result = U16_GET_SUPPLEMENTARY(result, c); - } - } - return result; - } - - /* Convert C-style escapes in table */ - for (i=0; i destCapacity) { - srcLen = destCapacity; - } - u_charsToUChars(src, dest, srcLen); -} - -/* Do an invariant conversion of char* -> UChar*, with escape parsing */ -U_CAPI int32_t U_EXPORT2 -u_unescape(const char *src, UChar *dest, int32_t destCapacity) { - const char *segment = src; - int32_t i = 0; - char c; - - while ((c=*src) != 0) { - /* '\\' intentionally written as compiler-specific - * character constant to correspond to compiler-specific - * char* constants. */ - if (c == '\\') { - int32_t lenParsed = 0; - UChar32 c32; - if (src != segment) { - if (dest != NULL) { - _appendUChars(dest + i, destCapacity - i, - segment, (int32_t)(src - segment)); - } - i += (int32_t)(src - segment); - } - ++src; /* advance past '\\' */ - c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src); - if (lenParsed == 0) { - goto err; - } - src += lenParsed; /* advance past escape seq. 
*/ - if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) { - U16_APPEND_UNSAFE(dest, i, c32); - } else { - i += U16_LENGTH(c32); - } - segment = src; - } else { - ++src; - } - } - if (src != segment) { - if (dest != NULL) { - _appendUChars(dest + i, destCapacity - i, - segment, (int32_t)(src - segment)); - } - i += (int32_t)(src - segment); - } - if (dest != NULL && i < destCapacity) { - dest[i] = 0; - } - return i; - - err: - if (dest != NULL && destCapacity > 0) { - *dest = 0; - } - return 0; -} - -/* NUL-termination of strings ----------------------------------------------- */ - -/** - * NUL-terminate a string no matter what its type. - * Set warning and error codes accordingly. - */ -#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) \ - if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) { \ - /* not a public function, so no complete argument checking */ \ - \ - if(length<0) { \ - /* assume that the caller handles this */ \ - } else if(lengthdestCapacity */ { \ - /* even the string itself did not fit - set an error code */ \ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; \ - } \ - } - -U_CAPI int32_t U_EXPORT2 -u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { - __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); - return length; -} - -U_CAPI int32_t U_EXPORT2 -u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { - __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); - return length; -} - -U_CAPI int32_t U_EXPORT2 -u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { - __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); - return length; -} - -U_CAPI int32_t U_EXPORT2 -u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { - __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); - return length; -} - -// Compute the hash code for a string 
-------------------------------------- *** - -// Moved here from uhash.c so that UnicodeString::hashCode() does not depend -// on UHashtable code. - -/* - Compute the hash by iterating sparsely over about 32 (up to 63) - characters spaced evenly through the string. For each character, - multiply the previous hash value by a prime number and add the new - character in, like a linear congruential random number generator, - producing a pseudorandom deterministic value well distributed over - the output range. [LIU] -*/ - -#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \ - uint32_t hash = 0; \ - const TYPE *p = (const TYPE*) STR; \ - if (p != NULL) { \ - int32_t len = (int32_t)(STRLEN); \ - int32_t inc = ((len - 32) / 32) + 1; \ - const TYPE *limit = p + len; \ - while (p(hash) - -/* Used by UnicodeString to compute its hashcode - Not public API. */ -U_CAPI int32_t U_EXPORT2 -ustr_hashUCharsN(const UChar *str, int32_t length) { - STRING_HASH(UChar, str, length, *p); -} - -U_CAPI int32_t U_EXPORT2 -ustr_hashCharsN(const char *str, int32_t length) { - STRING_HASH(uint8_t, str, length, *p); -} - -U_CAPI int32_t U_EXPORT2 -ustr_hashICharsN(const char *str, int32_t length) { - STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p)); -} diff --git a/deps/node/deps/icu-small/source/common/ustrtrns.cpp b/deps/node/deps/icu-small/source/common/ustrtrns.cpp deleted file mode 100644 index 583ec63c..00000000 --- a/deps/node/deps/icu-small/source/common/ustrtrns.cpp +++ /dev/null @@ -1,1451 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2001-2016, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -****************************************************************************** -* -* File ustrtrns.cpp -* -* Modification History: -* -* Date Name Description -* 9/10/2001 Ram Creation. -****************************************************************************** -*/ - -/******************************************************************************* - * - * u_strTo* and u_strFrom* APIs - * WCS functions moved to ustr_wcs.c for better modularization - * - ******************************************************************************* - */ - - -#include "unicode/putil.h" -#include "unicode/ustring.h" -#include "unicode/utf.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" -#include "cstring.h" -#include "cmemory.h" -#include "ustr_imp.h" -#include "uassert.h" - -U_CAPI UChar* U_EXPORT2 -u_strFromUTF32WithSub(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar32 *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode) { - const UChar32 *srcLimit; - UChar32 ch; - UChar *destLimit; - UChar *pDest; - int32_t reqLength; - int32_t numSubstitutions; - - /* args check */ - if(U_FAILURE(*pErrorCode)){ - return NULL; - } - if( (src==NULL && srcLength!=0) || srcLength < -1 || - (destCapacity<0) || (dest == NULL && destCapacity > 0) || - subchar > 0x10ffff || U_IS_SURROGATE(subchar) - ) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - if(pNumSubstitutions != NULL) { - *pNumSubstitutions = 0; - } - - pDest = dest; - destLimit = (dest!=NULL)?(dest + destCapacity):NULL; - reqLength = 0; - numSubstitutions = 0; - - if(srcLength < 0) { - /* simple loop for conversion of a NUL-terminated BMP string */ - while((ch=*src) != 0 && - ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) { - ++src; - if(pDest < destLimit) { - *pDest++ = (UChar)ch; - } else { - ++reqLength; - } - } - srcLimit = src; - if(ch != 0) { - /* "complicated" case, find the end of the remaining string */ - 
while(*++srcLimit != 0) {} - } - } else { - srcLimit = (src!=NULL)?(src + srcLength):NULL; - } - - /* convert with length */ - while(src < srcLimit) { - ch = *src++; - do { - /* usually "loops" once; twice only for writing subchar */ - if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) { - if(pDest < destLimit) { - *pDest++ = (UChar)ch; - } else { - ++reqLength; - } - break; - } else if(0x10000 <= ch && ch <= 0x10ffff) { - if(pDest!=NULL && ((pDest + 2) <= destLimit)) { - *pDest++ = U16_LEAD(ch); - *pDest++ = U16_TRAIL(ch); - } else { - reqLength += 2; - } - break; - } else if((ch = subchar) < 0) { - /* surrogate code point, or not a Unicode code point at all */ - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } else { - ++numSubstitutions; - } - } while(TRUE); - } - - reqLength += (int32_t)(pDest - dest); - if(pDestLength) { - *pDestLength = reqLength; - } - if(pNumSubstitutions != NULL) { - *pNumSubstitutions = numSubstitutions; - } - - /* Terminate the buffer */ - u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); - - return dest; -} - -U_CAPI UChar* U_EXPORT2 -u_strFromUTF32(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar32 *src, - int32_t srcLength, - UErrorCode *pErrorCode) { - return u_strFromUTF32WithSub( - dest, destCapacity, pDestLength, - src, srcLength, - U_SENTINEL, NULL, - pErrorCode); -} - -U_CAPI UChar32* U_EXPORT2 -u_strToUTF32WithSub(UChar32 *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode) { - const UChar *srcLimit; - UChar32 ch; - UChar ch2; - UChar32 *destLimit; - UChar32 *pDest; - int32_t reqLength; - int32_t numSubstitutions; - - /* args check */ - if(U_FAILURE(*pErrorCode)){ - return NULL; - } - if( (src==NULL && srcLength!=0) || srcLength < -1 || - (destCapacity<0) || (dest == NULL && destCapacity > 0) || - subchar > 0x10ffff || U_IS_SURROGATE(subchar) - ) { - *pErrorCode 
= U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - if(pNumSubstitutions != NULL) { - *pNumSubstitutions = 0; - } - - pDest = dest; - destLimit = (dest!=NULL)?(dest + destCapacity):NULL; - reqLength = 0; - numSubstitutions = 0; - - if(srcLength < 0) { - /* simple loop for conversion of a NUL-terminated BMP string */ - while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) { - ++src; - if(pDest < destLimit) { - *pDest++ = ch; - } else { - ++reqLength; - } - } - srcLimit = src; - if(ch != 0) { - /* "complicated" case, find the end of the remaining string */ - while(*++srcLimit != 0) {} - } - } else { - srcLimit = (src!=NULL)?(src + srcLength):NULL; - } - - /* convert with length */ - while(src < srcLimit) { - ch = *src++; - if(!U16_IS_SURROGATE(ch)) { - /* write or count ch below */ - } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) { - ++src; - ch = U16_GET_SUPPLEMENTARY(ch, ch2); - } else if((ch = subchar) < 0) { - /* unpaired surrogate */ - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } else { - ++numSubstitutions; - } - if(pDest < destLimit) { - *pDest++ = ch; - } else { - ++reqLength; - } - } - - reqLength += (int32_t)(pDest - dest); - if(pDestLength) { - *pDestLength = reqLength; - } - if(pNumSubstitutions != NULL) { - *pNumSubstitutions = numSubstitutions; - } - - /* Terminate the buffer */ - u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode); - - return dest; -} - -U_CAPI UChar32* U_EXPORT2 -u_strToUTF32(UChar32 *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UErrorCode *pErrorCode) { - return u_strToUTF32WithSub( - dest, destCapacity, pDestLength, - src, srcLength, - U_SENTINEL, NULL, - pErrorCode); -} - -U_CAPI UChar* U_EXPORT2 -u_strFromUTF8WithSub(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const char* src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode){ - /* args check */ - 
if(U_FAILURE(*pErrorCode)) { - return NULL; - } - if( (src==NULL && srcLength!=0) || srcLength < -1 || - (destCapacity<0) || (dest == NULL && destCapacity > 0) || - subchar > 0x10ffff || U_IS_SURROGATE(subchar) - ) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - if(pNumSubstitutions!=NULL) { - *pNumSubstitutions=0; - } - UChar *pDest = dest; - UChar *pDestLimit = dest+destCapacity; - int32_t reqLength = 0; - int32_t numSubstitutions=0; - - /* - * Inline processing of UTF-8 byte sequences: - * - * Byte sequences for the most common characters are handled inline in - * the conversion loops. In order to reduce the path lengths for those - * characters, the tests are arranged in a kind of binary search. - * ASCII (<=0x7f) is checked first, followed by the dividing point - * between 2- and 3-byte sequences (0xe0). - * The 3-byte branch is tested first to speed up CJK text. - * The compiler should combine the subtractions for the two tests for 0xe0. - * Each branch then tests for the other end of its range. - */ - - if(srcLength < 0){ - /* - * Transform a NUL-terminated string. - * The code explicitly checks for NULs only in the lead byte position. - * A NUL byte in the trail byte position fails the trail byte range check anyway. 
- */ - int32_t i; - UChar32 c; - for(i = 0; (c = (uint8_t)src[i]) != 0 && (pDest < pDestLimit);) { - // modified copy of U8_NEXT() - ++i; - if(U8_IS_SINGLE(c)) { - *pDest++=(UChar)c; - } else { - uint8_t __t1, __t2; - if( /* handle U+0800..U+FFFF inline */ - (0xe0<=(c) && (c)<0xf0) && - U8_IS_VALID_LEAD3_AND_T1((c), src[i]) && - (__t2=src[(i)+1]-0x80)<=0x3f) { - *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2; - i+=2; - } else if( /* handle U+0080..U+07FF inline */ - ((c)<0xe0 && (c)>=0xc2) && - (__t1=src[i]-0x80)<=0x3f) { - *pDest++ = (((c)&0x1f)<<6)|__t1; - ++(i); - } else { - /* function call for "complicated" and error cases */ - (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1); - if(c<0 && (++numSubstitutions, c = subchar) < 0) { - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } else if(c<=0xFFFF) { - *(pDest++)=(UChar)c; - } else { - *(pDest++)=U16_LEAD(c); - if(pDest=0xc2) && - (__t1=src[i]-0x80)<=0x3f) { - ++reqLength; - ++(i); - } else { - /* function call for "complicated" and error cases */ - (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1); - if(c<0 && (++numSubstitutions, c = subchar) < 0) { - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } - reqLength += U16_LENGTH(c); - } - } - } - } else /* srcLength >= 0 */ { - /* Faster loop without ongoing checking for srcLength and pDestLimit. */ - int32_t i = 0; - UChar32 c; - for(;;) { - /* - * Each iteration of the inner loop progresses by at most 3 UTF-8 - * bytes and one UChar, for most characters. - * For supplementary code points (4 & 2), which are rare, - * there is an additional adjustment. - */ - int32_t count = (int32_t)(pDestLimit - pDest); - int32_t count2 = (srcLength - i) / 3; - if(count > count2) { - count = count2; /* min(remaining dest, remaining src/3) */ - } - if(count < 3) { - /* - * Too much overhead if we get near the end of the string, - * continue with the next loop. 
- */ - break; - } - - do { - // modified copy of U8_NEXT() - c = (uint8_t)src[i++]; - if(U8_IS_SINGLE(c)) { - *pDest++=(UChar)c; - } else { - uint8_t __t1, __t2; - if( /* handle U+0800..U+FFFF inline */ - (0xe0<=(c) && (c)<0xf0) && - ((i)+1)=0xc2) && - ((i)!=srcLength) && - (__t1=src[i]-0x80)<=0x3f) { - *pDest++ = (((c)&0x1f)<<6)|__t1; - ++(i); - } else { - if(c >= 0xf0 || subchar > 0xffff) { - // We may read up to four bytes and write up to two UChars, - // which we didn't account for with computing count, - // so we adjust it here. - if(--count == 0) { - --i; // back out byte c - break; - } - } - - /* function call for "complicated" and error cases */ - (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1); - if(c<0 && (++numSubstitutions, c = subchar) < 0) { - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } else if(c<=0xFFFF) { - *(pDest++)=(UChar)c; - } else { - *(pDest++)=U16_LEAD(c); - *(pDest++)=U16_TRAIL(c); - } - } - } - } while(--count > 0); - } - - while(i < srcLength && (pDest < pDestLimit)) { - // modified copy of U8_NEXT() - c = (uint8_t)src[i++]; - if(U8_IS_SINGLE(c)) { - *pDest++=(UChar)c; - } else { - uint8_t __t1, __t2; - if( /* handle U+0800..U+FFFF inline */ - (0xe0<=(c) && (c)<0xf0) && - ((i)+1)=0xc2) && - ((i)!=srcLength) && - (__t1=src[i]-0x80)<=0x3f) { - *pDest++ = (((c)&0x1f)<<6)|__t1; - ++(i); - } else { - /* function call for "complicated" and error cases */ - (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1); - if(c<0 && (++numSubstitutions, c = subchar) < 0) { - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } else if(c<=0xFFFF) { - *(pDest++)=(UChar)c; - } else { - *(pDest++)=U16_LEAD(c); - if(pDest=0xc2) && - ((i)!=srcLength) && - (__t1=src[i]-0x80)<=0x3f) { - ++reqLength; - ++(i); - } else { - /* function call for "complicated" and error cases */ - (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1); - if(c<0 && (++numSubstitutions, c = subchar) < 0) { - 
*pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } - reqLength += U16_LENGTH(c); - } - } - } - } - - reqLength+=(int32_t)(pDest - dest); - - if(pNumSubstitutions!=NULL) { - *pNumSubstitutions=numSubstitutions; - } - - if(pDestLength){ - *pDestLength = reqLength; - } - - /* Terminate the buffer */ - u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); - - return dest; -} - -U_CAPI UChar* U_EXPORT2 -u_strFromUTF8(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const char* src, - int32_t srcLength, - UErrorCode *pErrorCode){ - return u_strFromUTF8WithSub( - dest, destCapacity, pDestLength, - src, srcLength, - U_SENTINEL, NULL, - pErrorCode); -} - -U_CAPI UChar * U_EXPORT2 -u_strFromUTF8Lenient(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const char *src, - int32_t srcLength, - UErrorCode *pErrorCode) { - UChar *pDest = dest; - UChar32 ch; - int32_t reqLength = 0; - uint8_t* pSrc = (uint8_t*) src; - - /* args check */ - if(U_FAILURE(*pErrorCode)){ - return NULL; - } - - if( (src==NULL && srcLength!=0) || srcLength < -1 || - (destCapacity<0) || (dest == NULL && destCapacity > 0) - ) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - if(srcLength < 0) { - /* Transform a NUL-terminated string. */ - UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL; - uint8_t t1, t2, t3; /* trail bytes */ - - while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) { - if(ch < 0xc0) { - /* - * ASCII, or a trail byte in lead position which is treated like - * a single-byte sequence for better character boundary - * resynchronization after illegal sequences. 
- */ - *pDest++=(UChar)ch; - ++pSrc; - continue; - } else if(ch < 0xe0) { /* U+0080..U+07FF */ - if((t1 = pSrc[1]) != 0) { - /* 0x3080 = (0xc0 << 6) + 0x80 */ - *pDest++ = (UChar)((ch << 6) + t1 - 0x3080); - pSrc += 2; - continue; - } - } else if(ch < 0xf0) { /* U+0800..U+FFFF */ - if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) { - /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ - /* 0x2080 = (0x80 << 6) + 0x80 */ - *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080); - pSrc += 3; - continue; - } - } else /* f0..f4 */ { /* U+10000..U+10FFFF */ - if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) { - pSrc += 4; - /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ - ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080; - *(pDest++) = U16_LEAD(ch); - if(pDest < pDestLimit) { - *(pDest++) = U16_TRAIL(ch); - } else { - reqLength = 1; - break; - } - continue; - } - } - - /* truncated character at the end */ - *pDest++ = 0xfffd; - while(*++pSrc != 0) {} - break; - } - - /* Pre-flight the rest of the string. */ - while((ch = *pSrc) != 0) { - if(ch < 0xc0) { - /* - * ASCII, or a trail byte in lead position which is treated like - * a single-byte sequence for better character boundary - * resynchronization after illegal sequences. 
- */ - ++reqLength; - ++pSrc; - continue; - } else if(ch < 0xe0) { /* U+0080..U+07FF */ - if(pSrc[1] != 0) { - ++reqLength; - pSrc += 2; - continue; - } - } else if(ch < 0xf0) { /* U+0800..U+FFFF */ - if(pSrc[1] != 0 && pSrc[2] != 0) { - ++reqLength; - pSrc += 3; - continue; - } - } else /* f0..f4 */ { /* U+10000..U+10FFFF */ - if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) { - reqLength += 2; - pSrc += 4; - continue; - } - } - - /* truncated character at the end */ - ++reqLength; - break; - } - } else /* srcLength >= 0 */ { - const uint8_t *pSrcLimit = (pSrc!=NULL)?(pSrc + srcLength):NULL; - - /* - * This function requires that if srcLength is given, then it must be - * destCapatity >= srcLength so that we need not check for - * destination buffer overflow in the loop. - */ - if(destCapacity < srcLength) { - if(pDestLength != NULL) { - *pDestLength = srcLength; /* this likely overestimates the true destLength! */ - } - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - return NULL; - } - - if((pSrcLimit - pSrc) >= 4) { - pSrcLimit -= 3; /* temporarily reduce pSrcLimit */ - - /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */ - do { - ch = *pSrc++; - if(ch < 0xc0) { - /* - * ASCII, or a trail byte in lead position which is treated like - * a single-byte sequence for better character boundary - * resynchronization after illegal sequences. 
- */ - *pDest++=(UChar)ch; - } else if(ch < 0xe0) { /* U+0080..U+07FF */ - /* 0x3080 = (0xc0 << 6) + 0x80 */ - *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080); - } else if(ch < 0xf0) { /* U+0800..U+FFFF */ - /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ - /* 0x2080 = (0x80 << 6) + 0x80 */ - ch = (ch << 12) + (*pSrc++ << 6); - *pDest++ = (UChar)(ch + *pSrc++ - 0x2080); - } else /* f0..f4 */ { /* U+10000..U+10FFFF */ - /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ - ch = (ch << 18) + (*pSrc++ << 12); - ch += *pSrc++ << 6; - ch += *pSrc++ - 0x3c82080; - *(pDest++) = U16_LEAD(ch); - *(pDest++) = U16_TRAIL(ch); - } - } while(pSrc < pSrcLimit); - - pSrcLimit += 3; /* restore original pSrcLimit */ - } - - while(pSrc < pSrcLimit) { - ch = *pSrc++; - if(ch < 0xc0) { - /* - * ASCII, or a trail byte in lead position which is treated like - * a single-byte sequence for better character boundary - * resynchronization after illegal sequences. 
- */ - *pDest++=(UChar)ch; - continue; - } else if(ch < 0xe0) { /* U+0080..U+07FF */ - if(pSrc < pSrcLimit) { - /* 0x3080 = (0xc0 << 6) + 0x80 */ - *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080); - continue; - } - } else if(ch < 0xf0) { /* U+0800..U+FFFF */ - if((pSrcLimit - pSrc) >= 2) { - /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ - /* 0x2080 = (0x80 << 6) + 0x80 */ - ch = (ch << 12) + (*pSrc++ << 6); - *pDest++ = (UChar)(ch + *pSrc++ - 0x2080); - pSrc += 3; - continue; - } - } else /* f0..f4 */ { /* U+10000..U+10FFFF */ - if((pSrcLimit - pSrc) >= 3) { - /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ - ch = (ch << 18) + (*pSrc++ << 12); - ch += *pSrc++ << 6; - ch += *pSrc++ - 0x3c82080; - *(pDest++) = U16_LEAD(ch); - *(pDest++) = U16_TRAIL(ch); - pSrc += 4; - continue; - } - } - - /* truncated character at the end */ - *pDest++ = 0xfffd; - break; - } - } - - reqLength+=(int32_t)(pDest - dest); - - if(pDestLength){ - *pDestLength = reqLength; - } - - /* Terminate the buffer */ - u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); - - return dest; -} - -static inline uint8_t * -_appendUTF8(uint8_t *pDest, UChar32 c) { - /* it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */ - if((c)<=0x7f) { - *pDest++=(uint8_t)c; - } else if(c<=0x7ff) { - *pDest++=(uint8_t)((c>>6)|0xc0); - *pDest++=(uint8_t)((c&0x3f)|0x80); - } else if(c<=0xffff) { - *pDest++=(uint8_t)((c>>12)|0xe0); - *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80); - *pDest++=(uint8_t)(((c)&0x3f)|0x80); - } else /* if((uint32_t)(c)<=0x10ffff) */ { - *pDest++=(uint8_t)(((c)>>18)|0xf0); - *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80); - *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80); - *pDest++=(uint8_t)(((c)&0x3f)|0x80); - } - return pDest; -} - - -U_CAPI char* U_EXPORT2 -u_strToUTF8WithSub(char *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *pSrc, - int32_t srcLength, - UChar32 subchar, int32_t 
*pNumSubstitutions, - UErrorCode *pErrorCode){ - int32_t reqLength=0; - uint32_t ch=0,ch2=0; - uint8_t *pDest = (uint8_t *)dest; - uint8_t *pDestLimit = (pDest!=NULL)?(pDest + destCapacity):NULL; - int32_t numSubstitutions; - - /* args check */ - if(U_FAILURE(*pErrorCode)){ - return NULL; - } - - if( (pSrc==NULL && srcLength!=0) || srcLength < -1 || - (destCapacity<0) || (dest == NULL && destCapacity > 0) || - subchar > 0x10ffff || U_IS_SURROGATE(subchar) - ) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - if(pNumSubstitutions!=NULL) { - *pNumSubstitutions=0; - } - numSubstitutions=0; - - if(srcLength==-1) { - while((ch=*pSrc)!=0) { - ++pSrc; - if(ch <= 0x7f) { - if(pDest= 2) { - *pDest++=(uint8_t)((ch>>6)|0xc0); - *pDest++=(uint8_t)((ch&0x3f)|0x80); - } else { - reqLength = 2; - break; - } - } else if(ch <= 0xd7ff || ch >= 0xe000) { - if((pDestLimit - pDest) >= 3) { - *pDest++=(uint8_t)((ch>>12)|0xe0); - *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); - *pDest++=(uint8_t)((ch&0x3f)|0x80); - } else { - reqLength = 3; - break; - } - } else /* ch is a surrogate */ { - int32_t length; - - /*need not check for NUL because NUL fails U16_IS_TRAIL() anyway*/ - if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { - ++pSrc; - ch=U16_GET_SUPPLEMENTARY(ch, ch2); - } else if(subchar>=0) { - ch=subchar; - ++numSubstitutions; - } else { - /* Unicode 3.2 forbids surrogate code points in UTF-8 */ - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } - - length = U8_LENGTH(ch); - if((pDestLimit - pDest) >= length) { - /* convert and append*/ - pDest=_appendUTF8(pDest, ch); - } else { - reqLength = length; - break; - } - } - } - while((ch=*pSrc++)!=0) { - if(ch<=0x7f) { - ++reqLength; - } else if(ch<=0x7ff) { - reqLength+=2; - } else if(!U16_IS_SURROGATE(ch)) { - reqLength+=3; - } else if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { - ++pSrc; - reqLength+=4; - } else if(subchar>=0) { - reqLength+=U8_LENGTH(subchar); - ++numSubstitutions; - } else { - 
/* Unicode 3.2 forbids surrogate code points in UTF-8 */ - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } - } - } else { - const UChar *pSrcLimit = (pSrc!=NULL)?(pSrc+srcLength):NULL; - int32_t count; - - /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ - for(;;) { - /* - * Each iteration of the inner loop progresses by at most 3 UTF-8 - * bytes and one UChar, for most characters. - * For supplementary code points (4 & 2), which are rare, - * there is an additional adjustment. - */ - count = (int32_t)((pDestLimit - pDest) / 3); - srcLength = (int32_t)(pSrcLimit - pSrc); - if(count > srcLength) { - count = srcLength; /* min(remaining dest/3, remaining src) */ - } - if(count < 3) { - /* - * Too much overhead if we get near the end of the string, - * continue with the next loop. - */ - break; - } - do { - ch=*pSrc++; - if(ch <= 0x7f) { - *pDest++ = (uint8_t)ch; - } else if(ch <= 0x7ff) { - *pDest++=(uint8_t)((ch>>6)|0xc0); - *pDest++=(uint8_t)((ch&0x3f)|0x80); - } else if(ch <= 0xd7ff || ch >= 0xe000) { - *pDest++=(uint8_t)((ch>>12)|0xe0); - *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); - *pDest++=(uint8_t)((ch&0x3f)|0x80); - } else /* ch is a surrogate */ { - /* - * We will read two UChars and probably output four bytes, - * which we didn't account for with computing count, - * so we adjust it here. 
- */ - if(--count == 0) { - --pSrc; /* undo ch=*pSrc++ for the lead surrogate */ - break; /* recompute count */ - } - - if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { - ++pSrc; - ch=U16_GET_SUPPLEMENTARY(ch, ch2); - - /* writing 4 bytes per 2 UChars is ok */ - *pDest++=(uint8_t)((ch>>18)|0xf0); - *pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80); - *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); - *pDest++=(uint8_t)((ch&0x3f)|0x80); - } else { - /* Unicode 3.2 forbids surrogate code points in UTF-8 */ - if(subchar>=0) { - ch=subchar; - ++numSubstitutions; - } else { - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } - - /* convert and append*/ - pDest=_appendUTF8(pDest, ch); - } - } - } while(--count > 0); - } - - while(pSrc= 2) { - *pDest++=(uint8_t)((ch>>6)|0xc0); - *pDest++=(uint8_t)((ch&0x3f)|0x80); - } else { - reqLength = 2; - break; - } - } else if(ch <= 0xd7ff || ch >= 0xe000) { - if((pDestLimit - pDest) >= 3) { - *pDest++=(uint8_t)((ch>>12)|0xe0); - *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); - *pDest++=(uint8_t)((ch&0x3f)|0x80); - } else { - reqLength = 3; - break; - } - } else /* ch is a surrogate */ { - int32_t length; - - if(U16_IS_SURROGATE_LEAD(ch) && pSrc=0) { - ch=subchar; - ++numSubstitutions; - } else { - /* Unicode 3.2 forbids surrogate code points in UTF-8 */ - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } - - length = U8_LENGTH(ch); - if((pDestLimit - pDest) >= length) { - /* convert and append*/ - pDest=_appendUTF8(pDest, ch); - } else { - reqLength = length; - break; - } - } - } - while(pSrc=0) { - reqLength+=U8_LENGTH(subchar); - ++numSubstitutions; - } else { - /* Unicode 3.2 forbids surrogate code points in UTF-8 */ - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } - } - } - - reqLength+=(int32_t)(pDest - (uint8_t *)dest); - - if(pNumSubstitutions!=NULL) { - *pNumSubstitutions=numSubstitutions; - } - - if(pDestLength){ - *pDestLength = reqLength; - } - - /* Terminate the buffer */ - u_terminateChars(dest, destCapacity, 
reqLength, pErrorCode); - return dest; -} - -U_CAPI char* U_EXPORT2 -u_strToUTF8(char *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *pSrc, - int32_t srcLength, - UErrorCode *pErrorCode){ - return u_strToUTF8WithSub( - dest, destCapacity, pDestLength, - pSrc, srcLength, - U_SENTINEL, NULL, - pErrorCode); -} - -U_CAPI UChar* U_EXPORT2 -u_strFromJavaModifiedUTF8WithSub( - UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const char *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode) { - /* args check */ - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - if( (src==NULL && srcLength!=0) || srcLength < -1 || - (dest==NULL && destCapacity!=0) || destCapacity<0 || - subchar > 0x10ffff || U_IS_SURROGATE(subchar) - ) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - if(pNumSubstitutions!=NULL) { - *pNumSubstitutions=0; - } - UChar *pDest = dest; - UChar *pDestLimit = dest+destCapacity; - int32_t reqLength = 0; - int32_t numSubstitutions=0; - - if(srcLength < 0) { - /* - * Transform a NUL-terminated ASCII string. - * Handle non-ASCII strings with slower code. - */ - UChar32 c; - while(((c = (uint8_t)*src) != 0) && c <= 0x7f && (pDest < pDestLimit)) { - *pDest++=(UChar)c; - ++src; - } - if(c == 0) { - reqLength=(int32_t)(pDest - dest); - if(pDestLength) { - *pDestLength = reqLength; - } - - /* Terminate the buffer */ - u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); - return dest; - } - srcLength = static_cast(uprv_strlen(src)); - } - - /* Faster loop without ongoing checking for srcLength and pDestLimit. 
*/ - UChar32 ch; - uint8_t t1, t2; - int32_t i = 0; - for(;;) { - int32_t count = (int32_t)(pDestLimit - pDest); - int32_t count2 = srcLength - i; - if(count >= count2 && srcLength > 0 && U8_IS_SINGLE(*src)) { - /* fast ASCII loop */ - int32_t start = i; - uint8_t b; - while(i < srcLength && U8_IS_SINGLE(b = src[i])) { - *pDest++=b; - ++i; - } - int32_t delta = i - start; - count -= delta; - count2 -= delta; - } - /* - * Each iteration of the inner loop progresses by at most 3 UTF-8 - * bytes and one UChar. - */ - if(subchar > 0xFFFF) { - break; - } - count2 /= 3; - if(count > count2) { - count = count2; /* min(remaining dest, remaining src/3) */ - } - if(count < 3) { - /* - * Too much overhead if we get near the end of the string, - * continue with the next loop. - */ - break; - } - do { - ch = (uint8_t)src[i++]; - if(U8_IS_SINGLE(ch)) { - *pDest++=(UChar)ch; - } else { - if(ch >= 0xe0) { - if( /* handle U+0000..U+FFFF inline */ - ch <= 0xef && - (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f && - (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f - ) { - /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ - *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); - i += 2; - continue; - } - } else { - if( /* handle U+0000..U+07FF inline */ - ch >= 0xc0 && - (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f - ) { - *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); - ++i; - continue; - } - } - - if(subchar < 0) { - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } else if(subchar > 0xffff && --count == 0) { - /* - * We need to write two UChars, adjusted count for that, - * and ran out of space. 
- */ - --i; // back out byte ch - break; - } else { - /* function call for error cases */ - utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1); - ++numSubstitutions; - *(pDest++)=(UChar)subchar; - } - } - } while(--count > 0); - } - - while(i < srcLength && (pDest < pDestLimit)) { - ch = (uint8_t)src[i++]; - if(U8_IS_SINGLE(ch)){ - *pDest++=(UChar)ch; - } else { - if(ch >= 0xe0) { - if( /* handle U+0000..U+FFFF inline */ - ch <= 0xef && - (i+1) < srcLength && - (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f && - (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f - ) { - /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ - *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); - i += 2; - continue; - } - } else { - if( /* handle U+0000..U+07FF inline */ - ch >= 0xc0 && - i < srcLength && - (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f - ) { - *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); - ++i; - continue; - } - } - - if(subchar < 0) { - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } else { - /* function call for error cases */ - utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1); - ++numSubstitutions; - if(subchar<=0xFFFF) { - *(pDest++)=(UChar)subchar; - } else { - *(pDest++)=U16_LEAD(subchar); - if(pDest= 0xe0) { - if( /* handle U+0000..U+FFFF inline */ - ch <= 0xef && - (i+1) < srcLength && - (uint8_t)(src[i] - 0x80) <= 0x3f && - (uint8_t)(src[i+1] - 0x80) <= 0x3f - ) { - reqLength++; - i += 2; - continue; - } - } else { - if( /* handle U+0000..U+07FF inline */ - ch >= 0xc0 && - i < srcLength && - (uint8_t)(src[i] - 0x80) <= 0x3f - ) { - reqLength++; - ++i; - continue; - } - } - - if(subchar < 0) { - *pErrorCode = U_INVALID_CHAR_FOUND; - return NULL; - } else { - /* function call for error cases */ - utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1); - ++numSubstitutions; - reqLength+=U16_LENGTH(ch); - } - } - } - - if(pNumSubstitutions!=NULL) { - 
*pNumSubstitutions=numSubstitutions; - } - - reqLength+=(int32_t)(pDest - dest); - if(pDestLength) { - *pDestLength = reqLength; - } - - /* Terminate the buffer */ - u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); - return dest; -} - -U_CAPI char* U_EXPORT2 -u_strToJavaModifiedUTF8( - char *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UErrorCode *pErrorCode) { - int32_t reqLength=0; - uint32_t ch=0; - uint8_t *pDest = (uint8_t *)dest; - uint8_t *pDestLimit = pDest + destCapacity; - const UChar *pSrcLimit; - int32_t count; - - /* args check */ - if(U_FAILURE(*pErrorCode)){ - return NULL; - } - if( (src==NULL && srcLength!=0) || srcLength < -1 || - (dest==NULL && destCapacity!=0) || destCapacity<0 - ) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - if(srcLength==-1) { - /* Convert NUL-terminated ASCII, then find the string length. */ - while((ch=*src)<=0x7f && ch != 0 && pDest= srcLength && srcLength > 0 && *src <= 0x7f) { - /* fast ASCII loop */ - const UChar *prevSrc = src; - int32_t delta; - while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) { - *pDest++=(uint8_t)ch; - ++src; - } - delta = (int32_t)(src - prevSrc); - count -= delta; - srcLength -= delta; - } - /* - * Each iteration of the inner loop progresses by at most 3 UTF-8 - * bytes and one UChar. - */ - count /= 3; - if(count > srcLength) { - count = srcLength; /* min(remaining dest/3, remaining src) */ - } - if(count < 3) { - /* - * Too much overhead if we get near the end of the string, - * continue with the next loop. 
- */ - break; - } - do { - ch=*src++; - if(ch <= 0x7f && ch != 0) { - *pDest++ = (uint8_t)ch; - } else if(ch <= 0x7ff) { - *pDest++=(uint8_t)((ch>>6)|0xc0); - *pDest++=(uint8_t)((ch&0x3f)|0x80); - } else { - *pDest++=(uint8_t)((ch>>12)|0xe0); - *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); - *pDest++=(uint8_t)((ch&0x3f)|0x80); - } - } while(--count > 0); - } - - while(src= 2) { - *pDest++=(uint8_t)((ch>>6)|0xc0); - *pDest++=(uint8_t)((ch&0x3f)|0x80); - } else { - reqLength = 2; - break; - } - } else { - if((pDestLimit - pDest) >= 3) { - *pDest++=(uint8_t)((ch>>12)|0xe0); - *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); - *pDest++=(uint8_t)((ch&0x3f)|0x80); - } else { - reqLength = 3; - break; - } - } - } - while(srcpFuncs->access(ut, index, forward); -} - - - -U_CAPI UBool U_EXPORT2 -utext_moveIndex32(UText *ut, int32_t delta) { - UChar32 c; - if (delta > 0) { - do { - if(ut->chunkOffset>=ut->chunkLength && !utext_access(ut, ut->chunkNativeLimit, TRUE)) { - return FALSE; - } - c = ut->chunkContents[ut->chunkOffset]; - if (U16_IS_SURROGATE(c)) { - c = utext_next32(ut); - if (c == U_SENTINEL) { - return FALSE; - } - } else { - ut->chunkOffset++; - } - } while(--delta>0); - - } else if (delta<0) { - do { - if(ut->chunkOffset<=0 && !utext_access(ut, ut->chunkNativeStart, FALSE)) { - return FALSE; - } - c = ut->chunkContents[ut->chunkOffset-1]; - if (U16_IS_SURROGATE(c)) { - c = utext_previous32(ut); - if (c == U_SENTINEL) { - return FALSE; - } - } else { - ut->chunkOffset--; - } - } while(++delta<0); - } - - return TRUE; -} - - -U_CAPI int64_t U_EXPORT2 -utext_nativeLength(UText *ut) { - return ut->pFuncs->nativeLength(ut); -} - - -U_CAPI UBool U_EXPORT2 -utext_isLengthExpensive(const UText *ut) { - UBool r = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)) != 0; - return r; -} - - -U_CAPI int64_t U_EXPORT2 -utext_getNativeIndex(const UText *ut) { - if(ut->chunkOffset <= ut->nativeIndexingLimit) { - return ut->chunkNativeStart+ut->chunkOffset; - } else { - 
return ut->pFuncs->mapOffsetToNative(ut); - } -} - - -U_CAPI void U_EXPORT2 -utext_setNativeIndex(UText *ut, int64_t index) { - if(indexchunkNativeStart || index>=ut->chunkNativeLimit) { - // The desired position is outside of the current chunk. - // Access the new position. Assume a forward iteration from here, - // which will also be optimimum for a single random access. - // Reverse iterations may suffer slightly. - ut->pFuncs->access(ut, index, TRUE); - } else if((int32_t)(index - ut->chunkNativeStart) <= ut->nativeIndexingLimit) { - // utf-16 indexing. - ut->chunkOffset=(int32_t)(index-ut->chunkNativeStart); - } else { - ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index); - } - // The convention is that the index must always be on a code point boundary. - // Adjust the index position if it is in the middle of a surrogate pair. - if (ut->chunkOffsetchunkLength) { - UChar c= ut->chunkContents[ut->chunkOffset]; - if (U16_IS_TRAIL(c)) { - if (ut->chunkOffset==0) { - ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE); - } - if (ut->chunkOffset>0) { - UChar lead = ut->chunkContents[ut->chunkOffset-1]; - if (U16_IS_LEAD(lead)) { - ut->chunkOffset--; - } - } - } - } -} - - - -U_CAPI int64_t U_EXPORT2 -utext_getPreviousNativeIndex(UText *ut) { - // - // Fast-path the common case. - // Common means current position is not at the beginning of a chunk - // and the preceding character is not supplementary. - // - int32_t i = ut->chunkOffset - 1; - int64_t result; - if (i >= 0) { - UChar c = ut->chunkContents[i]; - if (U16_IS_TRAIL(c) == FALSE) { - if (i <= ut->nativeIndexingLimit) { - result = ut->chunkNativeStart + i; - } else { - ut->chunkOffset = i; - result = ut->pFuncs->mapOffsetToNative(ut); - ut->chunkOffset++; - } - return result; - } - } - - // If at the start of text, simply return 0. - if (ut->chunkOffset==0 && ut->chunkNativeStart==0) { - return 0; - } - - // Harder, less common cases. We are at a chunk boundary, or on a surrogate. 
- // Keep it simple, use other functions to handle the edges. - // - utext_previous32(ut); - result = UTEXT_GETNATIVEINDEX(ut); - utext_next32(ut); - return result; -} - - -// -// utext_current32. Get the UChar32 at the current position. -// UText iteration position is always on a code point boundary, -// never on the trail half of a surrogate pair. -// -U_CAPI UChar32 U_EXPORT2 -utext_current32(UText *ut) { - UChar32 c; - if (ut->chunkOffset==ut->chunkLength) { - // Current position is just off the end of the chunk. - if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) { - // Off the end of the text. - return U_SENTINEL; - } - } - - c = ut->chunkContents[ut->chunkOffset]; - if (U16_IS_LEAD(c) == FALSE) { - // Normal, non-supplementary case. - return c; - } - - // - // Possible supplementary char. - // - UChar32 trail = 0; - UChar32 supplementaryC = c; - if ((ut->chunkOffset+1) < ut->chunkLength) { - // The trail surrogate is in the same chunk. - trail = ut->chunkContents[ut->chunkOffset+1]; - } else { - // The trail surrogate is in a different chunk. - // Because we must maintain the iteration position, we need to switch forward - // into the new chunk, get the trail surrogate, then revert the chunk back to the - // original one. - // An edge case to be careful of: the entire text may end with an unpaired - // leading surrogate. The attempt to access the trail will fail, but - // the original position before the unpaired lead still needs to be restored. 
- int64_t nativePosition = ut->chunkNativeLimit; - int32_t originalOffset = ut->chunkOffset; - if (ut->pFuncs->access(ut, nativePosition, TRUE)) { - trail = ut->chunkContents[ut->chunkOffset]; - } - UBool r = ut->pFuncs->access(ut, nativePosition, FALSE); // reverse iteration flag loads preceding chunk - U_ASSERT(r==TRUE); - ut->chunkOffset = originalOffset; - if(!r) { - return U_SENTINEL; - } - } - - if (U16_IS_TRAIL(trail)) { - supplementaryC = U16_GET_SUPPLEMENTARY(c, trail); - } - return supplementaryC; - -} - - -U_CAPI UChar32 U_EXPORT2 -utext_char32At(UText *ut, int64_t nativeIndex) { - UChar32 c = U_SENTINEL; - - // Fast path the common case. - if (nativeIndex>=ut->chunkNativeStart && nativeIndex < ut->chunkNativeStart + ut->nativeIndexingLimit) { - ut->chunkOffset = (int32_t)(nativeIndex - ut->chunkNativeStart); - c = ut->chunkContents[ut->chunkOffset]; - if (U16_IS_SURROGATE(c) == FALSE) { - return c; - } - } - - - utext_setNativeIndex(ut, nativeIndex); - if (nativeIndex>=ut->chunkNativeStart && ut->chunkOffsetchunkLength) { - c = ut->chunkContents[ut->chunkOffset]; - if (U16_IS_SURROGATE(c)) { - // For surrogates, let current32() deal with the complications - // of supplementaries that may span chunk boundaries. - c = utext_current32(ut); - } - } - return c; -} - - -U_CAPI UChar32 U_EXPORT2 -utext_next32(UText *ut) { - UChar32 c; - - if (ut->chunkOffset >= ut->chunkLength) { - if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) { - return U_SENTINEL; - } - } - - c = ut->chunkContents[ut->chunkOffset++]; - if (U16_IS_LEAD(c) == FALSE) { - // Normal case, not supplementary. - // (A trail surrogate seen here is just returned as is, as a surrogate value. - // It cannot be part of a pair.) - return c; - } - - if (ut->chunkOffset >= ut->chunkLength) { - if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) { - // c is an unpaired lead surrogate at the end of the text. - // return it as it is. 
- return c; - } - } - UChar32 trail = ut->chunkContents[ut->chunkOffset]; - if (U16_IS_TRAIL(trail) == FALSE) { - // c was an unpaired lead surrogate, not at the end of the text. - // return it as it is (unpaired). Iteration position is on the - // following character, possibly in the next chunk, where the - // trail surrogate would have been if it had existed. - return c; - } - - UChar32 supplementary = U16_GET_SUPPLEMENTARY(c, trail); - ut->chunkOffset++; // move iteration position over the trail surrogate. - return supplementary; - } - - -U_CAPI UChar32 U_EXPORT2 -utext_previous32(UText *ut) { - UChar32 c; - - if (ut->chunkOffset <= 0) { - if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) { - return U_SENTINEL; - } - } - ut->chunkOffset--; - c = ut->chunkContents[ut->chunkOffset]; - if (U16_IS_TRAIL(c) == FALSE) { - // Normal case, not supplementary. - // (A lead surrogate seen here is just returned as is, as a surrogate value. - // It cannot be part of a pair.) - return c; - } - - if (ut->chunkOffset <= 0) { - if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) { - // c is an unpaired trail surrogate at the start of the text. - // return it as it is. - return c; - } - } - - UChar32 lead = ut->chunkContents[ut->chunkOffset-1]; - if (U16_IS_LEAD(lead) == FALSE) { - // c was an unpaired trail surrogate, not at the end of the text. - // return it as it is (unpaired). Iteration position is at c - return c; - } - - UChar32 supplementary = U16_GET_SUPPLEMENTARY(lead, c); - ut->chunkOffset--; // move iteration position over the lead surrogate. - return supplementary; -} - - - -U_CAPI UChar32 U_EXPORT2 -utext_next32From(UText *ut, int64_t index) { - UChar32 c = U_SENTINEL; - - if(indexchunkNativeStart || index>=ut->chunkNativeLimit) { - // Desired position is outside of the current chunk. 
- if(!ut->pFuncs->access(ut, index, TRUE)) { - // no chunk available here - return U_SENTINEL; - } - } else if (index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) { - // Desired position is in chunk, with direct 1:1 native to UTF16 indexing - ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart); - } else { - // Desired position is in chunk, with non-UTF16 indexing. - ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index); - } - - c = ut->chunkContents[ut->chunkOffset++]; - if (U16_IS_SURROGATE(c)) { - // Surrogates. Many edge cases. Use other functions that already - // deal with the problems. - utext_setNativeIndex(ut, index); - c = utext_next32(ut); - } - return c; -} - - -U_CAPI UChar32 U_EXPORT2 -utext_previous32From(UText *ut, int64_t index) { - // - // Return the character preceding the specified index. - // Leave the iteration position at the start of the character that was returned. - // - UChar32 cPrev; // The character preceding cCurr, which is what we will return. - - // Address the chunk containg the position preceding the incoming index - // A tricky edge case: - // We try to test the requested native index against the chunkNativeStart to determine - // whether the character preceding the one at the index is in the current chunk. - // BUT, this test can fail with UTF-8 (or any other multibyte encoding), when the - // requested index is on something other than the first position of the first char. - // - if(index<=ut->chunkNativeStart || index>ut->chunkNativeLimit) { - // Requested native index is outside of the current chunk. - if(!ut->pFuncs->access(ut, index, FALSE)) { - // no chunk available here - return U_SENTINEL; - } - } else if(index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) { - // Direct UTF-16 indexing. 
- ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart); - } else { - ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index); - if (ut->chunkOffset==0 && !ut->pFuncs->access(ut, index, FALSE)) { - // no chunk available here - return U_SENTINEL; - } - } - - // - // Simple case with no surrogates. - // - ut->chunkOffset--; - cPrev = ut->chunkContents[ut->chunkOffset]; - - if (U16_IS_SURROGATE(cPrev)) { - // Possible supplementary. Many edge cases. - // Let other functions do the heavy lifting. - utext_setNativeIndex(ut, index); - cPrev = utext_previous32(ut); - } - return cPrev; -} - - -U_CAPI int32_t U_EXPORT2 -utext_extract(UText *ut, - int64_t start, int64_t limit, - UChar *dest, int32_t destCapacity, - UErrorCode *status) { - return ut->pFuncs->extract(ut, start, limit, dest, destCapacity, status); - } - - - -U_CAPI UBool U_EXPORT2 -utext_equals(const UText *a, const UText *b) { - if (a==NULL || b==NULL || - a->magic != UTEXT_MAGIC || - b->magic != UTEXT_MAGIC) { - // Null or invalid arguments don't compare equal to anything. - return FALSE; - } - - if (a->pFuncs != b->pFuncs) { - // Different types of text providers. - return FALSE; - } - - if (a->context != b->context) { - // Different sources (different strings) - return FALSE; - } - if (utext_getNativeIndex(a) != utext_getNativeIndex(b)) { - // Different current position in the string. - return FALSE; - } - - return TRUE; -} - -U_CAPI UBool U_EXPORT2 -utext_isWritable(const UText *ut) -{ - UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0; - return b; -} - - -U_CAPI void U_EXPORT2 -utext_freeze(UText *ut) { - // Zero out the WRITABLE flag. 
- ut->providerProperties &= ~(I32_FLAG(UTEXT_PROVIDER_WRITABLE)); -} - - -U_CAPI UBool U_EXPORT2 -utext_hasMetaData(const UText *ut) -{ - UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA)) != 0; - return b; -} - - - -U_CAPI int32_t U_EXPORT2 -utext_replace(UText *ut, - int64_t nativeStart, int64_t nativeLimit, - const UChar *replacementText, int32_t replacementLength, - UErrorCode *status) -{ - if (U_FAILURE(*status)) { - return 0; - } - if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) { - *status = U_NO_WRITE_PERMISSION; - return 0; - } - int32_t i = ut->pFuncs->replace(ut, nativeStart, nativeLimit, replacementText, replacementLength, status); - return i; -} - -U_CAPI void U_EXPORT2 -utext_copy(UText *ut, - int64_t nativeStart, int64_t nativeLimit, - int64_t destIndex, - UBool move, - UErrorCode *status) -{ - if (U_FAILURE(*status)) { - return; - } - if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) { - *status = U_NO_WRITE_PERMISSION; - return; - } - ut->pFuncs->copy(ut, nativeStart, nativeLimit, destIndex, move, status); -} - - - -U_CAPI UText * U_EXPORT2 -utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status) { - if (U_FAILURE(*status)) { - return dest; - } - UText *result = src->pFuncs->clone(dest, src, deep, status); - if (U_FAILURE(*status)) { - return result; - } - if (result == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return result; - } - if (readOnly) { - utext_freeze(result); - } - return result; -} - - - -//------------------------------------------------------------------------------ -// -// UText common functions implementation -// -//------------------------------------------------------------------------------ - -// -// UText.flags bit definitions -// -enum { - UTEXT_HEAP_ALLOCATED = 1, // 1 if ICU has allocated this UText struct on the heap. - // 0 if caller provided storage for the UText. 
- - UTEXT_EXTRA_HEAP_ALLOCATED = 2, // 1 if ICU has allocated extra storage as a separate - // heap block. - // 0 if there is no separate allocation. Either no extra - // storage was requested, or it is appended to the end - // of the main UText storage. - - UTEXT_OPEN = 4 // 1 if this UText is currently open - // 0 if this UText is not open. -}; - - -// -// Extended form of a UText. The purpose is to aid in computing the total size required -// when a provider asks for a UText to be allocated with extra storage. - -struct ExtendedUText { - UText ut; - UAlignedMemory extension; -}; - -static const UText emptyText = UTEXT_INITIALIZER; - -U_CAPI UText * U_EXPORT2 -utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status) { - if (U_FAILURE(*status)) { - return ut; - } - - if (ut == NULL) { - // We need to heap-allocate storage for the new UText - int32_t spaceRequired = sizeof(UText); - if (extraSpace > 0) { - spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(UAlignedMemory); - } - ut = (UText *)uprv_malloc(spaceRequired); - if (ut == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } else { - *ut = emptyText; - ut->flags |= UTEXT_HEAP_ALLOCATED; - if (spaceRequired>0) { - ut->extraSize = extraSpace; - ut->pExtra = &((ExtendedUText *)ut)->extension; - } - } - } else { - // We have been supplied with an already existing UText. - // Verify that it really appears to be a UText. - if (ut->magic != UTEXT_MAGIC) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return ut; - } - // If the ut is already open and there's a provider supplied close - // function, call it. - if ((ut->flags & UTEXT_OPEN) && ut->pFuncs->close != NULL) { - ut->pFuncs->close(ut); - } - ut->flags &= ~UTEXT_OPEN; - - // If extra space was requested by our caller, check whether - // sufficient already exists, and allocate new if needed. - if (extraSpace > ut->extraSize) { - // Need more space. 
If there is existing separately allocated space, - // delete it first, then allocate new space. - if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) { - uprv_free(ut->pExtra); - ut->extraSize = 0; - } - ut->pExtra = uprv_malloc(extraSpace); - if (ut->pExtra == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - } else { - ut->extraSize = extraSpace; - ut->flags |= UTEXT_EXTRA_HEAP_ALLOCATED; - } - } - } - if (U_SUCCESS(*status)) { - ut->flags |= UTEXT_OPEN; - - // Initialize all remaining fields of the UText. - // - ut->context = NULL; - ut->chunkContents = NULL; - ut->p = NULL; - ut->q = NULL; - ut->r = NULL; - ut->a = 0; - ut->b = 0; - ut->c = 0; - ut->chunkOffset = 0; - ut->chunkLength = 0; - ut->chunkNativeStart = 0; - ut->chunkNativeLimit = 0; - ut->nativeIndexingLimit = 0; - ut->providerProperties = 0; - ut->privA = 0; - ut->privB = 0; - ut->privC = 0; - ut->privP = NULL; - if (ut->pExtra!=NULL && ut->extraSize>0) - uprv_memset(ut->pExtra, 0, ut->extraSize); - - } - return ut; -} - - -U_CAPI UText * U_EXPORT2 -utext_close(UText *ut) { - if (ut==NULL || - ut->magic != UTEXT_MAGIC || - (ut->flags & UTEXT_OPEN) == 0) - { - // The supplied ut is not an open UText. - // Do nothing. - return ut; - } - - // If the provider gave us a close function, call it now. - // This will clean up anything allocated specifically by the provider. - if (ut->pFuncs->close != NULL) { - ut->pFuncs->close(ut); - } - ut->flags &= ~UTEXT_OPEN; - - // If we (the framework) allocated the UText or subsidiary storage, - // delete it. - if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) { - uprv_free(ut->pExtra); - ut->pExtra = NULL; - ut->flags &= ~UTEXT_EXTRA_HEAP_ALLOCATED; - ut->extraSize = 0; - } - - // Zero out function table of the closed UText. This is a defensive move, - // inteded to cause applications that inadvertantly use a closed - // utext to crash with null pointer errors. - ut->pFuncs = NULL; - - if (ut->flags & UTEXT_HEAP_ALLOCATED) { - // This UText was allocated by UText setup. 
We need to free it. - // Clear magic, so we can detect if the user messes up and immediately - // tries to reopen another UText using the deleted storage. - ut->magic = 0; - uprv_free(ut); - ut = NULL; - } - return ut; -} - - - - -// -// invalidateChunk Reset a chunk to have no contents, so that the next call -// to access will cause new data to load. -// This is needed when copy/move/replace operate directly on the -// backing text, potentially putting it out of sync with the -// contents in the chunk. -// -static void -invalidateChunk(UText *ut) { - ut->chunkLength = 0; - ut->chunkNativeLimit = 0; - ut->chunkNativeStart = 0; - ut->chunkOffset = 0; - ut->nativeIndexingLimit = 0; -} - -// -// pinIndex Do range pinning on a native index parameter. -// 64 bit pinning is done in place. -// 32 bit truncated result is returned as a convenience for -// use in providers that don't need 64 bits. -static int32_t -pinIndex(int64_t &index, int64_t limit) { - if (index<0) { - index = 0; - } else if (index > limit) { - index = limit; - } - return (int32_t)index; -} - - -U_CDECL_BEGIN - -// -// Pointer relocation function, -// a utility used by shallow clone. -// Adjust a pointer that refers to something within one UText (the source) -// to refer to the same relative offset within a another UText (the target) -// -static void adjustPointer(UText *dest, const void **destPtr, const UText *src) { - // convert all pointers to (char *) so that byte address arithmetic will work. - char *dptr = (char *)*destPtr; - char *dUText = (char *)dest; - char *sUText = (char *)src; - - if (dptr >= (char *)src->pExtra && dptr < ((char*)src->pExtra)+src->extraSize) { - // target ptr was to something within the src UText's pExtra storage. - // relocate it into the target UText's pExtra region. - *destPtr = ((char *)dest->pExtra) + (dptr - (char *)src->pExtra); - } else if (dptr>=sUText && dptr < sUText+src->sizeOfStruct) { - // target ptr was pointing to somewhere within the source UText itself. 
- // Move it to the same offset within the target UText. - *destPtr = dUText + (dptr-sUText); - } -} - - -// -// Clone. This is a generic copy-the-utext-by-value clone function that can be -// used as-is with some utext types, and as a helper by other clones. -// -static UText * U_CALLCONV -shallowTextClone(UText * dest, const UText * src, UErrorCode * status) { - if (U_FAILURE(*status)) { - return NULL; - } - int32_t srcExtraSize = src->extraSize; - - // - // Use the generic text_setup to allocate storage if required. - // - dest = utext_setup(dest, srcExtraSize, status); - if (U_FAILURE(*status)) { - return dest; - } - - // - // flags (how the UText was allocated) and the pointer to the - // extra storage must retain the values in the cloned utext that - // were set up by utext_setup. Save them separately before - // copying the whole struct. - // - void *destExtra = dest->pExtra; - int32_t flags = dest->flags; - - - // - // Copy the whole UText struct by value. - // Any "Extra" storage is copied also. - // - int sizeToCopy = src->sizeOfStruct; - if (sizeToCopy > dest->sizeOfStruct) { - sizeToCopy = dest->sizeOfStruct; - } - uprv_memcpy(dest, src, sizeToCopy); - dest->pExtra = destExtra; - dest->flags = flags; - if (srcExtraSize > 0) { - uprv_memcpy(dest->pExtra, src->pExtra, srcExtraSize); - } - - // - // Relocate any pointers in the target that refer to the UText itself - // to point to the cloned copy rather than the original source. - // - adjustPointer(dest, &dest->context, src); - adjustPointer(dest, &dest->p, src); - adjustPointer(dest, &dest->q, src); - adjustPointer(dest, &dest->r, src); - adjustPointer(dest, (const void **)&dest->chunkContents, src); - - // The newly shallow-cloned UText does _not_ own the underlying storage for the text. - // (The source for the clone may or may not have owned the text.) 
- - dest->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); - - return dest; -} - - -U_CDECL_END - - - -//------------------------------------------------------------------------------ -// -// UText implementation for UTF-8 char * strings (read-only) -// Limitation: string length must be <= 0x7fffffff in length. -// (length must for in an int32_t variable) -// -// Use of UText data members: -// context pointer to UTF-8 string -// utext.b is the input string length (bytes). -// utext.c Length scanned so far in string -// (for optimizing finding length of zero terminated strings.) -// utext.p pointer to the current buffer -// utext.q pointer to the other buffer. -// -//------------------------------------------------------------------------------ - -// Chunk size. -// Must be less than 85 (256/3), because of byte mapping from UChar indexes to native indexes. -// Worst case is three native bytes to one UChar. (Supplemenaries are 4 native bytes -// to two UChars.) -// The longest illegal byte sequence treated as a single error (and converted to U+FFFD) -// is a three-byte sequence (truncated four-byte sequence). -// -enum { UTF8_TEXT_CHUNK_SIZE=32 }; - -// -// UTF8Buf Two of these structs will be set up in the UText's extra allocated space. -// Each contains the UChar chunk buffer, the to and from native maps, and -// header info. -// -// because backwards iteration fills the buffers starting at the end and -// working towards the front, the filled part of the buffers may not begin -// at the start of the available storage for the buffers. -// -// Buffer size is one bigger than the specified UTF8_TEXT_CHUNK_SIZE to allow for -// the last character added being a supplementary, and thus requiring a surrogate -// pair. Doing this is simpler than checking for the edge case. -// - -struct UTF8Buf { - int32_t bufNativeStart; // Native index of first char in UChar buf - int32_t bufNativeLimit; // Native index following last char in buf. 
- int32_t bufStartIdx; // First filled position in buf. - int32_t bufLimitIdx; // Limit of filled range in buf. - int32_t bufNILimit; // Limit of native indexing part of buf - int32_t toUCharsMapStart; // Native index corresponding to - // mapToUChars[0]. - // Set to bufNativeStart when filling forwards. - // Set to computed value when filling backwards. - - UChar buf[UTF8_TEXT_CHUNK_SIZE+4]; // The UChar buffer. Requires one extra position beyond the - // the chunk size, to allow for surrogate at the end. - // Length must be identical to mapToNative array, below, - // because of the way indexing works when the array is - // filled backwards during a reverse iteration. Thus, - // the additional extra size. - uint8_t mapToNative[UTF8_TEXT_CHUNK_SIZE+4]; // map UChar index in buf to - // native offset from bufNativeStart. - // Requires two extra slots, - // one for a supplementary starting in the last normal position, - // and one for an entry for the buffer limit position. - uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to - // correspoding offset in filled part of buf. - int32_t align; -}; - -U_CDECL_BEGIN - -// -// utf8TextLength -// -// Get the length of the string. If we don't already know it, -// we'll need to scan for the trailing nul. -// -static int64_t U_CALLCONV -utf8TextLength(UText *ut) { - if (ut->b < 0) { - // Zero terminated string, and we haven't scanned to the end yet. - // Scan it now. - const char *r = (const char *)ut->context + ut->c; - while (*r != 0) { - r++; - } - if ((r - (const char *)ut->context) < 0x7fffffff) { - ut->b = (int32_t)(r - (const char *)ut->context); - } else { - // Actual string was bigger (more than 2 gig) than we - // can handle. Clip it to 2 GB. 
- ut->b = 0x7fffffff; - } - ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); - } - return ut->b; -} - - - - - - -static UBool U_CALLCONV -utf8TextAccess(UText *ut, int64_t index, UBool forward) { - // - // Apologies to those who are allergic to goto statements. - // Consider each goto to a labelled block to be the equivalent of - // call the named block as if it were a function(); - // return; - // - const uint8_t *s8=(const uint8_t *)ut->context; - UTF8Buf *u8b = NULL; - int32_t length = ut->b; // Length of original utf-8 - int32_t ix= (int32_t)index; // Requested index, trimmed to 32 bits. - int32_t mapIndex = 0; - if (index<0) { - ix=0; - } else if (index > 0x7fffffff) { - // Strings with 64 bit lengths not supported by this UTF-8 provider. - ix = 0x7fffffff; - } - - // Pin requested index to the string length. - if (ix>length) { - if (length>=0) { - ix=length; - } else if (ix>=ut->c) { - // Zero terminated string, and requested index is beyond - // the region that has already been scanned. - // Scan up to either the end of the string or to the - // requested position, whichever comes first. - while (ut->cc]!=0) { - ut->c++; - } - // TODO: support for null terminated string length > 32 bits. - if (s8[ut->c] == 0) { - // We just found the actual length of the string. - // Trim the requested index back to that. - ix = ut->c; - ut->b = ut->c; - length = ut->c; - ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); - } - } - } - - // - // Dispatch to the appropriate action for a forward iteration request. - // - if (forward) { - if (ix==ut->chunkNativeLimit) { - // Check for normal sequential iteration cases first. - if (ix==length) { - // Just reached end of string - // Don't swap buffers, but do set the - // current buffer position. - ut->chunkOffset = ut->chunkLength; - return FALSE; - } else { - // End of current buffer. - // check whether other buffer already has what we need. 
- UTF8Buf *altB = (UTF8Buf *)ut->q; - if (ix>=altB->bufNativeStart && ixbufNativeLimit) { - goto swapBuffers; - } - } - } - - // A random access. Desired index could be in either or niether buf. - // For optimizing the order of testing, first check for the index - // being in the other buffer. This will be the case for uses that - // move back and forth over a fairly limited range - { - u8b = (UTF8Buf *)ut->q; // the alternate buffer - if (ix>=u8b->bufNativeStart && ixbufNativeLimit) { - // Requested index is in the other buffer. - goto swapBuffers; - } - if (ix == length) { - // Requested index is end-of-string. - // (this is the case of randomly seeking to the end. - // The case of iterating off the end is handled earlier.) - if (ix == ut->chunkNativeLimit) { - // Current buffer extends up to the end of the string. - // Leave it as the current buffer. - ut->chunkOffset = ut->chunkLength; - return FALSE; - } - if (ix == u8b->bufNativeLimit) { - // Alternate buffer extends to the end of string. - // Swap it in as the current buffer. - goto swapBuffersAndFail; - } - - // Neither existing buffer extends to the end of the string. - goto makeStubBuffer; - } - - if (ixchunkNativeStart || ix>=ut->chunkNativeLimit) { - // Requested index is in neither buffer. - goto fillForward; - } - - // Requested index is in this buffer. - u8b = (UTF8Buf *)ut->p; // the current buffer - mapIndex = ix - u8b->toUCharsMapStart; - U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars)); - ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; - return TRUE; - - } - } - - - // - // Dispatch to the appropriate action for a - // Backwards Diretion iteration request. - // - if (ix==ut->chunkNativeStart) { - // Check for normal sequential iteration cases first. - if (ix==0) { - // Just reached the start of string - // Don't swap buffers, but do set the - // current buffer position. - ut->chunkOffset = 0; - return FALSE; - } else { - // Start of current buffer. 
- // check whether other buffer already has what we need. - UTF8Buf *altB = (UTF8Buf *)ut->q; - if (ix>altB->bufNativeStart && ix<=altB->bufNativeLimit) { - goto swapBuffers; - } - } - } - - // A random access. Desired index could be in either or niether buf. - // For optimizing the order of testing, - // Most likely case: in the other buffer. - // Second most likely: in neither buffer. - // Unlikely, but must work: in the current buffer. - u8b = (UTF8Buf *)ut->q; // the alternate buffer - if (ix>u8b->bufNativeStart && ix<=u8b->bufNativeLimit) { - // Requested index is in the other buffer. - goto swapBuffers; - } - // Requested index is start-of-string. - // (this is the case of randomly seeking to the start. - // The case of iterating off the start is handled earlier.) - if (ix==0) { - if (u8b->bufNativeStart==0) { - // Alternate buffer contains the data for the start string. - // Make it be the current buffer. - goto swapBuffersAndFail; - } else { - // Request for data before the start of string, - // neither buffer is usable. - // set up a zero-length buffer. - goto makeStubBuffer; - } - } - - if (ix<=ut->chunkNativeStart || ix>ut->chunkNativeLimit) { - // Requested index is in neither buffer. - goto fillReverse; - } - - // Requested index is in this buffer. - // Set the utf16 buffer index. - u8b = (UTF8Buf *)ut->p; - mapIndex = ix - u8b->toUCharsMapStart; - ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; - if (ut->chunkOffset==0) { - // This occurs when the first character in the text is - // a multi-byte UTF-8 char, and the requested index is to - // one of the trailing bytes. Because there is no preceding , - // character, this access fails. We can't pick up on the - // situation sooner because the requested index is not zero. - return FALSE; - } else { - return TRUE; - } - - - -swapBuffers: - // The alternate buffer (ut->q) has the string data that was requested. 
- // Swap the primary and alternate buffers, and set the - // chunk index into the new primary buffer. - { - u8b = (UTF8Buf *)ut->q; - ut->q = ut->p; - ut->p = u8b; - ut->chunkContents = &u8b->buf[u8b->bufStartIdx]; - ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx; - ut->chunkNativeStart = u8b->bufNativeStart; - ut->chunkNativeLimit = u8b->bufNativeLimit; - ut->nativeIndexingLimit = u8b->bufNILimit; - - // Index into the (now current) chunk - // Use the map to set the chunk index. It's more trouble than it's worth - // to check whether native indexing can be used. - U_ASSERT(ix>=u8b->bufNativeStart); - U_ASSERT(ix<=u8b->bufNativeLimit); - mapIndex = ix - u8b->toUCharsMapStart; - U_ASSERT(mapIndex>=0); - U_ASSERT(mapIndex<(int32_t)sizeof(u8b->mapToUChars)); - ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; - - return TRUE; - } - - - swapBuffersAndFail: - // We got a request for either the start or end of the string, - // with iteration continuing in the out-of-bounds direction. - // The alternate buffer already contains the data up to the - // start/end. - // Swap the buffers, then return failure, indicating that we couldn't - // make things correct for continuing the iteration in the requested - // direction. The position & buffer are correct should the - // user decide to iterate in the opposite direction. - u8b = (UTF8Buf *)ut->q; - ut->q = ut->p; - ut->p = u8b; - ut->chunkContents = &u8b->buf[u8b->bufStartIdx]; - ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx; - ut->chunkNativeStart = u8b->bufNativeStart; - ut->chunkNativeLimit = u8b->bufNativeLimit; - ut->nativeIndexingLimit = u8b->bufNILimit; - - // Index into the (now current) chunk - // For this function (swapBuffersAndFail), the requested index - // will always be at either the start or end of the chunk. 
- if (ix==u8b->bufNativeLimit) { - ut->chunkOffset = ut->chunkLength; - } else { - ut->chunkOffset = 0; - U_ASSERT(ix == u8b->bufNativeStart); - } - return FALSE; - -makeStubBuffer: - // The user has done a seek/access past the start or end - // of the string. Rather than loading data that is likely - // to never be used, just set up a zero-length buffer at - // the position. - u8b = (UTF8Buf *)ut->q; - u8b->bufNativeStart = ix; - u8b->bufNativeLimit = ix; - u8b->bufStartIdx = 0; - u8b->bufLimitIdx = 0; - u8b->bufNILimit = 0; - u8b->toUCharsMapStart = ix; - u8b->mapToNative[0] = 0; - u8b->mapToUChars[0] = 0; - goto swapBuffersAndFail; - - - -fillForward: - { - // Move the incoming index to a code point boundary. - U8_SET_CP_START(s8, 0, ix); - - // Swap the UText buffers. - // We want to fill what was previously the alternate buffer, - // and make what was the current buffer be the new alternate. - UTF8Buf *u8b_swap = (UTF8Buf *)ut->q; - ut->q = ut->p; - ut->p = u8b_swap; - - int32_t strLen = ut->b; - UBool nulTerminated = FALSE; - if (strLen < 0) { - strLen = 0x7fffffff; - nulTerminated = TRUE; - } - - UChar *buf = u8b_swap->buf; - uint8_t *mapToNative = u8b_swap->mapToNative; - uint8_t *mapToUChars = u8b_swap->mapToUChars; - int32_t destIx = 0; - int32_t srcIx = ix; - UBool seenNonAscii = FALSE; - UChar32 c = 0; - - // Fill the chunk buffer and mapping arrays. - while (destIx0 && c<0x80) { - // Special case ASCII range for speed. - // zero is excluded to simplify bounds checking. - buf[destIx] = (UChar)c; - mapToNative[destIx] = (uint8_t)(srcIx - ix); - mapToUChars[srcIx-ix] = (uint8_t)destIx; - srcIx++; - destIx++; - } else { - // General case, handle everything. 
- if (seenNonAscii == FALSE) { - seenNonAscii = TRUE; - u8b_swap->bufNILimit = destIx; - } - - int32_t cIx = srcIx; - int32_t dIx = destIx; - int32_t dIxSaved = destIx; - U8_NEXT_OR_FFFD(s8, srcIx, strLen, c); - if (c==0 && nulTerminated) { - srcIx--; - break; - } - - U16_APPEND_UNSAFE(buf, destIx, c); - do { - mapToNative[dIx++] = (uint8_t)(cIx - ix); - } while (dIx < destIx); - - do { - mapToUChars[cIx++ - ix] = (uint8_t)dIxSaved; - } while (cIx < srcIx); - } - if (srcIx>=strLen) { - break; - } - - } - - // store Native <--> Chunk Map entries for the end of the buffer. - // There is no actual character here, but the index position is valid. - mapToNative[destIx] = (uint8_t)(srcIx - ix); - mapToUChars[srcIx - ix] = (uint8_t)destIx; - - // fill in Buffer descriptor - u8b_swap->bufNativeStart = ix; - u8b_swap->bufNativeLimit = srcIx; - u8b_swap->bufStartIdx = 0; - u8b_swap->bufLimitIdx = destIx; - if (seenNonAscii == FALSE) { - u8b_swap->bufNILimit = destIx; - } - u8b_swap->toUCharsMapStart = u8b_swap->bufNativeStart; - - // Set UText chunk to refer to this buffer. - ut->chunkContents = buf; - ut->chunkOffset = 0; - ut->chunkLength = u8b_swap->bufLimitIdx; - ut->chunkNativeStart = u8b_swap->bufNativeStart; - ut->chunkNativeLimit = u8b_swap->bufNativeLimit; - ut->nativeIndexingLimit = u8b_swap->bufNILimit; - - // For zero terminated strings, keep track of the maximum point - // scanned so far. - if (nulTerminated && srcIx>ut->c) { - ut->c = srcIx; - if (c==0) { - // We scanned to the end. - // Remember the actual length. - ut->b = srcIx; - ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); - } - } - return TRUE; - } - - -fillReverse: - { - // Move the incoming index to a code point boundary. - // Can only do this if the incoming index is somewhere in the interior of the string. - // If index is at the end, there is no character there to look at. 
- if (ix != ut->b) { - // Note: this function will only move the index back if it is on a trail byte - // and there is a preceding lead byte and the sequence from the lead - // through this trail could be part of a valid UTF-8 sequence - // Otherwise the index remains unchanged. - U8_SET_CP_START(s8, 0, ix); - } - - // Swap the UText buffers. - // We want to fill what was previously the alternate buffer, - // and make what was the current buffer be the new alternate. - UTF8Buf *u8b_swap = (UTF8Buf *)ut->q; - ut->q = ut->p; - ut->p = u8b_swap; - - UChar *buf = u8b_swap->buf; - uint8_t *mapToNative = u8b_swap->mapToNative; - uint8_t *mapToUChars = u8b_swap->mapToUChars; - int32_t toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1; - // Note that toUCharsMapStart can be negative. Happens when the remaining - // text from current position to the beginning is less than the buffer size. - // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry. - int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region - // at end of buffer to leave room - // for a surrogate pair at the - // buffer start. - int32_t srcIx = ix; - int32_t bufNILimit = destIx; - UChar32 c; - - // Map to/from Native Indexes, fill in for the position at the end of - // the buffer. - // - mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); - mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx; - - // Fill the chunk buffer - // Work backwards, filling from the end of the buffer towards the front. - // - while (destIx>2 && (srcIx - toUCharsMapStart > 5) && (srcIx > 0)) { - srcIx--; - destIx--; - - // Get last byte of the UTF-8 character - c = s8[srcIx]; - if (c<0x80) { - // Special case ASCII range for speed. - buf[destIx] = (UChar)c; - U_ASSERT(toUCharsMapStart <= srcIx); - mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx; - mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); - } else { - // General case, handle everything non-ASCII. 
- - int32_t sIx = srcIx; // ix of last byte of multi-byte u8 char - - // Get the full character from the UTF8 string. - // use code derived from tbe macros in utf8.h - // Leaves srcIx pointing at the first byte of the UTF-8 char. - // - c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -3); - // leaves srcIx at first byte of the multi-byte char. - - // Store the character in UTF-16 buffer. - if (c<0x10000) { - buf[destIx] = (UChar)c; - mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); - } else { - buf[destIx] = U16_TRAIL(c); - mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); - buf[--destIx] = U16_LEAD(c); - mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); - } - - // Fill in the map from native indexes to UChars buf index. - do { - mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx; - } while (sIx >= srcIx); - U_ASSERT(toUCharsMapStart <= (srcIx+1)); - - // Set native indexing limit to be the current position. - // We are processing a non-ascii, non-native-indexing char now; - // the limit will be here if the rest of the chars to be - // added to this buffer are ascii. - bufNILimit = destIx; - } - } - u8b_swap->bufNativeStart = srcIx; - u8b_swap->bufNativeLimit = ix; - u8b_swap->bufStartIdx = destIx; - u8b_swap->bufLimitIdx = UTF8_TEXT_CHUNK_SIZE+2; - u8b_swap->bufNILimit = bufNILimit - u8b_swap->bufStartIdx; - u8b_swap->toUCharsMapStart = toUCharsMapStart; - - ut->chunkContents = &buf[u8b_swap->bufStartIdx]; - ut->chunkLength = u8b_swap->bufLimitIdx - u8b_swap->bufStartIdx; - ut->chunkOffset = ut->chunkLength; - ut->chunkNativeStart = u8b_swap->bufNativeStart; - ut->chunkNativeLimit = u8b_swap->bufNativeLimit; - ut->nativeIndexingLimit = u8b_swap->bufNILimit; - return TRUE; - } - -} - - - -// -// This is a slightly modified copy of u_strFromUTF8, -// Inserts a Replacement Char rather than failing on invalid UTF-8 -// Removes unnecessary features. 
-// -static UChar* -utext_strFromUTF8(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const char* src, - int32_t srcLength, // required. NUL terminated not supported. - UErrorCode *pErrorCode - ) -{ - - UChar *pDest = dest; - UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL; - UChar32 ch=0; - int32_t index = 0; - int32_t reqLength = 0; - uint8_t* pSrc = (uint8_t*) src; - - - while((index < srcLength)&&(pDest0)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - int32_t length = ut->b; - int32_t start32 = pinIndex(start, length); - int32_t limit32 = pinIndex(limit, length); - - if(start32>limit32) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - - // adjust the incoming indexes to land on code point boundaries if needed. - // adjust by no more than three, because that is the largest number of trail bytes - // in a well formed UTF8 character. - const uint8_t *buf = (const uint8_t *)ut->context; - int i; - if (start32 < ut->chunkNativeLimit) { - for (i=0; i<3; i++) { - if (U8_IS_SINGLE(buf[start32]) || U8_IS_LEAD(buf[start32]) || start32==0) { - break; - } - start32--; - } - } - - if (limit32 < ut->chunkNativeLimit) { - for (i=0; i<3; i++) { - if (U8_IS_SINGLE(buf[limit32]) || U8_IS_LEAD(buf[limit32]) || limit32==0) { - break; - } - limit32--; - } - } - - // Do the actual extract. - int32_t destLength=0; - utext_strFromUTF8(dest, destCapacity, &destLength, - (const char *)ut->context+start32, limit32-start32, - pErrorCode); - utf8TextAccess(ut, limit32, TRUE); - return destLength; -} - -// -// utf8TextMapOffsetToNative -// -// Map a chunk (UTF-16) offset to a native index. 
-static int64_t U_CALLCONV -utf8TextMapOffsetToNative(const UText *ut) { - // - UTF8Buf *u8b = (UTF8Buf *)ut->p; - U_ASSERT(ut->chunkOffset>ut->nativeIndexingLimit && ut->chunkOffset<=ut->chunkLength); - int32_t nativeOffset = u8b->mapToNative[ut->chunkOffset + u8b->bufStartIdx] + u8b->toUCharsMapStart; - U_ASSERT(nativeOffset >= ut->chunkNativeStart && nativeOffset <= ut->chunkNativeLimit); - return nativeOffset; -} - -// -// Map a native index to the corrsponding chunk offset -// -static int32_t U_CALLCONV -utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) { - U_ASSERT(index64 <= 0x7fffffff); - int32_t index = (int32_t)index64; - UTF8Buf *u8b = (UTF8Buf *)ut->p; - U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit); - U_ASSERT(index<=ut->chunkNativeLimit); - int32_t mapIndex = index - u8b->toUCharsMapStart; - U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars)); - int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; - U_ASSERT(offset>=0 && offset<=ut->chunkLength); - return offset; -} - -static UText * U_CALLCONV -utf8TextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) -{ - // First do a generic shallow clone. Does everything needed for the UText struct itself. - dest = shallowTextClone(dest, src, status); - - // For deep clones, make a copy of the string. - // The copied storage is owned by the newly created clone. - // - // TODO: There is an isssue with using utext_nativeLength(). - // That function is non-const in cases where the input was NUL terminated - // and the length has not yet been determined. - // This function (clone()) is const. - // There potentially a thread safety issue lurking here. 
- // - if (deep && U_SUCCESS(*status)) { - int32_t len = (int32_t)utext_nativeLength((UText *)src); - char *copyStr = (char *)uprv_malloc(len+1); - if (copyStr == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - } else { - uprv_memcpy(copyStr, src->context, len+1); - dest->context = copyStr; - dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); - } - } - return dest; -} - - -static void U_CALLCONV -utf8TextClose(UText *ut) { - // Most of the work of close is done by the generic UText framework close. - // All that needs to be done here is to delete the UTF8 string if the UText - // owns it. This occurs if the UText was created by cloning. - if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { - char *s = (char *)ut->context; - uprv_free(s); - ut->context = NULL; - } -} - -U_CDECL_END - - -static const struct UTextFuncs utf8Funcs = -{ - sizeof(UTextFuncs), - 0, 0, 0, // Reserved alignment padding - utf8TextClone, - utf8TextLength, - utf8TextAccess, - utf8TextExtract, - NULL, /* replace*/ - NULL, /* copy */ - utf8TextMapOffsetToNative, - utf8TextMapIndexToUTF16, - utf8TextClose, - NULL, // spare 1 - NULL, // spare 2 - NULL // spare 3 -}; - - -static const char gEmptyString[] = {0}; - -U_CAPI UText * U_EXPORT2 -utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status) { - if(U_FAILURE(*status)) { - return NULL; - } - if(s==NULL && length==0) { - s = gEmptyString; - } - - if(s==NULL || length<-1 || length>INT32_MAX) { - *status=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - ut = utext_setup(ut, sizeof(UTF8Buf) * 2, status); - if (U_FAILURE(*status)) { - return ut; - } - - ut->pFuncs = &utf8Funcs; - ut->context = s; - ut->b = (int32_t)length; - ut->c = (int32_t)length; - if (ut->c < 0) { - ut->c = 0; - ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); - } - ut->p = ut->pExtra; - ut->q = (char *)ut->pExtra + sizeof(UTF8Buf); - return ut; - -} - - - - - - - - 
-//------------------------------------------------------------------------------ -// -// UText implementation wrapper for Replaceable (read/write) -// -// Use of UText data members: -// context pointer to Replaceable. -// p pointer to Replaceable if it is owned by the UText. -// -//------------------------------------------------------------------------------ - - - -// minimum chunk size for this implementation: 3 -// to allow for possible trimming for code point boundaries -enum { REP_TEXT_CHUNK_SIZE=10 }; - -struct ReplExtra { - /* - * Chunk UChars. - * +1 to simplify filling with surrogate pair at the end. - */ - UChar s[REP_TEXT_CHUNK_SIZE+1]; -}; - - -U_CDECL_BEGIN - -static UText * U_CALLCONV -repTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) { - // First do a generic shallow clone. Does everything needed for the UText struct itself. - dest = shallowTextClone(dest, src, status); - - // For deep clones, make a copy of the Replaceable. - // The copied Replaceable storage is owned by the newly created UText clone. - // A non-NULL pointer in UText.p is the signal to the close() function to delete - // it. - // - if (deep && U_SUCCESS(*status)) { - const Replaceable *replSrc = (const Replaceable *)src->context; - dest->context = replSrc->clone(); - dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); - - // with deep clone, the copy is writable, even when the source is not. - dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE); - } - return dest; -} - - -static void U_CALLCONV -repTextClose(UText *ut) { - // Most of the work of close is done by the generic UText framework close. - // All that needs to be done here is delete the Replaceable if the UText - // owns it. This occurs if the UText was created by cloning. 
- if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { - Replaceable *rep = (Replaceable *)ut->context; - delete rep; - ut->context = NULL; - } -} - - -static int64_t U_CALLCONV -repTextLength(UText *ut) { - const Replaceable *replSrc = (const Replaceable *)ut->context; - int32_t len = replSrc->length(); - return len; -} - - -static UBool U_CALLCONV -repTextAccess(UText *ut, int64_t index, UBool forward) { - const Replaceable *rep=(const Replaceable *)ut->context; - int32_t length=rep->length(); // Full length of the input text (bigger than a chunk) - - // clip the requested index to the limits of the text. - int32_t index32 = pinIndex(index, length); - U_ASSERT(index<=INT32_MAX); - - - /* - * Compute start/limit boundaries around index, for a segment of text - * to be extracted. - * To allow for the possibility that our user gave an index to the trailing - * half of a surrogate pair, we must request one extra preceding UChar when - * going in the forward direction. This will ensure that the buffer has the - * entire code point at the specified index. - */ - if(forward) { - - if (index32>=ut->chunkNativeStart && index32chunkNativeLimit) { - // Buffer already contains the requested position. - ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart); - return TRUE; - } - if (index32>=length && ut->chunkNativeLimit==length) { - // Request for end of string, and buffer already extends up to it. - // Can't get the data, but don't change the buffer. - ut->chunkOffset = length - (int32_t)ut->chunkNativeStart; - return FALSE; - } - - ut->chunkNativeLimit = index + REP_TEXT_CHUNK_SIZE - 1; - // Going forward, so we want to have the buffer with stuff at and beyond - // the requested index. The -1 gets us one code point before the - // requested index also, to handle the case of the index being on - // a trail surrogate of a surrogate pair. 
- if(ut->chunkNativeLimit > length) { - ut->chunkNativeLimit = length; - } - // unless buffer ran off end, start is index-1. - ut->chunkNativeStart = ut->chunkNativeLimit - REP_TEXT_CHUNK_SIZE; - if(ut->chunkNativeStart < 0) { - ut->chunkNativeStart = 0; - } - } else { - // Reverse iteration. Fill buffer with data preceding the requested index. - if (index32>ut->chunkNativeStart && index32<=ut->chunkNativeLimit) { - // Requested position already in buffer. - ut->chunkOffset = index32 - (int32_t)ut->chunkNativeStart; - return TRUE; - } - if (index32==0 && ut->chunkNativeStart==0) { - // Request for start, buffer already begins at start. - // No data, but keep the buffer as is. - ut->chunkOffset = 0; - return FALSE; - } - - // Figure out the bounds of the chunk to extract for reverse iteration. - // Need to worry about chunk not splitting surrogate pairs, and while still - // containing the data we need. - // Fix by requesting a chunk that includes an extra UChar at the end. - // If this turns out to be a lead surrogate, we can lop it off and still have - // the data we wanted. - ut->chunkNativeStart = index32 + 1 - REP_TEXT_CHUNK_SIZE; - if (ut->chunkNativeStart < 0) { - ut->chunkNativeStart = 0; - } - - ut->chunkNativeLimit = index32 + 1; - if (ut->chunkNativeLimit > length) { - ut->chunkNativeLimit = length; - } - } - - // Extract the new chunk of text from the Replaceable source. - ReplExtra *ex = (ReplExtra *)ut->pExtra; - // UnicodeString with its buffer a writable alias to the chunk buffer - UnicodeString buffer(ex->s, 0 /*buffer length*/, REP_TEXT_CHUNK_SIZE /*buffer capacity*/); - rep->extractBetween((int32_t)ut->chunkNativeStart, (int32_t)ut->chunkNativeLimit, buffer); - - ut->chunkContents = ex->s; - ut->chunkLength = (int32_t)(ut->chunkNativeLimit - ut->chunkNativeStart); - ut->chunkOffset = (int32_t)(index32 - ut->chunkNativeStart); - - // Surrogate pairs from the input text must not span chunk boundaries. 
- // If end of chunk could be the start of a surrogate, trim it off. - if (ut->chunkNativeLimit < length && - U16_IS_LEAD(ex->s[ut->chunkLength-1])) { - ut->chunkLength--; - ut->chunkNativeLimit--; - if (ut->chunkOffset > ut->chunkLength) { - ut->chunkOffset = ut->chunkLength; - } - } - - // if the first UChar in the chunk could be the trailing half of a surrogate pair, - // trim it off. - if(ut->chunkNativeStart>0 && U16_IS_TRAIL(ex->s[0])) { - ++(ut->chunkContents); - ++(ut->chunkNativeStart); - --(ut->chunkLength); - --(ut->chunkOffset); - } - - // adjust the index/chunkOffset to a code point boundary - U16_SET_CP_START(ut->chunkContents, 0, ut->chunkOffset); - - // Use fast indexing for get/setNativeIndex() - ut->nativeIndexingLimit = ut->chunkLength; - - return TRUE; -} - - - -static int32_t U_CALLCONV -repTextExtract(UText *ut, - int64_t start, int64_t limit, - UChar *dest, int32_t destCapacity, - UErrorCode *status) { - const Replaceable *rep=(const Replaceable *)ut->context; - int32_t length=rep->length(); - - if(U_FAILURE(*status)) { - return 0; - } - if(destCapacity<0 || (dest==NULL && destCapacity>0)) { - *status=U_ILLEGAL_ARGUMENT_ERROR; - } - if(start>limit) { - *status=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - int32_t start32 = pinIndex(start, length); - int32_t limit32 = pinIndex(limit, length); - - // adjust start, limit if they point to trail half of surrogates - if (start32charAt(start32)) && - U_IS_SUPPLEMENTARY(rep->char32At(start32))){ - start32--; - } - if (limit32charAt(limit32)) && - U_IS_SUPPLEMENTARY(rep->char32At(limit32))){ - limit32--; - } - - length=limit32-start32; - if(length>destCapacity) { - limit32 = start32 + destCapacity; - } - UnicodeString buffer(dest, 0, destCapacity); // writable alias - rep->extractBetween(start32, limit32, buffer); - repTextAccess(ut, limit32, TRUE); - - return u_terminateUChars(dest, destCapacity, length, status); -} - -static int32_t U_CALLCONV -repTextReplace(UText *ut, - int64_t start, int64_t 
limit, - const UChar *src, int32_t length, - UErrorCode *status) { - Replaceable *rep=(Replaceable *)ut->context; - int32_t oldLength; - - if(U_FAILURE(*status)) { - return 0; - } - if(src==NULL && length!=0) { - *status=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - oldLength=rep->length(); // will subtract from new length - if(start>limit ) { - *status=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - int32_t start32 = pinIndex(start, oldLength); - int32_t limit32 = pinIndex(limit, oldLength); - - // Snap start & limit to code point boundaries. - if (start32charAt(start32)) && - start32>0 && U16_IS_LEAD(rep->charAt(start32-1))) - { - start32--; - } - if (limit32charAt(limit32-1)) && - U16_IS_TRAIL(rep->charAt(limit32))) - { - limit32++; - } - - // Do the actual replace operation using methods of the Replaceable class - UnicodeString replStr((UBool)(length<0), src, length); // read-only alias - rep->handleReplaceBetween(start32, limit32, replStr); - int32_t newLength = rep->length(); - int32_t lengthDelta = newLength - oldLength; - - // Is the UText chunk buffer OK? - if (ut->chunkNativeLimit > start32) { - // this replace operation may have impacted the current chunk. - // invalidate it, which will force a reload on the next access. - invalidateChunk(ut); - } - - // set the iteration position to the end of the newly inserted replacement text. 
- int32_t newIndexPos = limit32 + lengthDelta; - repTextAccess(ut, newIndexPos, TRUE); - - return lengthDelta; -} - - -static void U_CALLCONV -repTextCopy(UText *ut, - int64_t start, int64_t limit, - int64_t destIndex, - UBool move, - UErrorCode *status) -{ - Replaceable *rep=(Replaceable *)ut->context; - int32_t length=rep->length(); - - if(U_FAILURE(*status)) { - return; - } - if (start>limit || (startcopy(start32, limit32, destIndex32); - if(destIndex32handleReplaceBetween(start32, limit32, UnicodeString()); - } else { - // copy - rep->copy(start32, limit32, destIndex32); - } - - // If the change to the text touched the region in the chunk buffer, - // invalidate the buffer. - int32_t firstAffectedIndex = destIndex32; - if (move && start32chunkNativeLimit) { - // changes may have affected range covered by the chunk - invalidateChunk(ut); - } - - // Put iteration position at the newly inserted (moved) block, - int32_t nativeIterIndex = destIndex32 + limit32 - start32; - if (move && destIndex32>start32) { - // moved a block of text towards the end of the string. - nativeIterIndex = destIndex32; - } - - // Set position, reload chunk if needed. 
- repTextAccess(ut, nativeIterIndex, TRUE); -} - -static const struct UTextFuncs repFuncs = -{ - sizeof(UTextFuncs), - 0, 0, 0, // Reserved alignment padding - repTextClone, - repTextLength, - repTextAccess, - repTextExtract, - repTextReplace, - repTextCopy, - NULL, // MapOffsetToNative, - NULL, // MapIndexToUTF16, - repTextClose, - NULL, // spare 1 - NULL, // spare 2 - NULL // spare 3 -}; - - -U_CAPI UText * U_EXPORT2 -utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status) -{ - if(U_FAILURE(*status)) { - return NULL; - } - if(rep==NULL) { - *status=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - ut = utext_setup(ut, sizeof(ReplExtra), status); - if(U_FAILURE(*status)) { - return ut; - } - - ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE); - if(rep->hasMetaData()) { - ut->providerProperties |=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA); - } - - ut->pFuncs = &repFuncs; - ut->context = rep; - return ut; -} - -U_CDECL_END - - - - - - - - -//------------------------------------------------------------------------------ -// -// UText implementation for UnicodeString (read/write) and -// for const UnicodeString (read only) -// (same implementation, only the flags are different) -// -// Use of UText data members: -// context pointer to UnicodeString -// p pointer to UnicodeString IF this UText owns the string -// and it must be deleted on close(). NULL otherwise. -// -//------------------------------------------------------------------------------ - -U_CDECL_BEGIN - - -static UText * U_CALLCONV -unistrTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) { - // First do a generic shallow clone. Does everything needed for the UText struct itself. - dest = shallowTextClone(dest, src, status); - - // For deep clones, make a copy of the UnicodeSring. - // The copied UnicodeString storage is owned by the newly created UText clone. - // A non-NULL pointer in UText.p is the signal to the close() function to delete - // the UText. 
- // - if (deep && U_SUCCESS(*status)) { - const UnicodeString *srcString = (const UnicodeString *)src->context; - dest->context = new UnicodeString(*srcString); - dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); - - // with deep clone, the copy is writable, even when the source is not. - dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE); - } - return dest; -} - -static void U_CALLCONV -unistrTextClose(UText *ut) { - // Most of the work of close is done by the generic UText framework close. - // All that needs to be done here is delete the UnicodeString if the UText - // owns it. This occurs if the UText was created by cloning. - if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { - UnicodeString *str = (UnicodeString *)ut->context; - delete str; - ut->context = NULL; - } -} - - -static int64_t U_CALLCONV -unistrTextLength(UText *t) { - return ((const UnicodeString *)t->context)->length(); -} - - -static UBool U_CALLCONV -unistrTextAccess(UText *ut, int64_t index, UBool forward) { - int32_t length = ut->chunkLength; - ut->chunkOffset = pinIndex(index, length); - - // Check whether request is at the start or end - UBool retVal = (forward && index0); - return retVal; -} - - - -static int32_t U_CALLCONV -unistrTextExtract(UText *t, - int64_t start, int64_t limit, - UChar *dest, int32_t destCapacity, - UErrorCode *pErrorCode) { - const UnicodeString *us=(const UnicodeString *)t->context; - int32_t length=us->length(); - - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(destCapacity<0 || (dest==NULL && destCapacity>0)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } - if(start<0 || start>limit) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - int32_t start32 = startgetChar32Start((int32_t)start) : length; - int32_t limit32 = limitgetChar32Start((int32_t)limit) : length; - - length=limit32-start32; - if (destCapacity>0 && dest!=NULL) { - int32_t trimmedLength = length; - if(trimmedLength>destCapacity) { - 
trimmedLength=destCapacity; - } - us->extract(start32, trimmedLength, dest); - t->chunkOffset = start32+trimmedLength; - } else { - t->chunkOffset = start32; - } - u_terminateUChars(dest, destCapacity, length, pErrorCode); - return length; -} - -static int32_t U_CALLCONV -unistrTextReplace(UText *ut, - int64_t start, int64_t limit, - const UChar *src, int32_t length, - UErrorCode *pErrorCode) { - UnicodeString *us=(UnicodeString *)ut->context; - int32_t oldLength; - - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(src==NULL && length!=0) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } - if(start>limit) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - oldLength=us->length(); - int32_t start32 = pinIndex(start, oldLength); - int32_t limit32 = pinIndex(limit, oldLength); - if (start32 < oldLength) { - start32 = us->getChar32Start(start32); - } - if (limit32 < oldLength) { - limit32 = us->getChar32Start(limit32); - } - - // replace - us->replace(start32, limit32-start32, src, length); - int32_t newLength = us->length(); - - // Update the chunk description. - ut->chunkContents = us->getBuffer(); - ut->chunkLength = newLength; - ut->chunkNativeLimit = newLength; - ut->nativeIndexingLimit = newLength; - - // Set iteration position to the point just following the newly inserted text. 
- int32_t lengthDelta = newLength - oldLength; - ut->chunkOffset = limit32 + lengthDelta; - - return lengthDelta; -} - -static void U_CALLCONV -unistrTextCopy(UText *ut, - int64_t start, int64_t limit, - int64_t destIndex, - UBool move, - UErrorCode *pErrorCode) { - UnicodeString *us=(UnicodeString *)ut->context; - int32_t length=us->length(); - - if(U_FAILURE(*pErrorCode)) { - return; - } - int32_t start32 = pinIndex(start, length); - int32_t limit32 = pinIndex(limit, length); - int32_t destIndex32 = pinIndex(destIndex, length); - - if( start32>limit32 || (start32copy(start32, limit32, destIndex32); - if(destIndex32remove(start32, segLength); - } else { - // copy - us->copy(start32, limit32, destIndex32); - } - - // update chunk description, set iteration position. - ut->chunkContents = us->getBuffer(); - if (move==FALSE) { - // copy operation, string length grows - ut->chunkLength += limit32-start32; - ut->chunkNativeLimit = ut->chunkLength; - ut->nativeIndexingLimit = ut->chunkLength; - } - - // Iteration position to end of the newly inserted text. 
- ut->chunkOffset = destIndex32+limit32-start32; - if (move && destIndex32>start32) { - ut->chunkOffset = destIndex32; - } - -} - -static const struct UTextFuncs unistrFuncs = -{ - sizeof(UTextFuncs), - 0, 0, 0, // Reserved alignment padding - unistrTextClone, - unistrTextLength, - unistrTextAccess, - unistrTextExtract, - unistrTextReplace, - unistrTextCopy, - NULL, // MapOffsetToNative, - NULL, // MapIndexToUTF16, - unistrTextClose, - NULL, // spare 1 - NULL, // spare 2 - NULL // spare 3 -}; - - - -U_CDECL_END - - -U_CAPI UText * U_EXPORT2 -utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) { - ut = utext_openConstUnicodeString(ut, s, status); - if (U_SUCCESS(*status)) { - ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE); - } - return ut; -} - - - -U_CAPI UText * U_EXPORT2 -utext_openConstUnicodeString(UText *ut, const UnicodeString *s, UErrorCode *status) { - if (U_SUCCESS(*status) && s->isBogus()) { - // The UnicodeString is bogus, but we still need to detach the UText - // from whatever it was hooked to before, if anything. - utext_openUChars(ut, NULL, 0, status); - *status = U_ILLEGAL_ARGUMENT_ERROR; - return ut; - } - ut = utext_setup(ut, 0, status); - // note: use the standard (writable) function table for UnicodeString. - // The flag settings disable writing, so having the functions in - // the table is harmless. - if (U_SUCCESS(*status)) { - ut->pFuncs = &unistrFuncs; - ut->context = s; - ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS); - ut->chunkContents = s->getBuffer(); - ut->chunkLength = s->length(); - ut->chunkNativeStart = 0; - ut->chunkNativeLimit = ut->chunkLength; - ut->nativeIndexingLimit = ut->chunkLength; - } - return ut; -} - -//------------------------------------------------------------------------------ -// -// UText implementation for const UChar * strings -// -// Use of UText data members: -// context pointer to UnicodeString -// a length. -1 if not yet known. 
-// -// TODO: support 64 bit lengths. -// -//------------------------------------------------------------------------------ - -U_CDECL_BEGIN - - -static UText * U_CALLCONV -ucstrTextClone(UText *dest, const UText * src, UBool deep, UErrorCode * status) { - // First do a generic shallow clone. - dest = shallowTextClone(dest, src, status); - - // For deep clones, make a copy of the string. - // The copied storage is owned by the newly created clone. - // A non-NULL pointer in UText.p is the signal to the close() function to delete - // it. - // - if (deep && U_SUCCESS(*status)) { - U_ASSERT(utext_nativeLength(dest) < INT32_MAX); - int32_t len = (int32_t)utext_nativeLength(dest); - - // The cloned string IS going to be NUL terminated, whether or not the original was. - const UChar *srcStr = (const UChar *)src->context; - UChar *copyStr = (UChar *)uprv_malloc((len+1) * sizeof(UChar)); - if (copyStr == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - } else { - int64_t i; - for (i=0; icontext = copyStr; - dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); - } - } - return dest; -} - - -static void U_CALLCONV -ucstrTextClose(UText *ut) { - // Most of the work of close is done by the generic UText framework close. - // All that needs to be done here is delete the string if the UText - // owns it. This occurs if the UText was created by cloning. - if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { - UChar *s = (UChar *)ut->context; - uprv_free(s); - ut->context = NULL; - } -} - - - -static int64_t U_CALLCONV -ucstrTextLength(UText *ut) { - if (ut->a < 0) { - // null terminated, we don't yet know the length. Scan for it. - // Access is not convenient for doing this - // because the current interation postion can't be changed. 
- const UChar *str = (const UChar *)ut->context; - for (;;) { - if (str[ut->chunkNativeLimit] == 0) { - break; - } - ut->chunkNativeLimit++; - } - ut->a = ut->chunkNativeLimit; - ut->chunkLength = (int32_t)ut->chunkNativeLimit; - ut->nativeIndexingLimit = ut->chunkLength; - ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); - } - return ut->a; -} - - -static UBool U_CALLCONV -ucstrTextAccess(UText *ut, int64_t index, UBool forward) { - const UChar *str = (const UChar *)ut->context; - - // pin the requested index to the bounds of the string, - // and set current iteration position. - if (index<0) { - index = 0; - } else if (index < ut->chunkNativeLimit) { - // The request data is within the chunk as it is known so far. - // Put index on a code point boundary. - U16_SET_CP_START(str, 0, index); - } else if (ut->a >= 0) { - // We know the length of this string, and the user is requesting something - // at or beyond the length. Pin the requested index to the length. - index = ut->a; - } else { - // Null terminated string, length not yet known, and the requested index - // is beyond where we have scanned so far. - // Scan to 32 UChars beyond the requested index. The strategy here is - // to avoid fully scanning a long string when the caller only wants to - // see a few characters at its beginning. - int32_t scanLimit = (int32_t)index + 32; - if ((index + 32)>INT32_MAX || (index + 32)<0 ) { // note: int64 expression - scanLimit = INT32_MAX; - } - - int32_t chunkLimit = (int32_t)ut->chunkNativeLimit; - for (; chunkLimita = chunkLimit; - ut->chunkLength = chunkLimit; - ut->nativeIndexingLimit = chunkLimit; - if (index >= chunkLimit) { - index = chunkLimit; - } else { - U16_SET_CP_START(str, 0, index); - } - - ut->chunkNativeLimit = chunkLimit; - ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); - goto breakout; - } - } - // We scanned through the next batch of UChars without finding the end. 
- U16_SET_CP_START(str, 0, index); - if (chunkLimit == INT32_MAX) { - // Scanned to the limit of a 32 bit length. - // Forceably trim the overlength string back so length fits in int32 - // TODO: add support for 64 bit strings. - ut->a = chunkLimit; - ut->chunkLength = chunkLimit; - ut->nativeIndexingLimit = chunkLimit; - if (index > chunkLimit) { - index = chunkLimit; - } - ut->chunkNativeLimit = chunkLimit; - ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); - } else { - // The endpoint of a chunk must not be left in the middle of a surrogate pair. - // If the current end is on a lead surrogate, back the end up by one. - // It doesn't matter if the end char happens to be an unpaired surrogate, - // and it's simpler not to worry about it. - if (U16_IS_LEAD(str[chunkLimit-1])) { - --chunkLimit; - } - // Null-terminated chunk with end still unknown. - // Update the chunk length to reflect what has been scanned thus far. - // That the full length is still unknown is (still) flagged by - // ut->a being < 0. - ut->chunkNativeLimit = chunkLimit; - ut->nativeIndexingLimit = chunkLimit; - ut->chunkLength = chunkLimit; - } - - } -breakout: - U_ASSERT(index<=INT32_MAX); - ut->chunkOffset = (int32_t)index; - - // Check whether request is at the start or end - UBool retVal = (forward && indexchunkNativeLimit) || (!forward && index>0); - return retVal; -} - - - -static int32_t U_CALLCONV -ucstrTextExtract(UText *ut, - int64_t start, int64_t limit, - UChar *dest, int32_t destCapacity, - UErrorCode *pErrorCode) -{ - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - //const UChar *s=(const UChar *)ut->context; - int32_t si, di; - - int32_t start32; - int32_t limit32; - - // Access the start. Does two things we need: - // Pins 'start' to the length of the string, if it came in out-of-bounds. 
- // Snaps 'start' to the beginning of a code point. - ucstrTextAccess(ut, start, TRUE); - const UChar *s=ut->chunkContents; - start32 = ut->chunkOffset; - - int32_t strLength=(int32_t)ut->a; - if (strLength >= 0) { - limit32 = pinIndex(limit, strLength); - } else { - limit32 = pinIndex(limit, INT32_MAX); - } - di = 0; - for (si=start32; sia = si; // set string length for this UText - ut->chunkNativeLimit = si; - ut->chunkLength = si; - ut->nativeIndexingLimit = si; - strLength = si; - limit32 = si; - break; - } - U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must not happen logically */ - if (di=0) { - // We have filled the destination buffer, and the string length is known. - // Cut the loop short. There is no need to scan string termination. - di = limit32 - start32; - si = limit32; - break; - } - } - di++; - } - - // If the limit index points to a lead surrogate of a pair, - // add the corresponding trail surrogate to the destination. - if (si>0 && U16_IS_LEAD(s[si-1]) && - ((sichunkNativeLimit) { - ut->chunkOffset = si; - } else { - ucstrTextAccess(ut, si, TRUE); - } - - // Add a terminating NUL if space in the buffer permits, - // and set the error status as required. 
- u_terminateUChars(dest, destCapacity, di, pErrorCode); - return di; -} - -static const struct UTextFuncs ucstrFuncs = -{ - sizeof(UTextFuncs), - 0, 0, 0, // Reserved alignment padding - ucstrTextClone, - ucstrTextLength, - ucstrTextAccess, - ucstrTextExtract, - NULL, // Replace - NULL, // Copy - NULL, // MapOffsetToNative, - NULL, // MapIndexToUTF16, - ucstrTextClose, - NULL, // spare 1 - NULL, // spare 2 - NULL, // spare 3 -}; - -U_CDECL_END - -static const UChar gEmptyUString[] = {0}; - -U_CAPI UText * U_EXPORT2 -utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status) { - if (U_FAILURE(*status)) { - return NULL; - } - if(s==NULL && length==0) { - s = gEmptyUString; - } - if (s==NULL || length < -1 || length>INT32_MAX) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - ut = utext_setup(ut, 0, status); - if (U_SUCCESS(*status)) { - ut->pFuncs = &ucstrFuncs; - ut->context = s; - ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS); - if (length==-1) { - ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); - } - ut->a = length; - ut->chunkContents = s; - ut->chunkNativeStart = 0; - ut->chunkNativeLimit = length>=0? length : 0; - ut->chunkLength = (int32_t)ut->chunkNativeLimit; - ut->chunkOffset = 0; - ut->nativeIndexingLimit = ut->chunkLength; - } - return ut; -} - - -//------------------------------------------------------------------------------ -// -// UText implementation for text from ICU CharacterIterators -// -// Use of UText data members: -// context pointer to the CharacterIterator -// a length of the full text. -// p pointer to buffer 1 -// b start index of local buffer 1 contents -// q pointer to buffer 2 -// c start index of local buffer 2 contents -// r pointer to the character iterator if the UText owns it. -// Null otherwise. 
-// -//------------------------------------------------------------------------------ -#define CIBufSize 16 - -U_CDECL_BEGIN -static void U_CALLCONV -charIterTextClose(UText *ut) { - // Most of the work of close is done by the generic UText framework close. - // All that needs to be done here is delete the CharacterIterator if the UText - // owns it. This occurs if the UText was created by cloning. - CharacterIterator *ci = (CharacterIterator *)ut->r; - delete ci; - ut->r = NULL; -} - -static int64_t U_CALLCONV -charIterTextLength(UText *ut) { - return (int32_t)ut->a; -} - -static UBool U_CALLCONV -charIterTextAccess(UText *ut, int64_t index, UBool forward) { - CharacterIterator *ci = (CharacterIterator *)ut->context; - - int32_t clippedIndex = (int32_t)index; - if (clippedIndex<0) { - clippedIndex=0; - } else if (clippedIndex>=ut->a) { - clippedIndex=(int32_t)ut->a; - } - int32_t neededIndex = clippedIndex; - if (!forward && neededIndex>0) { - // reverse iteration, want the position just before what was asked for. - neededIndex--; - } else if (forward && neededIndex==ut->a && neededIndex>0) { - // Forward iteration, don't ask for something past the end of the text. - neededIndex--; - } - - // Find the native index of the start of the buffer containing what we want. - neededIndex -= neededIndex % CIBufSize; - - UChar *buf = NULL; - UBool needChunkSetup = TRUE; - int i; - if (ut->chunkNativeStart == neededIndex) { - // The buffer we want is already the current chunk. - needChunkSetup = FALSE; - } else if (ut->b == neededIndex) { - // The first buffer (buffer p) has what we need. - buf = (UChar *)ut->p; - } else if (ut->c == neededIndex) { - // The second buffer (buffer q) has what we need. - buf = (UChar *)ut->q; - } else { - // Neither buffer already has what we need. - // Load new data from the character iterator. - // Use the buf that is not the current buffer. 
- buf = (UChar *)ut->p; - if (ut->p == ut->chunkContents) { - buf = (UChar *)ut->q; - } - ci->setIndex(neededIndex); - for (i=0; inextPostInc(); - if (i+neededIndex > ut->a) { - break; - } - } - } - - // We have a buffer with the data we need. - // Set it up as the current chunk, if it wasn't already. - if (needChunkSetup) { - ut->chunkContents = buf; - ut->chunkLength = CIBufSize; - ut->chunkNativeStart = neededIndex; - ut->chunkNativeLimit = neededIndex + CIBufSize; - if (ut->chunkNativeLimit > ut->a) { - ut->chunkNativeLimit = ut->a; - ut->chunkLength = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chunkNativeStart); - } - ut->nativeIndexingLimit = ut->chunkLength; - U_ASSERT(ut->chunkOffset>=0 && ut->chunkOffset<=CIBufSize); - } - ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart; - UBool success = (forward? ut->chunkOffsetchunkLength : ut->chunkOffset>0); - return success; -} - -static UText * U_CALLCONV -charIterTextClone(UText *dest, const UText *src, UBool deep, UErrorCode * status) { - if (U_FAILURE(*status)) { - return NULL; - } - - if (deep) { - // There is no CharacterIterator API for cloning the underlying text storage. - *status = U_UNSUPPORTED_ERROR; - return NULL; - } else { - CharacterIterator *srcCI =(CharacterIterator *)src->context; - srcCI = srcCI->clone(); - dest = utext_openCharacterIterator(dest, srcCI, status); - if (U_FAILURE(*status)) { - return dest; - } - // cast off const on getNativeIndex. - // For CharacterIterator based UTexts, this is safe, the operation is const. 
- int64_t ix = utext_getNativeIndex((UText *)src); - utext_setNativeIndex(dest, ix); - dest->r = srcCI; // flags that this UText owns the CharacterIterator - } - return dest; -} - -static int32_t U_CALLCONV -charIterTextExtract(UText *ut, - int64_t start, int64_t limit, - UChar *dest, int32_t destCapacity, - UErrorCode *status) -{ - if(U_FAILURE(*status)) { - return 0; - } - if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) { - *status=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - int32_t length = (int32_t)ut->a; - int32_t start32 = pinIndex(start, length); - int32_t limit32 = pinIndex(limit, length); - int32_t desti = 0; - int32_t srci; - int32_t copyLimit; - - CharacterIterator *ci = (CharacterIterator *)ut->context; - ci->setIndex32(start32); // Moves ix to lead of surrogate pair, if needed. - srci = ci->getIndex(); - copyLimit = srci; - while (srcinext32PostInc(); - int32_t len = U16_LENGTH(c); - U_ASSERT(desti+len>0); /* to ensure desti+len never exceeds MAX_INT32, which must not happen logically */ - if (desti+len <= destCapacity) { - U16_APPEND_UNSAFE(dest, desti, c); - copyLimit = srci+len; - } else { - desti += len; - *status = U_BUFFER_OVERFLOW_ERROR; - } - srci += len; - } - - charIterTextAccess(ut, copyLimit, TRUE); - - u_terminateUChars(dest, destCapacity, desti, status); - return desti; -} - -static const struct UTextFuncs charIterFuncs = -{ - sizeof(UTextFuncs), - 0, 0, 0, // Reserved alignment padding - charIterTextClone, - charIterTextLength, - charIterTextAccess, - charIterTextExtract, - NULL, // Replace - NULL, // Copy - NULL, // MapOffsetToNative, - NULL, // MapIndexToUTF16, - charIterTextClose, - NULL, // spare 1 - NULL, // spare 2 - NULL // spare 3 -}; -U_CDECL_END - - -U_CAPI UText * U_EXPORT2 -utext_openCharacterIterator(UText *ut, CharacterIterator *ci, UErrorCode *status) { - if (U_FAILURE(*status)) { - return NULL; - } - - if (ci->startIndex() > 0) { - // No support for CharacterIterators that do not start indexing from 
zero. - *status = U_UNSUPPORTED_ERROR; - return NULL; - } - - // Extra space in UText for 2 buffers of CIBufSize UChars each. - int32_t extraSpace = 2 * CIBufSize * sizeof(UChar); - ut = utext_setup(ut, extraSpace, status); - if (U_SUCCESS(*status)) { - ut->pFuncs = &charIterFuncs; - ut->context = ci; - ut->providerProperties = 0; - ut->a = ci->endIndex(); // Length of text - ut->p = ut->pExtra; // First buffer - ut->b = -1; // Native index of first buffer contents - ut->q = (UChar*)ut->pExtra+CIBufSize; // Second buffer - ut->c = -1; // Native index of second buffer contents - - // Initialize current chunk contents to be empty. - // First access will fault something in. - // Note: The initial nativeStart and chunkOffset must sum to zero - // so that getNativeIndex() will correctly compute to zero - // if no call to Access() has ever been made. They can't be both - // zero without Access() thinking that the chunk is valid. - ut->chunkContents = (UChar *)ut->p; - ut->chunkNativeStart = -1; - ut->chunkOffset = 1; - ut->chunkNativeLimit = 0; - ut->chunkLength = 0; - ut->nativeIndexingLimit = ut->chunkOffset; // enables native indexing - } - return ut; -} diff --git a/deps/node/deps/icu-small/source/common/utf_impl.cpp b/deps/node/deps/icu-small/source/common/utf_impl.cpp deleted file mode 100644 index 9dd241a1..00000000 --- a/deps/node/deps/icu-small/source/common/utf_impl.cpp +++ /dev/null @@ -1,329 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1999-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: utf_impl.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999sep13 -* created by: Markus W. 
Scherer -* -* This file provides implementation functions for macros in the utfXX.h -* that would otherwise be too long as macros. -*/ - -/* set import/export definitions */ -#ifndef U_UTF8_IMPL -# define U_UTF8_IMPL -#endif - -#include "unicode/utypes.h" -#include "unicode/utf.h" -#include "unicode/utf8.h" -#include "uassert.h" - -/* - * Table of the number of utf8 trail bytes, indexed by the lead byte. - * Used by the deprecated macro UTF8_COUNT_TRAIL_BYTES, defined in utf_old.h - * - * The current macro, U8_COUNT_TRAIL_BYTES, does _not_ use this table. - * - * Note that this table cannot be removed, even if UTF8_COUNT_TRAIL_BYTES were - * changed to no longer use it. References to the table from expansions of UTF8_COUNT_TRAIL_BYTES - * may exist in old client code that must continue to run with newer icu library versions. - * - * This table could be replaced on many machines by - * a few lines of assembler code using an - * "index of first 0-bit from msb" instruction and - * one or two more integer instructions. 
- * - * For example, on an i386, do something like - * - MOV AL, leadByte - * - NOT AL (8-bit, leave b15..b8==0..0, reverse only b7..b0) - * - MOV AH, 0 - * - BSR BX, AX (16-bit) - * - MOV AX, 6 (result) - * - JZ finish (ZF==1 if leadByte==0xff) - * - SUB AX, BX (result) - * -finish: - * (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB) - */ -extern "C" U_EXPORT const uint8_t -utf8_countTrailBytes[256]={ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - // illegal C0 & C1 - // 2-byte lead bytes C2..DF - 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - // 3-byte lead bytes E0..EF - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - // 4-byte lead bytes F0..F4 - // illegal F5..FF - 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static const UChar32 -utf8_errorValue[6]={ - // Same values as UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_2, UTF_ERROR_VALUE, - // but without relying on the obsolete unicode/utf_old.h. - 0x15, 0x9f, 0xffff, - 0x10ffff -}; - -static UChar32 -errorValue(int32_t count, int8_t strict) { - if(strict>=0) { - return utf8_errorValue[count]; - } else if(strict==-3) { - return 0xfffd; - } else { - return U_SENTINEL; - } -} - -/* - * Handle the non-inline part of the U8_NEXT() and U8_NEXT_FFFD() macros - * and their obsolete sibling UTF8_NEXT_CHAR_SAFE(). - * - * U8_NEXT() supports NUL-terminated strings indicated via length<0. 
- * - * The "strict" parameter controls the error behavior: - * <0 "Safe" behavior of U8_NEXT(): - * -1: All illegal byte sequences yield U_SENTINEL=-1. - * -2: Same as -1, except for lenient treatment of surrogate code points as legal. - * Some implementations use this for roundtripping of - * Unicode 16-bit strings that are not well-formed UTF-16, that is, they - * contain unpaired surrogates. - * -3: All illegal byte sequences yield U+FFFD. - * 0 Obsolete "safe" behavior of UTF8_NEXT_CHAR_SAFE(..., FALSE): - * All illegal byte sequences yield a positive code point such that this - * result code point would be encoded with the same number of bytes as - * the illegal sequence. - * >0 Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., TRUE): - * Same as the obsolete "safe" behavior, but non-characters are also treated - * like illegal sequences. - * - * Note that a UBool is the same as an int8_t. - */ -U_CAPI UChar32 U_EXPORT2 -utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) { - // *pi is one after byte c. - int32_t i=*pi; - // length can be negative for NUL-terminated strings: Read and validate one byte at a time. - if(i==length || c>0xf4) { - // end of string, or not a lead byte - } else if(c>=0xf0) { - // Test for 4-byte sequences first because - // U8_NEXT() handles shorter valid sequences inline. 
- uint8_t t1=s[i], t2, t3; - c&=7; - if(U8_IS_VALID_LEAD4_AND_T1(c, t1) && - ++i!=length && (t2=s[i]-0x80)<=0x3f && - ++i!=length && (t3=s[i]-0x80)<=0x3f) { - ++i; - c=(c<<18)|((t1&0x3f)<<12)|(t2<<6)|t3; - // strict: forbid non-characters like U+fffe - if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { - *pi=i; - return c; - } - } - } else if(c>=0xe0) { - c&=0xf; - if(strict!=-2) { - uint8_t t1=s[i], t2; - if(U8_IS_VALID_LEAD3_AND_T1(c, t1) && - ++i!=length && (t2=s[i]-0x80)<=0x3f) { - ++i; - c=(c<<12)|((t1&0x3f)<<6)|t2; - // strict: forbid non-characters like U+fffe - if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { - *pi=i; - return c; - } - } - } else { - // strict=-2 -> lenient: allow surrogates - uint8_t t1=s[i]-0x80, t2; - if(t1<=0x3f && (c>0 || t1>=0x20) && - ++i!=length && (t2=s[i]-0x80)<=0x3f) { - *pi=i+1; - return (c<<12)|(t1<<6)|t2; - } - } - } else if(c>=0xc2) { - uint8_t t1=s[i]-0x80; - if(t1<=0x3f) { - *pi=i+1; - return ((c-0xc0)<<6)|t1; - } - } // else 0x80<=c<0xc2 is not a lead byte - - /* error handling */ - c=errorValue(i-*pi, strict); - *pi=i; - return c; -} - -U_CAPI int32_t U_EXPORT2 -utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError) { - if((uint32_t)(c)<=0x7ff) { - if((i)+1<(length)) { - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); - return i; - } - } else if((uint32_t)(c)<=0xffff) { - /* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. 
*/ - if((i)+2<(length) && !U_IS_SURROGATE(c)) { - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); - return i; - } - } else if((uint32_t)(c)<=0x10ffff) { - if((i)+3<(length)) { - (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); - (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); - return i; - } - } - /* c>0x10ffff or not enough space, write an error value */ - if(pIsError!=NULL) { - *pIsError=TRUE; - } else { - length-=i; - if(length>0) { - int32_t offset; - if(length>3) { - length=3; - } - s+=i; - offset=0; - c=utf8_errorValue[length-1]; - U8_APPEND_UNSAFE(s, offset, c); - i=i+offset; - } - } - return i; -} - -U_CAPI UChar32 U_EXPORT2 -utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict) { - // *pi is the index of byte c. - int32_t i=*pi; - if(U8_IS_TRAIL(c) && i>start) { - uint8_t b1=s[--i]; - if(U8_IS_LEAD(b1)) { - if(b1<0xe0) { - *pi=i; - return ((b1-0xc0)<<6)|(c&0x3f); - } else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) { - // Truncated 3- or 4-byte sequence. - *pi=i; - return errorValue(1, strict); - } - } else if(U8_IS_TRAIL(b1) && i>start) { - // Extract the value bits from the last trail byte. - c&=0x3f; - uint8_t b2=s[--i]; - if(0xe0<=b2 && b2<=0xf4) { - if(b2<0xf0) { - b2&=0xf; - if(strict!=-2) { - if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { - *pi=i; - c=(b2<<12)|((b1&0x3f)<<6)|c; - if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { - return c; - } else { - // strict: forbid non-characters like U+fffe - return errorValue(2, strict); - } - } - } else { - // strict=-2 -> lenient: allow surrogates - b1-=0x80; - if((b2>0 || b1>=0x20)) { - *pi=i; - return (b2<<12)|(b1<<6)|c; - } - } - } else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { - // Truncated 4-byte sequence. 
- *pi=i; - return errorValue(2, strict); - } - } else if(U8_IS_TRAIL(b2) && i>start) { - uint8_t b3=s[--i]; - if(0xf0<=b3 && b3<=0xf4) { - b3&=7; - if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) { - *pi=i; - c=(b3<<18)|((b2&0x3f)<<12)|((b1&0x3f)<<6)|c; - if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { - return c; - } else { - // strict: forbid non-characters like U+fffe - return errorValue(3, strict); - } - } - } - } - } - } - return errorValue(0, strict); -} - -U_CAPI int32_t U_EXPORT2 -utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) { - // Same as utf8_prevCharSafeBody(..., strict=-1) minus assembling code points. - int32_t orig_i=i; - uint8_t c=s[i]; - if(U8_IS_TRAIL(c) && i>start) { - uint8_t b1=s[--i]; - if(U8_IS_LEAD(b1)) { - if(b1<0xe0 || - (b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) { - return i; - } - } else if(U8_IS_TRAIL(b1) && i>start) { - uint8_t b2=s[--i]; - if(0xe0<=b2 && b2<=0xf4) { - if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { - return i; - } - } else if(U8_IS_TRAIL(b2) && i>start) { - uint8_t b3=s[--i]; - if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) { - return i; - } - } - } - } - return orig_i; -} diff --git a/deps/node/deps/icu-small/source/common/util.cpp b/deps/node/deps/icu-small/source/common/util.cpp deleted file mode 100644 index 838a201a..00000000 --- a/deps/node/deps/icu-small/source/common/util.cpp +++ /dev/null @@ -1,411 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2001-2011, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 11/19/2001 aliu Creation. 
-********************************************************************** -*/ - -#include "unicode/unimatch.h" -#include "unicode/utf16.h" -#include "patternprops.h" -#include "util.h" - -// Define UChar constants using hex for EBCDIC compatibility - -static const UChar BACKSLASH = 0x005C; /*\*/ -static const UChar UPPER_U = 0x0055; /*U*/ -static const UChar LOWER_U = 0x0075; /*u*/ -static const UChar APOSTROPHE = 0x0027; // '\'' -static const UChar SPACE = 0x0020; // ' ' - -// "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" -static const UChar DIGITS[] = { - 48,49,50,51,52,53,54,55,56,57, - 65,66,67,68,69,70,71,72,73,74, - 75,76,77,78,79,80,81,82,83,84, - 85,86,87,88,89,90 -}; - -U_NAMESPACE_BEGIN - -UnicodeString& ICU_Utility::appendNumber(UnicodeString& result, int32_t n, - int32_t radix, int32_t minDigits) { - if (radix < 2 || radix > 36) { - // Bogus radix - return result.append((UChar)63/*?*/); - } - // Handle negatives - if (n < 0) { - n = -n; - result.append((UChar)45/*-*/); - } - // First determine the number of digits - int32_t nn = n; - int32_t r = 1; - while (nn >= radix) { - nn /= radix; - r *= radix; - --minDigits; - } - // Now generate the digits - while (--minDigits > 0) { - result.append(DIGITS[0]); - } - while (r > 0) { - int32_t digit = n / r; - result.append(DIGITS[digit]); - n -= digit * r; - r /= radix; - } - return result; -} - -/** - * Return true if the character is NOT printable ASCII. - */ -UBool ICU_Utility::isUnprintable(UChar32 c) { - return !(c >= 0x20 && c <= 0x7E); -} - -/** - * Escape unprintable characters using \uxxxx notation for U+0000 to - * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is - * printable ASCII, then do nothing and return FALSE. Otherwise, - * append the escaped notation and return TRUE. 
- */ -UBool ICU_Utility::escapeUnprintable(UnicodeString& result, UChar32 c) { - if (isUnprintable(c)) { - result.append(BACKSLASH); - if (c & ~0xFFFF) { - result.append(UPPER_U); - result.append(DIGITS[0xF&(c>>28)]); - result.append(DIGITS[0xF&(c>>24)]); - result.append(DIGITS[0xF&(c>>20)]); - result.append(DIGITS[0xF&(c>>16)]); - } else { - result.append(LOWER_U); - } - result.append(DIGITS[0xF&(c>>12)]); - result.append(DIGITS[0xF&(c>>8)]); - result.append(DIGITS[0xF&(c>>4)]); - result.append(DIGITS[0xF&c]); - return TRUE; - } - return FALSE; -} - -/** - * Returns the index of a character, ignoring quoted text. - * For example, in the string "abc'hide'h", the 'h' in "hide" will not be - * found by a search for 'h'. - */ -// FOR FUTURE USE. DISABLE FOR NOW for coverage reasons. -/* -int32_t ICU_Utility::quotedIndexOf(const UnicodeString& text, - int32_t start, int32_t limit, - UChar charToFind) { - for (int32_t i=start; i= pos); -//? -//? if (!isForward) { -//? --pos; // pos is a limit, so back up by one -//? } -//? -//? while (pos != stop && -//? PatternProps::isWhiteSpace(c = text.char32At(pos))) { -//? if (isForward) { -//? pos += U16_LENGTH(c); -//? } else { -//? pos -= U16_LENGTH(c); -//? } -//? } -//? -//? if (!isForward) { -//? ++pos; // make pos back into a limit -//? } -//? -//? return pos; -//?} - -/** - * Parse a single non-whitespace character 'ch', optionally - * preceded by whitespace. - * @param id the string to be parsed - * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the - * offset of the first character to be parsed. On output, pos[0] - * is the index after the last parsed character. If the parse - * fails, pos[0] will be unchanged. - * @param ch the non-whitespace character to be parsed. - * @return true if 'ch' is seen preceded by zero or more - * whitespace characters. 
- */ -UBool ICU_Utility::parseChar(const UnicodeString& id, int32_t& pos, UChar ch) { - int32_t start = pos; - skipWhitespace(id, pos, TRUE); - if (pos == id.length() || - id.charAt(pos) != ch) { - pos = start; - return FALSE; - } - ++pos; - return TRUE; -} - -/** - * Parse a pattern string within the given Replaceable and a parsing - * pattern. Characters are matched literally and case-sensitively - * except for the following special characters: - * - * ~ zero or more Pattern_White_Space chars - * - * If end of pattern is reached with all matches along the way, - * pos is advanced to the first unparsed index and returned. - * Otherwise -1 is returned. - * @param pat pattern that controls parsing - * @param text text to be parsed, starting at index - * @param index offset to first character to parse - * @param limit offset after last character to parse - * @return index after last parsed character, or -1 on parse failure. - */ -int32_t ICU_Utility::parsePattern(const UnicodeString& pat, - const Replaceable& text, - int32_t index, - int32_t limit) { - int32_t ipat = 0; - - // empty pattern matches immediately - if (ipat == pat.length()) { - return index; - } - - UChar32 cpat = pat.char32At(ipat); - - while (index < limit) { - UChar32 c = text.char32At(index); - - // parse \s* - if (cpat == 126 /*~*/) { - if (PatternProps::isWhiteSpace(c)) { - index += U16_LENGTH(c); - continue; - } else { - if (++ipat == pat.length()) { - return index; // success; c unparsed - } - // fall thru; process c again with next cpat - } - } - - // parse literal - else if (c == cpat) { - index += U16_LENGTH(c); - ipat += U16_LENGTH(cpat); - if (ipat == pat.length()) { - return index; // success; c parsed - } - // fall thru; get next cpat - } - - // match failure of literal - else { - return -1; - } - - cpat = pat.char32At(ipat); - } - - return -1; // text ended before end of pat -} - -/** - * Append a character to a rule that is being built up. 
To flush - * the quoteBuf to rule, make one final call with isLiteral == TRUE. - * If there is no final character, pass in (UChar32)-1 as c. - * @param rule the string to append the character to - * @param c the character to append, or (UChar32)-1 if none. - * @param isLiteral if true, then the given character should not be - * quoted or escaped. Usually this means it is a syntactic element - * such as > or $ - * @param escapeUnprintable if true, then unprintable characters - * should be escaped using \uxxxx or \Uxxxxxxxx. These escapes will - * appear outside of quotes. - * @param quoteBuf a buffer which is used to build up quoted - * substrings. The caller should initially supply an empty buffer, - * and thereafter should not modify the buffer. The buffer should be - * cleared out by, at the end, calling this method with a literal - * character. - */ -void ICU_Utility::appendToRule(UnicodeString& rule, - UChar32 c, - UBool isLiteral, - UBool escapeUnprintable, - UnicodeString& quoteBuf) { - // If we are escaping unprintables, then escape them outside - // quotes. \u and \U are not recognized within quotes. The same - // logic applies to literals, but literals are never escaped. - if (isLiteral || - (escapeUnprintable && ICU_Utility::isUnprintable(c))) { - if (quoteBuf.length() > 0) { - // We prefer backslash APOSTROPHE to double APOSTROPHE - // (more readable, less similar to ") so if there are - // double APOSTROPHEs at the ends, we pull them outside - // of the quote. - - // If the first thing in the quoteBuf is APOSTROPHE - // (doubled) then pull it out. - while (quoteBuf.length() >= 2 && - quoteBuf.charAt(0) == APOSTROPHE && - quoteBuf.charAt(1) == APOSTROPHE) { - rule.append(BACKSLASH).append(APOSTROPHE); - quoteBuf.remove(0, 2); - } - // If the last thing in the quoteBuf is APOSTROPHE - // (doubled) then remove and count it and add it after. 
- int32_t trailingCount = 0; - while (quoteBuf.length() >= 2 && - quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE && - quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) { - quoteBuf.truncate(quoteBuf.length()-2); - ++trailingCount; - } - if (quoteBuf.length() > 0) { - rule.append(APOSTROPHE); - rule.append(quoteBuf); - rule.append(APOSTROPHE); - quoteBuf.truncate(0); - } - while (trailingCount-- > 0) { - rule.append(BACKSLASH).append(APOSTROPHE); - } - } - if (c != (UChar32)-1) { - /* Since spaces are ignored during parsing, they are - * emitted only for readability. We emit one here - * only if there isn't already one at the end of the - * rule. - */ - if (c == SPACE) { - int32_t len = rule.length(); - if (len > 0 && rule.charAt(len-1) != c) { - rule.append(c); - } - } else if (!escapeUnprintable || !ICU_Utility::escapeUnprintable(rule, c)) { - rule.append(c); - } - } - } - - // Escape ' and '\' and don't begin a quote just for them - else if (quoteBuf.length() == 0 && - (c == APOSTROPHE || c == BACKSLASH)) { - rule.append(BACKSLASH); - rule.append(c); - } - - // Specials (printable ascii that isn't [0-9a-zA-Z]) and - // whitespace need quoting. Also append stuff to quotes if we are - // building up a quoted substring already. 
- else if (quoteBuf.length() > 0 || - (c >= 0x0021 && c <= 0x007E && - !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) || - (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) || - (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) || - PatternProps::isWhiteSpace(c)) { - quoteBuf.append(c); - // Double ' within a quote - if (c == APOSTROPHE) { - quoteBuf.append(c); - } - } - - // Otherwise just append - else { - rule.append(c); - } -} - -void ICU_Utility::appendToRule(UnicodeString& rule, - const UnicodeString& text, - UBool isLiteral, - UBool escapeUnprintable, - UnicodeString& quoteBuf) { - for (int32_t i=0; itoPattern(pat, escapeUnprintable), - TRUE, escapeUnprintable, quoteBuf); - } -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/util.h b/deps/node/deps/icu-small/source/common/util.h deleted file mode 100644 index 92cdc9ef..00000000 --- a/deps/node/deps/icu-small/source/common/util.h +++ /dev/null @@ -1,248 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ********************************************************************** - * Copyright (c) 2001-2011, International Business Machines - * Corporation and others. All Rights Reserved. - ********************************************************************** - * Date Name Description - * 11/19/2001 aliu Creation. - ********************************************************************** - */ - -#ifndef ICU_UTIL_H -#define ICU_UTIL_H - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/unistr.h" - -//-------------------------------------------------------------------- -// class ICU_Utility -// i18n utility functions, scoped into the class ICU_Utility. 
-//-------------------------------------------------------------------- - -U_NAMESPACE_BEGIN - -class UnicodeMatcher; - -class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ { - public: - - /** - * Append a number to the given UnicodeString in the given radix. - * Standard digits '0'-'9' are used and letters 'A'-'Z' for - * radices 11 through 36. - * @param result the digits of the number are appended here - * @param n the number to be converted to digits; may be negative. - * If negative, a '-' is prepended to the digits. - * @param radix a radix from 2 to 36 inclusive. - * @param minDigits the minimum number of digits, not including - * any '-', to produce. Values less than 2 have no effect. One - * digit is always emitted regardless of this parameter. - * @return a reference to result - */ - static UnicodeString& appendNumber(UnicodeString& result, int32_t n, - int32_t radix = 10, - int32_t minDigits = 1); - - /** Returns a bogus UnicodeString by value. */ - static inline UnicodeString makeBogusString() { - UnicodeString result; - result.setToBogus(); - return result; - } - - /** - * Return true if the character is NOT printable ASCII. - * - * This method should really be in UnicodeString (or similar). For - * now, we implement it here and share it with friend classes. - */ - static UBool isUnprintable(UChar32 c); - - /** - * Escape unprintable characters using \uxxxx notation for U+0000 to - * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is - * printable ASCII, then do nothing and return FALSE. Otherwise, - * append the escaped notation and return TRUE. - */ - static UBool escapeUnprintable(UnicodeString& result, UChar32 c); - - /** - * Returns the index of a character, ignoring quoted text. - * For example, in the string "abc'hide'h", the 'h' in "hide" will not be - * found by a search for 'h'. - * @param text text to be searched - * @param start the beginning index, inclusive; 0 <= start - * <= limit. 
- * @param limit the ending index, exclusive; start <= limit - * <= text.length(). - * @param c character to search for - * @return Offset of the first instance of c, or -1 if not found. - */ -//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons. -// static int32_t quotedIndexOf(const UnicodeString& text, -// int32_t start, int32_t limit, -// UChar c); - - /** - * Skip over a sequence of zero or more white space characters at pos. - * @param advance if true, advance pos to the first non-white-space - * character at or after pos, or str.length(), if there is none. - * Otherwise leave pos unchanged. - * @return the index of the first non-white-space character at or - * after pos, or str.length(), if there is none. - */ - static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos, - UBool advance = FALSE); - - /** - * Skip over Pattern_White_Space in a Replaceable. - * Skipping may be done in the forward or - * reverse direction. In either case, the leftmost index will be - * inclusive, and the rightmost index will be exclusive. That is, - * given a range defined as [start, limit), the call - * skipWhitespace(text, start, limit) will advance start past leading - * whitespace, whereas the call skipWhitespace(text, limit, start), - * will back up limit past trailing whitespace. - * @param text the text to be analyzed - * @param pos either the start or limit of a range of 'text', to skip - * leading or trailing whitespace, respectively - * @param stop either the limit or start of a range of 'text', to skip - * leading or trailing whitespace, respectively - * @return the new start or limit, depending on what was passed in to - * 'pos' - */ -//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons. -//? static int32_t skipWhitespace(const Replaceable& text, -//? int32_t pos, int32_t stop); - - /** - * Parse a single non-whitespace character 'ch', optionally - * preceded by whitespace. - * @param id the string to be parsed - * @param pos INPUT-OUTPUT parameter. 
On input, pos[0] is the - * offset of the first character to be parsed. On output, pos[0] - * is the index after the last parsed character. If the parse - * fails, pos[0] will be unchanged. - * @param ch the non-whitespace character to be parsed. - * @return true if 'ch' is seen preceded by zero or more - * whitespace characters. - */ - static UBool parseChar(const UnicodeString& id, int32_t& pos, UChar ch); - - /** - * Parse a pattern string starting at offset pos. Keywords are - * matched case-insensitively. Spaces may be skipped and may be - * optional or required. Integer values may be parsed, and if - * they are, they will be returned in the given array. If - * successful, the offset of the next non-space character is - * returned. On failure, -1 is returned. - * @param pattern must only contain lowercase characters, which - * will match their uppercase equivalents as well. A space - * character matches one or more required spaces. A '~' character - * matches zero or more optional spaces. A '#' character matches - * an integer and stores it in parsedInts, which the caller must - * ensure has enough capacity. - * @param parsedInts array to receive parsed integers. Caller - * must ensure that parsedInts.length is >= the number of '#' - * signs in 'pattern'. - * @return the position after the last character parsed, or -1 if - * the parse failed - */ - static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit, - const UnicodeString& pattern, int32_t* parsedInts); - - /** - * Parse a pattern string within the given Replaceable and a parsing - * pattern. Characters are matched literally and case-sensitively - * except for the following special characters: - * - * ~ zero or more Pattern_White_Space chars - * - * If end of pattern is reached with all matches along the way, - * pos is advanced to the first unparsed index and returned. - * Otherwise -1 is returned. 
- * @param pat pattern that controls parsing - * @param text text to be parsed, starting at index - * @param index offset to first character to parse - * @param limit offset after last character to parse - * @return index after last parsed character, or -1 on parse failure. - */ - static int32_t parsePattern(const UnicodeString& pat, - const Replaceable& text, - int32_t index, - int32_t limit); - - /** - * Parse an integer at pos, either of the form \d+ or of the form - * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex, - * or octal format. - * @param pos INPUT-OUTPUT parameter. On input, the first - * character to parse. On output, the character after the last - * parsed character. - */ - static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit); - - /** - * Parse a Unicode identifier from the given string at the given - * position. Return the identifier, or an empty string if there - * is no identifier. - * @param str the string to parse - * @param pos INPUT-OUPUT parameter. On INPUT, pos is the - * first character to examine. It must be less than str.length(), - * and it must not point to a whitespace character. That is, must - * have pos < str.length() and - * !UCharacter::isWhitespace(str.char32At(pos)). On - * OUTPUT, the position after the last parsed character. - * @return the Unicode identifier, or an empty string if there is - * no valid identifier at pos. - */ - static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos); - - /** - * Parse an unsigned 31-bit integer at the given offset. Use - * UCharacter.digit() to parse individual characters into digits. - * @param text the text to be parsed - * @param pos INPUT-OUTPUT parameter. On entry, pos is the - * offset within text at which to start parsing; it should point - * to a valid digit. On exit, pos is the offset after the last - * parsed character. If the parse failed, it will be unchanged on - * exit. Must be >= 0 on entry. 
- * @param radix the radix in which to parse; must be >= 2 and <= - * 36. - * @return a non-negative parsed number, or -1 upon parse failure. - * Parse fails if there are no digits, that is, if pos does not - * point to a valid digit on entry, or if the number to be parsed - * does not fit into a 31-bit unsigned integer. - */ - static int32_t parseNumber(const UnicodeString& text, - int32_t& pos, int8_t radix); - - static void appendToRule(UnicodeString& rule, - UChar32 c, - UBool isLiteral, - UBool escapeUnprintable, - UnicodeString& quoteBuf); - - static void appendToRule(UnicodeString& rule, - const UnicodeString& text, - UBool isLiteral, - UBool escapeUnprintable, - UnicodeString& quoteBuf); - - static void appendToRule(UnicodeString& rule, - const UnicodeMatcher* matcher, - UBool escapeUnprintable, - UnicodeString& quoteBuf); - -private: - // do not instantiate - ICU_Utility(); -}; - -U_NAMESPACE_END - -#endif -//eof diff --git a/deps/node/deps/icu-small/source/common/util_props.cpp b/deps/node/deps/icu-small/source/common/util_props.cpp deleted file mode 100644 index 36057a60..00000000 --- a/deps/node/deps/icu-small/source/common/util_props.cpp +++ /dev/null @@ -1,216 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (c) 2001-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 11/19/2001 aliu Creation. -********************************************************************** -*/ - -#include "unicode/uchar.h" -#include "unicode/utf16.h" -#include "patternprops.h" -#include "util.h" - -U_NAMESPACE_BEGIN - -/** - * Parse an integer at pos, either of the form \d+ or of the form - * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex, - * or octal format. 
- * @param pos INPUT-OUTPUT parameter. On input, the first - * character to parse. On output, the character after the last - * parsed character. - */ -int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit) { - int32_t count = 0; - int32_t value = 0; - int32_t p = pos; - int8_t radix = 10; - - if (p < limit && rule.charAt(p) == 48 /*0*/) { - if (p+1 < limit && (rule.charAt(p+1) == 0x78 /*x*/ || rule.charAt(p+1) == 0x58 /*X*/)) { - p += 2; - radix = 16; - } - else { - p++; - count = 1; - radix = 8; - } - } - - while (p < limit) { - int32_t d = u_digit(rule.charAt(p++), radix); - if (d < 0) { - --p; - break; - } - ++count; - int32_t v = (value * radix) + d; - if (v <= value) { - // If there are too many input digits, at some point - // the value will go negative, e.g., if we have seen - // "0x8000000" already and there is another '0', when - // we parse the next 0 the value will go negative. - return 0; - } - value = v; - } - if (count > 0) { - pos = p; - } - return value; -} - -/** - * Parse a pattern string starting at offset pos. Keywords are - * matched case-insensitively. Spaces may be skipped and may be - * optional or required. Integer values may be parsed, and if - * they are, they will be returned in the given array. If - * successful, the offset of the next non-space character is - * returned. On failure, -1 is returned. - * @param pattern must only contain lowercase characters, which - * will match their uppercase equivalents as well. A space - * character matches one or more required spaces. A '~' character - * matches zero or more optional spaces. A '#' character matches - * an integer and stores it in parsedInts, which the caller must - * ensure has enough capacity. - * @param parsedInts array to receive parsed integers. Caller - * must ensure that parsedInts.length is >= the number of '#' - * signs in 'pattern'. 
- * @return the position after the last character parsed, or -1 if - * the parse failed - */ -int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit, - const UnicodeString& pattern, int32_t* parsedInts) { - // TODO Update this to handle surrogates - int32_t p; - int32_t intCount = 0; // number of integers parsed - for (int32_t i=0; i= limit) { - return -1; - } - c = rule.charAt(pos++); - if (!PatternProps::isWhiteSpace(c)) { - return -1; - } - // FALL THROUGH to skipWhitespace - U_FALLTHROUGH; - case 126 /*'~'*/: - pos = skipWhitespace(rule, pos); - break; - case 35 /*'#'*/: - p = pos; - parsedInts[intCount++] = parseInteger(rule, p, limit); - if (p == pos) { - // Syntax error; failed to parse integer - return -1; - } - pos = p; - break; - default: - if (pos >= limit) { - return -1; - } - c = (UChar) u_tolower(rule.charAt(pos++)); - if (c != cpat) { - return -1; - } - break; - } - } - return pos; -} - -/** - * Parse a Unicode identifier from the given string at the given - * position. Return the identifier, or an empty string if there - * is no identifier. - * @param str the string to parse - * @param pos INPUT-OUPUT parameter. On INPUT, pos is the - * first character to examine. It must be less than str.length(), - * and it must not point to a whitespace character. That is, must - * have pos < str.length(). On - * OUTPUT, the position after the last parsed character. - * @return the Unicode identifier, or an empty string if there is - * no valid identifier at pos. 
- */ -UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) { - // assert(pos < str.length()); - UnicodeString buf; - int p = pos; - while (p < str.length()) { - UChar32 ch = str.char32At(p); - if (buf.length() == 0) { - if (u_isIDStart(ch)) { - buf.append(ch); - } else { - buf.truncate(0); - return buf; - } - } else { - if (u_isIDPart(ch)) { - buf.append(ch); - } else { - break; - } - } - p += U16_LENGTH(ch); - } - pos = p; - return buf; -} - -/** - * Parse an unsigned 31-bit integer at the given offset. Use - * UCharacter.digit() to parse individual characters into digits. - * @param text the text to be parsed - * @param pos INPUT-OUTPUT parameter. On entry, pos[0] is the - * offset within text at which to start parsing; it should point - * to a valid digit. On exit, pos[0] is the offset after the last - * parsed character. If the parse failed, it will be unchanged on - * exit. Must be >= 0 on entry. - * @param radix the radix in which to parse; must be >= 2 and <= - * 36. - * @return a non-negative parsed number, or -1 upon parse failure. - * Parse fails if there are no digits, that is, if pos[0] does not - * point to a valid digit on entry, or if the number to be parsed - * does not fit into a 31-bit unsigned integer. - */ -int32_t ICU_Utility::parseNumber(const UnicodeString& text, - int32_t& pos, int8_t radix) { - // assert(pos[0] >= 0); - // assert(radix >= 2); - // assert(radix <= 36); - int32_t n = 0; - int32_t p = pos; - while (p < text.length()) { - UChar32 ch = text.char32At(p); - int32_t d = u_digit(ch, radix); - if (d < 0) { - break; - } - n = radix*n + d; - // ASSUME that when a 32-bit integer overflows it becomes - // negative. E.g., 214748364 * 10 + 8 => negative value. 
- if (n < 0) { - return -1; - } - ++p; - } - if (p == pos) { - return -1; - } - pos = p; - return n; -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/utrace.cpp b/deps/node/deps/icu-small/source/common/utrace.cpp deleted file mode 100644 index 7d0ddc6f..00000000 --- a/deps/node/deps/icu-small/source/common/utrace.cpp +++ /dev/null @@ -1,492 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2003-2014, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: utrace.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -*/ - -#include "unicode/utrace.h" -#include "utracimp.h" -#include "cstring.h" -#include "uassert.h" -#include "ucln_cmn.h" - - -static UTraceEntry *pTraceEntryFunc = NULL; -static UTraceExit *pTraceExitFunc = NULL; -static UTraceData *pTraceDataFunc = NULL; -static const void *gTraceContext = NULL; - -/** - * \var utrace_level - * Trace level variable. Negative for "off". - */ -static int32_t -utrace_level = UTRACE_ERROR; - -U_CAPI void U_EXPORT2 -utrace_entry(int32_t fnNumber) { - if (pTraceEntryFunc != NULL) { - (*pTraceEntryFunc)(gTraceContext, fnNumber); - } -} - - -static const char gExitFmt[] = "Returns."; -static const char gExitFmtValue[] = "Returns %d."; -static const char gExitFmtStatus[] = "Returns. Status = %d."; -static const char gExitFmtValueStatus[] = "Returns %d. Status = %d."; -static const char gExitFmtPtrStatus[] = "Returns %d. Status = %p."; - -U_CAPI void U_EXPORT2 -utrace_exit(int32_t fnNumber, int32_t returnType, ...) 
{ - if (pTraceExitFunc != NULL) { - va_list args; - const char *fmt; - - switch (returnType) { - case 0: - fmt = gExitFmt; - break; - case UTRACE_EXITV_I32: - fmt = gExitFmtValue; - break; - case UTRACE_EXITV_STATUS: - fmt = gExitFmtStatus; - break; - case UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS: - fmt = gExitFmtValueStatus; - break; - case UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS: - fmt = gExitFmtPtrStatus; - break; - default: - U_ASSERT(FALSE); - fmt = gExitFmt; - } - - va_start(args, returnType); - (*pTraceExitFunc)(gTraceContext, fnNumber, fmt, args); - va_end(args); - } -} - - - -U_CAPI void U_EXPORT2 -utrace_data(int32_t fnNumber, int32_t level, const char *fmt, ...) { - if (pTraceDataFunc != NULL) { - va_list args; - va_start(args, fmt ); - (*pTraceDataFunc)(gTraceContext, fnNumber, level, fmt, args); - va_end(args); - } -} - - -static void outputChar(char c, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) { - int32_t i; - /* Check whether a start of line indenting is needed. Three cases: - * 1. At the start of the first line (output index == 0). - * 2. At the start of subsequent lines (preceeding char in buffer == '\n') - * 3. When preflighting buffer len (buffer capacity is exceeded), when - * a \n is output. Ideally we wouldn't do the indent until the following char - * is received, but that won't work because there's no place to remember that - * the preceding char was \n. Meaning that we may overstimate the - * buffer size needed. No harm done. - */ - if (*outIx==0 || /* case 1. */ - (c!='\n' && c!=0 && *outIx < capacity && outBuf[(*outIx)-1]=='\n') || /* case 2. */ - (c=='\n' && *outIx>=capacity)) /* case 3 */ - { - /* At the start of a line. Indent. */ - for(i=0; i= 0; shiftCount-=4) { - char c = gHexChars[(val >> shiftCount) & 0xf]; - outputChar(c, outBuf, outIx, capacity, 0); - } -} - -/* Output a pointer value in hex. 
Work with any size of pointer */ -static void outputPtrBytes(void *val, char *outBuf, int32_t *outIx, int32_t capacity) { - uint32_t i; - int32_t incVal = 1; /* +1 for big endian, -1 for little endian */ - char *p = (char *)&val; /* point to current byte to output in the ptr val */ - -#if !U_IS_BIG_ENDIAN - /* Little Endian. Move p to most significant end of the value */ - incVal = -1; - p += sizeof(void *) - 1; -#endif - - /* Loop through the bytes of the ptr as it sits in memory, from - * most significant to least significant end */ - for (i=0; i 0) { - outputHexBytes(longArg, charsToOutput, outBuf, &outIx, capacity); - outputChar(' ', outBuf, &outIx, capacity, indent); - } - if (vectorLen == -1 && longArg == 0) { - break; - } - } - } - outputChar('[', outBuf, &outIx, capacity, indent); - outputHexBytes(vectorLen, 8, outBuf, &outIx, capacity); - outputChar(']', outBuf, &outIx, capacity, indent); - } - break; - - - default: - /* %. in format string, where . is some character not in the set - * of recognized format chars. Just output it as if % wasn't there. - * (Covers "%%" outputing a single '%') - */ - outputChar(fmtC, outBuf, &outIx, capacity, indent); - } - } - outputChar(0, outBuf, &outIx, capacity, indent); /* Make sure that output is null terminated */ - return outIx + 1; /* outIx + 1 because outIx does not increment when outputing final null. */ -} - - - - -U_CAPI int32_t U_EXPORT2 -utrace_format(char *outBuf, int32_t capacity, - int32_t indent, const char *fmt, ...) 
{ - int32_t retVal; - va_list args; - va_start(args, fmt ); - retVal = utrace_vformat(outBuf, capacity, indent, fmt, args); - va_end(args); - return retVal; -} - - -U_CAPI void U_EXPORT2 -utrace_setFunctions(const void *context, - UTraceEntry *e, UTraceExit *x, UTraceData *d) { - pTraceEntryFunc = e; - pTraceExitFunc = x; - pTraceDataFunc = d; - gTraceContext = context; -} - - -U_CAPI void U_EXPORT2 -utrace_getFunctions(const void **context, - UTraceEntry **e, UTraceExit **x, UTraceData **d) { - *e = pTraceEntryFunc; - *x = pTraceExitFunc; - *d = pTraceDataFunc; - *context = gTraceContext; -} - -U_CAPI void U_EXPORT2 -utrace_setLevel(int32_t level) { - if (level < UTRACE_OFF) { - level = UTRACE_OFF; - } - if (level > UTRACE_VERBOSE) { - level = UTRACE_VERBOSE; - } - utrace_level = level; -} - -U_CAPI int32_t U_EXPORT2 -utrace_getLevel() { - return utrace_level; -} - - -U_CFUNC UBool -utrace_cleanup() { - pTraceEntryFunc = NULL; - pTraceExitFunc = NULL; - pTraceDataFunc = NULL; - utrace_level = UTRACE_OFF; - gTraceContext = NULL; - return TRUE; -} - - -static const char * const -trFnName[] = { - "u_init", - "u_cleanup", - NULL -}; - - -static const char * const -trConvNames[] = { - "ucnv_open", - "ucnv_openPackage", - "ucnv_openAlgorithmic", - "ucnv_clone", - "ucnv_close", - "ucnv_flushCache", - "ucnv_load", - "ucnv_unload", - NULL -}; - - -static const char * const -trCollNames[] = { - "ucol_open", - "ucol_close", - "ucol_strcoll", - "ucol_getSortKey", - "ucol_getLocale", - "ucol_nextSortKeyPart", - "ucol_strcollIter", - "ucol_openFromShortString", - "ucol_strcollUTF8", - NULL -}; - - -U_CAPI const char * U_EXPORT2 -utrace_functionName(int32_t fnNumber) { - if(UTRACE_FUNCTION_START <= fnNumber && fnNumber < UTRACE_FUNCTION_LIMIT) { - return trFnName[fnNumber]; - } else if(UTRACE_CONVERSION_START <= fnNumber && fnNumber < UTRACE_CONVERSION_LIMIT) { - return trConvNames[fnNumber - UTRACE_CONVERSION_START]; - } else if(UTRACE_COLLATION_START <= fnNumber && fnNumber < 
UTRACE_COLLATION_LIMIT){ - return trCollNames[fnNumber - UTRACE_COLLATION_START]; - } else { - return "[BOGUS Trace Function Number]"; - } -} diff --git a/deps/node/deps/icu-small/source/common/utracimp.h b/deps/node/deps/icu-small/source/common/utracimp.h deleted file mode 100644 index c2819830..00000000 --- a/deps/node/deps/icu-small/source/common/utracimp.h +++ /dev/null @@ -1,372 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2003-2009, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: utracimp.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2003aug06 -* created by: Markus W. Scherer -* -* Internal header for ICU tracing/logging. -* -* -* Various notes: -* - using a trace level variable to only call trace functions -* when the level is sufficient -* - using the same variable for tracing on/off to never make a function -* call when off -* - the function number is put into a local variable by the entry macro -* and used implicitly to avoid copy&paste/typing mistakes by the developer -* - the application must call utrace_setFunctions() and pass in -* implementations for the trace functions -* - ICU trace macros call ICU functions that route through the function -* pointers if they have been set; -* this avoids an indirection at the call site -* (which would cost more code for another check and for the indirection) -* -* ### TODO Issues: -* - Verify that va_list is portable among compilers for the same platform. -* va_list should be portable because printf() would fail otherwise! -* - Should enum values like UTraceLevel be passed into int32_t-type arguments, -* or should enum types be used? 
-*/ - -#ifndef __UTRACIMP_H__ -#define __UTRACIMP_H__ - -#include "unicode/utrace.h" -#include - -U_CDECL_BEGIN - -/** - * Traced Function Exit return types. - * Flags indicating the number and types of varargs included in a call - * to a UTraceExit function. - * Bits 0-3: The function return type. First variable param. - * Bit 4: Flag for presence of U_ErrorCode status param. - * @internal - */ -typedef enum UTraceExitVal { - /** The traced function returns no value @internal */ - UTRACE_EXITV_NONE = 0, - /** The traced function returns an int32_t, or compatible, type. @internal */ - UTRACE_EXITV_I32 = 1, - /** The traced function returns a pointer @internal */ - UTRACE_EXITV_PTR = 2, - /** The traced function returns a UBool @internal */ - UTRACE_EXITV_BOOL = 3, - /** Mask to extract the return type values from a UTraceExitVal @internal */ - UTRACE_EXITV_MASK = 0xf, - /** Bit indicating that the traced function includes a UErrorCode parameter @internal */ - UTRACE_EXITV_STATUS = 0x10 -} UTraceExitVal; - -/** - * Trace function for the entry point of a function. - * Do not use directly, use UTRACE_ENTRY instead. - * @param fnNumber The UTraceFunctionNumber for the current function. - * @internal - */ -U_CAPI void U_EXPORT2 -utrace_entry(int32_t fnNumber); - -/** - * Trace function for each exit point of a function. - * Do not use directly, use UTRACE_EXIT* instead. - * @param fnNumber The UTraceFunctionNumber for the current function. - * @param returnType The type of the value returned by the function. - * @param errorCode The UErrorCode value at function exit. See UTRACE_EXIT. - * @internal - */ -U_CAPI void U_EXPORT2 -utrace_exit(int32_t fnNumber, int32_t returnType, ...); - - -/** - * Trace function used inside functions that have a UTRACE_ENTRY() statement. - * Do not use directly, use UTRACE_DATAX() macros instead. - * - * @param utraceFnNumber The number of the current function, from the local - * variable of the same name. 
- * @param level The trace level for this message. - * @param fmt The trace format string. - * - * @internal - */ -U_CAPI void U_EXPORT2 -utrace_data(int32_t utraceFnNumber, int32_t level, const char *fmt, ...); - -U_CDECL_END - -#if U_ENABLE_TRACING - -/** - * Boolean expression to see if ICU tracing is turned on - * to at least the specified level. - * @internal - */ -#define UTRACE_LEVEL(level) (utrace_getLevel()>=(level)) - -/** - * Flag bit in utraceFnNumber, the local variable added to each function - * with tracing code to contains the function number. - * - * Set the flag if the function's entry is traced, which will cause the - * function's exit to also be traced. utraceFnNumber is uncoditionally - * set at entry, whether or not the entry is traced, so that it will - * always be available for error trace output. - * @internal - */ -#define UTRACE_TRACED_ENTRY 0x80000000 - -/** - * Trace statement for the entry point of a function. - * Stores the function number in a local variable. - * In C code, must be placed immediately after the last variable declaration. - * Must be matched with UTRACE_EXIT() at all function exit points. - * - * Tracing should start with UTRACE_ENTRY after checking for - * U_FAILURE at function entry, so that if a function returns immediately - * because of a pre-existing error condition, it does not show up in the trace, - * consistent with ICU's error handling model. - * - * @param fnNumber The UTraceFunctionNumber for the current function. - * @internal - */ -#define UTRACE_ENTRY(fnNumber) \ - int32_t utraceFnNumber=(fnNumber); \ - if(utrace_getLevel()>=UTRACE_INFO) { \ - utrace_entry(fnNumber); \ - utraceFnNumber |= UTRACE_TRACED_ENTRY; \ - } - - -/** - * Trace statement for the entry point of open and close functions. - * Produces trace output at a less verbose setting than plain UTRACE_ENTRY - * Stores the function number in a local variable. - * In C code, must be placed immediately after the last variable declaration. 
- * Must be matched with UTRACE_EXIT() at all function exit points. - * - * @param fnNumber The UTraceFunctionNumber for the current function. - * @internal - */ -#define UTRACE_ENTRY_OC(fnNumber) \ - int32_t utraceFnNumber=(fnNumber); \ - if(utrace_getLevel()>=UTRACE_OPEN_CLOSE) { \ - utrace_entry(fnNumber); \ - utraceFnNumber |= UTRACE_TRACED_ENTRY; \ - } - -/** - * Trace statement for each exit point of a function that has a UTRACE_ENTRY() - * statement. - * - * @param errorCode The function's ICU UErrorCode value at function exit, - * or U_ZERO_ERROR if the function does not use a UErrorCode. - * 0==U_ZERO_ERROR indicates success, - * positive values an error (see u_errorName()), - * negative values an informational status. - * - * @internal - */ -#define UTRACE_EXIT() \ - {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ - utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_NONE); \ - }} - -/** - * Trace statement for each exit point of a function that has a UTRACE_ENTRY() - * statement, and that returns a value. - * - * @param val The function's return value, int32_t or comatible type. - * - * @internal - */ -#define UTRACE_EXIT_VALUE(val) \ - {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ - utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_I32, val); \ - }} - -#define UTRACE_EXIT_STATUS(status) \ - {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ - utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_STATUS, status); \ - }} - -#define UTRACE_EXIT_VALUE_STATUS(val, status) \ - {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ - utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS), val, status); \ - }} - -#define UTRACE_EXIT_PTR_STATUS(ptr, status) \ - {if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ - utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS), ptr, status); \ - }} - -/** - * Trace statement used inside functions that have a UTRACE_ENTRY() statement. 
- * Takes no data arguments. - * The number of arguments for this macro must match the number of inserts - * in the format string. Vector inserts count as two arguments. - * Calls utrace_data() if the level is high enough. - * @internal - */ -#define UTRACE_DATA0(level, fmt) \ - if(UTRACE_LEVEL(level)) { \ - utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt)); \ - } - -/** - * Trace statement used inside functions that have a UTRACE_ENTRY() statement. - * Takes one data argument. - * The number of arguments for this macro must match the number of inserts - * in the format string. Vector inserts count as two arguments. - * Calls utrace_data() if the level is high enough. - * @internal - */ -#define UTRACE_DATA1(level, fmt, a) \ - if(UTRACE_LEVEL(level)) { \ - utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a)); \ - } - -/** - * Trace statement used inside functions that have a UTRACE_ENTRY() statement. - * Takes two data arguments. - * The number of arguments for this macro must match the number of inserts - * in the format string. Vector inserts count as two arguments. - * Calls utrace_data() if the level is high enough. - * @internal - */ -#define UTRACE_DATA2(level, fmt, a, b) \ - if(UTRACE_LEVEL(level)) { \ - utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a), (b)); \ - } - -/** - * Trace statement used inside functions that have a UTRACE_ENTRY() statement. - * Takes three data arguments. - * The number of arguments for this macro must match the number of inserts - * in the format string. Vector inserts count as two arguments. - * Calls utrace_data() if the level is high enough. - * @internal - */ -#define UTRACE_DATA3(level, fmt, a, b, c) \ - if(UTRACE_LEVEL(level)) { \ - utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c)); \ - } - -/** - * Trace statement used inside functions that have a UTRACE_ENTRY() statement. - * Takes four data arguments. 
- * The number of arguments for this macro must match the number of inserts - * in the format string. Vector inserts count as two arguments. - * Calls utrace_data() if the level is high enough. - * @internal - */ -#define UTRACE_DATA4(level, fmt, a, b, c, d) \ - if(UTRACE_LEVEL(level)) { \ - utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d)); \ - } - -/** - * Trace statement used inside functions that have a UTRACE_ENTRY() statement. - * Takes five data arguments. - * The number of arguments for this macro must match the number of inserts - * in the format string. Vector inserts count as two arguments. - * Calls utrace_data() if the level is high enough. - * @internal - */ -#define UTRACE_DATA5(level, fmt, a, b, c, d, e) \ - if(UTRACE_LEVEL(level)) { \ - utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e)); \ - } - -/** - * Trace statement used inside functions that have a UTRACE_ENTRY() statement. - * Takes six data arguments. - * The number of arguments for this macro must match the number of inserts - * in the format string. Vector inserts count as two arguments. - * Calls utrace_data() if the level is high enough. - * @internal - */ -#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f) \ - if(UTRACE_LEVEL(level)) { \ - utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f)); \ - } - -/** - * Trace statement used inside functions that have a UTRACE_ENTRY() statement. - * Takes seven data arguments. - * The number of arguments for this macro must match the number of inserts - * in the format string. Vector inserts count as two arguments. - * Calls utrace_data() if the level is high enough. 
- * @internal - */ -#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g) \ - if(UTRACE_LEVEL(level)) { \ - utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g)); \ - } - -/** - * Trace statement used inside functions that have a UTRACE_ENTRY() statement. - * Takes eight data arguments. - * The number of arguments for this macro must match the number of inserts - * in the format string. Vector inserts count as two arguments. - * Calls utrace_data() if the level is high enough. - * @internal - */ -#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h) \ - if(UTRACE_LEVEL(level)) { \ - utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h)); \ - } - -/** - * Trace statement used inside functions that have a UTRACE_ENTRY() statement. - * Takes nine data arguments. - * The number of arguments for this macro must match the number of inserts - * in the format string. Vector inserts count as two arguments. - * Calls utrace_data() if the level is high enough. 
- * @internal - */ -#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i) \ - if(UTRACE_LEVEL(level)) { \ - utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h), (i)); \ - } - -#else - -/* - * When tracing is disabled, the following macros become empty - */ - -#define UTRACE_LEVEL(level) 0 -#define UTRACE_ENTRY(fnNumber) -#define UTRACE_ENTRY_OC(fnNumber) -#define UTRACE_EXIT() -#define UTRACE_EXIT_VALUE(val) -#define UTRACE_EXIT_STATUS(status) -#define UTRACE_EXIT_VALUE_STATUS(val, status) -#define UTRACE_EXIT_PTR_STATUS(ptr, status) -#define UTRACE_DATA0(level, fmt) -#define UTRACE_DATA1(level, fmt, a) -#define UTRACE_DATA2(level, fmt, a, b) -#define UTRACE_DATA3(level, fmt, a, b, c) -#define UTRACE_DATA4(level, fmt, a, b, c, d) -#define UTRACE_DATA5(level, fmt, a, b, c, d, e) -#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f) -#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g) -#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h) -#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i) - -#endif - -#endif diff --git a/deps/node/deps/icu-small/source/common/utrie.cpp b/deps/node/deps/icu-small/source/common/utrie.cpp deleted file mode 100644 index ecf9b1cb..00000000 --- a/deps/node/deps/icu-small/source/common/utrie.cpp +++ /dev/null @@ -1,1234 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2001-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: utrie.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001oct20 -* created by: Markus W. Scherer -* -* This is a common implementation of a "folded" trie. 
-* It is a kind of compressed, serializable table of 16- or 32-bit values associated with -* Unicode code points (0..0x10ffff). -*/ - -#ifdef UTRIE_DEBUG -# include -#endif - -#include "unicode/utypes.h" -#include "cmemory.h" -#include "utrie.h" - -/* miscellaneous ------------------------------------------------------------ */ - -#undef ABS -#define ABS(x) ((x)>=0 ? (x) : -(x)) - -static inline UBool -equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) { - while(length>0 && *s==*t) { - ++s; - ++t; - --length; - } - return (UBool)(length==0); -} - -/* Building a trie ----------------------------------------------------------*/ - -U_CAPI UNewTrie * U_EXPORT2 -utrie_open(UNewTrie *fillIn, - uint32_t *aliasData, int32_t maxDataLength, - uint32_t initialValue, uint32_t leadUnitValue, - UBool latin1Linear) { - UNewTrie *trie; - int32_t i, j; - - if( maxDataLengthisAllocated= (UBool)(fillIn==NULL); - - if(aliasData!=NULL) { - trie->data=aliasData; - trie->isDataAllocated=FALSE; - } else { - trie->data=(uint32_t *)uprv_malloc(maxDataLength*4); - if(trie->data==NULL) { - uprv_free(trie); - return NULL; - } - trie->isDataAllocated=TRUE; - } - - /* preallocate and reset the first data block (block index 0) */ - j=UTRIE_DATA_BLOCK_LENGTH; - - if(latin1Linear) { - /* preallocate and reset the first block (number 0) and Latin-1 (U+0000..U+00ff) after that */ - /* made sure above that maxDataLength>=1024 */ - - /* set indexes to point to consecutive data blocks */ - i=0; - do { - /* do this at least for trie->index[0] even if that block is only partly used for Latin-1 */ - trie->index[i++]=j; - j+=UTRIE_DATA_BLOCK_LENGTH; - } while(i<(256>>UTRIE_SHIFT)); - } - - /* reset the initially allocated blocks to the initial value */ - trie->dataLength=j; - while(j>0) { - trie->data[--j]=initialValue; - } - - trie->leadUnitValue=leadUnitValue; - trie->indexLength=UTRIE_MAX_INDEX_LENGTH; - trie->dataCapacity=maxDataLength; - trie->isLatin1Linear=latin1Linear; - 
trie->isCompacted=FALSE; - return trie; -} - -U_CAPI UNewTrie * U_EXPORT2 -utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataCapacity) { - UNewTrie *trie; - UBool isDataAllocated; - - /* do not clone if other is not valid or already compacted */ - if(other==NULL || other->data==NULL || other->isCompacted) { - return NULL; - } - - /* clone data */ - if(aliasData!=NULL && aliasDataCapacity>=other->dataCapacity) { - isDataAllocated=FALSE; - } else { - aliasDataCapacity=other->dataCapacity; - aliasData=(uint32_t *)uprv_malloc(other->dataCapacity*4); - if(aliasData==NULL) { - return NULL; - } - isDataAllocated=TRUE; - } - - trie=utrie_open(fillIn, aliasData, aliasDataCapacity, - other->data[0], other->leadUnitValue, - other->isLatin1Linear); - if(trie==NULL) { - uprv_free(aliasData); - } else { - uprv_memcpy(trie->index, other->index, sizeof(trie->index)); - uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4); - trie->dataLength=other->dataLength; - trie->isDataAllocated=isDataAllocated; - } - - return trie; -} - -U_CAPI void U_EXPORT2 -utrie_close(UNewTrie *trie) { - if(trie!=NULL) { - if(trie->isDataAllocated) { - uprv_free(trie->data); - trie->data=NULL; - } - if(trie->isAllocated) { - uprv_free(trie); - } - } -} - -U_CAPI uint32_t * U_EXPORT2 -utrie_getData(UNewTrie *trie, int32_t *pLength) { - if(trie==NULL || pLength==NULL) { - return NULL; - } - - *pLength=trie->dataLength; - return trie->data; -} - -static int32_t -utrie_allocDataBlock(UNewTrie *trie) { - int32_t newBlock, newTop; - - newBlock=trie->dataLength; - newTop=newBlock+UTRIE_DATA_BLOCK_LENGTH; - if(newTop>trie->dataCapacity) { - /* out of memory in the data array */ - return -1; - } - trie->dataLength=newTop; - return newBlock; -} - -/** - * No error checking for illegal arguments. 
- * - * @return -1 if no new data block available (out of memory in data array) - * @internal - */ -static int32_t -utrie_getDataBlock(UNewTrie *trie, UChar32 c) { - int32_t indexValue, newBlock; - - c>>=UTRIE_SHIFT; - indexValue=trie->index[c]; - if(indexValue>0) { - return indexValue; - } - - /* allocate a new data block */ - newBlock=utrie_allocDataBlock(trie); - if(newBlock<0) { - /* out of memory in the data array */ - return -1; - } - trie->index[c]=newBlock; - - /* copy-on-write for a block from a setRange() */ - uprv_memcpy(trie->data+newBlock, trie->data-indexValue, 4*UTRIE_DATA_BLOCK_LENGTH); - return newBlock; -} - -/** - * @return TRUE if the value was successfully set - */ -U_CAPI UBool U_EXPORT2 -utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value) { - int32_t block; - - /* valid, uncompacted trie and valid c? */ - if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) { - return FALSE; - } - - block=utrie_getDataBlock(trie, c); - if(block<0) { - return FALSE; - } - - trie->data[block+(c&UTRIE_MASK)]=value; - return TRUE; -} - -U_CAPI uint32_t U_EXPORT2 -utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero) { - int32_t block; - - /* valid, uncompacted trie and valid c? 
*/ - if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) { - if(pInBlockZero!=NULL) { - *pInBlockZero=TRUE; - } - return 0; - } - - block=trie->index[c>>UTRIE_SHIFT]; - if(pInBlockZero!=NULL) { - *pInBlockZero= (UBool)(block==0); - } - - return trie->data[ABS(block)+(c&UTRIE_MASK)]; -} - -/** - * @internal - */ -static void -utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit, - uint32_t value, uint32_t initialValue, UBool overwrite) { - uint32_t *pLimit; - - pLimit=block+limit; - block+=start; - if(overwrite) { - while(blockisCompacted || - (uint32_t)start>0x10ffff || (uint32_t)limit>0x110000 || start>limit - ) { - return FALSE; - } - if(start==limit) { - return TRUE; /* nothing to do */ - } - - initialValue=trie->data[0]; - if(start&UTRIE_MASK) { - UChar32 nextStart; - - /* set partial block at [start..following block boundary[ */ - block=utrie_getDataBlock(trie, start); - if(block<0) { - return FALSE; - } - - nextStart=(start+UTRIE_DATA_BLOCK_LENGTH)&~UTRIE_MASK; - if(nextStart<=limit) { - utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH, - value, initialValue, overwrite); - start=nextStart; - } else { - utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK, - value, initialValue, overwrite); - return TRUE; - } - } - - /* number of positions in the last, partial block */ - rest=limit&UTRIE_MASK; - - /* round down limit to a block boundary */ - limit&=~UTRIE_MASK; - - /* iterate over all-value blocks */ - if(value==initialValue) { - repeatBlock=0; - } else { - repeatBlock=-1; - } - while(startindex[start>>UTRIE_SHIFT]; - if(block>0) { - /* already allocated, fill in value */ - utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, overwrite); - } else if(trie->data[-block]!=value && (block==0 || overwrite)) { - /* set the repeatBlock instead of the current block 0 or range block */ - if(repeatBlock>=0) { - trie->index[start>>UTRIE_SHIFT]=-repeatBlock; - } else { - /* create 
and set and fill the repeatBlock */ - repeatBlock=utrie_getDataBlock(trie, start); - if(repeatBlock<0) { - return FALSE; - } - - /* set the negative block number to indicate that it is a repeat block */ - trie->index[start>>UTRIE_SHIFT]=-repeatBlock; - utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, TRUE); - } - } - - start+=UTRIE_DATA_BLOCK_LENGTH; - } - - if(rest>0) { - /* set partial block at [last block boundary..limit[ */ - block=utrie_getDataBlock(trie, start); - if(block<0) { - return FALSE; - } - - utrie_fillBlock(trie->data+block, 0, rest, value, initialValue, overwrite); - } - - return TRUE; -} - -static int32_t -_findSameIndexBlock(const int32_t *idx, int32_t indexLength, - int32_t otherBlock) { - int32_t block, i; - - for(block=UTRIE_BMP_INDEX_LENGTH; blockindex; - - /* copy the lead surrogate indexes into a temporary array */ - uprv_memcpy(leadIndexes, idx+(0xd800>>UTRIE_SHIFT), 4*UTRIE_SURROGATE_BLOCK_COUNT); - - /* - * set all values for lead surrogate code *units* to leadUnitValue - * so that, by default, runtime lookups will find no data for associated - * supplementary code points, unless there is data for such code points - * which will result in a non-zero folding value below that is set for - * the respective lead units - * - * the above saved the indexes for surrogate code *points* - * fill the indexes with simplified code from utrie_setRange32() - */ - if(trie->leadUnitValue==trie->data[0]) { - block=0; /* leadUnitValue==initialValue, use all-initial-value block */ - } else { - /* create and fill the repeatBlock */ - block=utrie_allocDataBlock(trie); - if(block<0) { - /* data table overflow */ - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, trie->leadUnitValue, trie->data[0], TRUE); - block=-block; /* negative block number to indicate that it is a repeat block */ - } - for(c=(0xd800>>UTRIE_SHIFT); c<(0xdc00>>UTRIE_SHIFT); ++c) { - 
trie->index[c]=block; - } - - /* - * Fold significant index values into the area just after the BMP indexes. - * In case the first lead surrogate has significant data, - * its index block must be used first (in which case the folding is a no-op). - * Later all folded index blocks are moved up one to insert the copied - * lead surrogate indexes. - */ - indexLength=UTRIE_BMP_INDEX_LENGTH; - - /* search for any index (stage 1) entries for supplementary code points */ - for(c=0x10000; c<0x110000;) { - if(idx[c>>UTRIE_SHIFT]!=0) { - /* there is data, treat the full block for a lead surrogate */ - c&=~0x3ff; - -#ifdef UTRIE_DEBUG - ++countLeadCUWithData; - /* printf("supplementary data for lead surrogate U+%04lx\n", (long)(0xd7c0+(c>>10))); */ -#endif - - /* is there an identical index block? */ - block=_findSameIndexBlock(idx, indexLength, c>>UTRIE_SHIFT); - - /* - * get a folded value for [c..c+0x400[ and, - * if different from the value for the lead surrogate code point, - * set it for the lead surrogate code unit - */ - value=getFoldedValue(trie, c, block+UTRIE_SURROGATE_BLOCK_COUNT); - if(value!=utrie_get32(trie, U16_LEAD(c), NULL)) { - if(!utrie_set32(trie, U16_LEAD(c), value)) { - /* data table overflow */ - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - - /* if we did not find an identical index block... */ - if(block==indexLength) { - /* move the actual index (stage 1) entries from the supplementary position to the new one */ - uprv_memmove(idx+indexLength, - idx+(c>>UTRIE_SHIFT), - 4*UTRIE_SURROGATE_BLOCK_COUNT); - indexLength+=UTRIE_SURROGATE_BLOCK_COUNT; - } - } - c+=0x400; - } else { - c+=UTRIE_DATA_BLOCK_LENGTH; - } - } -#ifdef UTRIE_DEBUG - if(countLeadCUWithData>0) { - printf("supplementary data for %d lead surrogates\n", countLeadCUWithData); - } -#endif - - /* - * index array overflow? - * This is to guarantee that a folding offset is of the form - * UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023. 
- * If the index is too large, then n>=1024 and more than 10 bits are necessary. - * - * In fact, it can only ever become n==1024 with completely unfoldable data and - * the additional block of duplicated values for lead surrogates. - */ - if(indexLength>=UTRIE_MAX_INDEX_LENGTH) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - - /* - * make space for the lead surrogate index block and - * insert it between the BMP indexes and the folded ones - */ - uprv_memmove(idx+UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT, - idx+UTRIE_BMP_INDEX_LENGTH, - 4*(indexLength-UTRIE_BMP_INDEX_LENGTH)); - uprv_memcpy(idx+UTRIE_BMP_INDEX_LENGTH, - leadIndexes, - 4*UTRIE_SURROGATE_BLOCK_COUNT); - indexLength+=UTRIE_SURROGATE_BLOCK_COUNT; - -#ifdef UTRIE_DEBUG - printf("trie index count: BMP %ld all Unicode %ld folded %ld\n", - UTRIE_BMP_INDEX_LENGTH, (long)UTRIE_MAX_INDEX_LENGTH, indexLength); -#endif - - trie->indexLength=indexLength; -} - -/* - * Set a value in the trie index map to indicate which data block - * is referenced and which one is not. - * utrie_compact() will remove data blocks that are not used at all. 
- * Set - * - 0 if it is used - * - -1 if it is not used - */ -static void -_findUnusedBlocks(UNewTrie *trie) { - int32_t i; - - /* fill the entire map with "not used" */ - uprv_memset(trie->map, 0xff, (UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT)*4); - - /* mark each block that _is_ used with 0 */ - for(i=0; iindexLength; ++i) { - trie->map[ABS(trie->index[i])>>UTRIE_SHIFT]=0; - } - - /* never move the all-initial-value block 0 */ - trie->map[0]=0; -} - -static int32_t -_findSameDataBlock(const uint32_t *data, int32_t dataLength, - int32_t otherBlock, int32_t step) { - int32_t block; - - /* ensure that we do not even partially get past dataLength */ - dataLength-=UTRIE_DATA_BLOCK_LENGTH; - - for(block=0; block<=dataLength; block+=step) { - if(equal_uint32(data+block, data+otherBlock, UTRIE_DATA_BLOCK_LENGTH)) { - return block; - } - } - return -1; -} - -/* - * Compact a folded build-time trie. - * - * The compaction - * - removes blocks that are identical with earlier ones - * - overlaps adjacent blocks as much as possible (if overlap==TRUE) - * - moves blocks in steps of the data granularity - * - moves and overlaps blocks that overlap with multiple values in the overlap region - * - * It does not - * - try to move and overlap blocks that are not already adjacent - */ -static void -utrie_compact(UNewTrie *trie, UBool overlap, UErrorCode *pErrorCode) { - int32_t i, start, newStart, overlapStart; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } - - /* valid, uncompacted trie? 
*/ - if(trie==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if(trie->isCompacted) { - return; /* nothing left to do */ - } - - /* compaction */ - - /* initialize the index map with "block is used/unused" flags */ - _findUnusedBlocks(trie); - - /* if Latin-1 is preallocated and linear, then do not compact Latin-1 data */ - if(trie->isLatin1Linear && UTRIE_SHIFT<=8) { - overlapStart=UTRIE_DATA_BLOCK_LENGTH+256; - } else { - overlapStart=UTRIE_DATA_BLOCK_LENGTH; - } - - newStart=UTRIE_DATA_BLOCK_LENGTH; - for(start=newStart; startdataLength;) { - /* - * start: index of first entry of current block - * newStart: index where the current block is to be moved - * (right after current end of already-compacted data) - */ - - /* skip blocks that are not used */ - if(trie->map[start>>UTRIE_SHIFT]<0) { - /* advance start to the next block */ - start+=UTRIE_DATA_BLOCK_LENGTH; - - /* leave newStart with the previous block! */ - continue; - } - - /* search for an identical block */ - if( start>=overlapStart && - (i=_findSameDataBlock(trie->data, newStart, start, - overlap ? UTRIE_DATA_GRANULARITY : UTRIE_DATA_BLOCK_LENGTH)) - >=0 - ) { - /* found an identical block, set the other block's index value for the current block */ - trie->map[start>>UTRIE_SHIFT]=i; - - /* advance start to the next block */ - start+=UTRIE_DATA_BLOCK_LENGTH; - - /* leave newStart with the previous block! 
*/ - continue; - } - - /* see if the beginning of this block can be overlapped with the end of the previous block */ - if(overlap && start>=overlapStart) { - /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ - for(i=UTRIE_DATA_BLOCK_LENGTH-UTRIE_DATA_GRANULARITY; - i>0 && !equal_uint32(trie->data+(newStart-i), trie->data+start, i); - i-=UTRIE_DATA_GRANULARITY) {} - } else { - i=0; - } - - if(i>0) { - /* some overlap */ - trie->map[start>>UTRIE_SHIFT]=newStart-i; - - /* move the non-overlapping indexes to their new positions */ - start+=i; - for(i=UTRIE_DATA_BLOCK_LENGTH-i; i>0; --i) { - trie->data[newStart++]=trie->data[start++]; - } - } else if(newStartmap[start>>UTRIE_SHIFT]=newStart; - for(i=UTRIE_DATA_BLOCK_LENGTH; i>0; --i) { - trie->data[newStart++]=trie->data[start++]; - } - } else /* no overlap && newStart==start */ { - trie->map[start>>UTRIE_SHIFT]=start; - newStart+=UTRIE_DATA_BLOCK_LENGTH; - start=newStart; - } - } - - /* now adjust the index (stage 1) table */ - for(i=0; iindexLength; ++i) { - trie->index[i]=trie->map[ABS(trie->index[i])>>UTRIE_SHIFT]; - } - -#ifdef UTRIE_DEBUG - /* we saved some space */ - printf("compacting trie: count of 32-bit words %lu->%lu\n", - (long)trie->dataLength, (long)newStart); -#endif - - trie->dataLength=newStart; -} - -/* serialization ------------------------------------------------------------ */ - -/* - * Default function for the folding value: - * Just store the offset (16 bits) if there is any non-initial-value entry. - * - * The offset parameter is never 0. - * Returning the offset itself is safe for UTRIE_SHIFT>=5 because - * for UTRIE_SHIFT==5 the maximum index length is UTRIE_MAX_INDEX_LENGTH==0x8800 - * which fits into 16-bit trie values; - * for higher UTRIE_SHIFT, UTRIE_MAX_INDEX_LENGTH decreases. 
- * - * Theoretically, it would be safer for all possible UTRIE_SHIFT including - * those of 4 and lower to return offset>>UTRIE_SURROGATE_BLOCK_BITS - * which would always result in a value of 0x40..0x43f - * (start/end 1k blocks of supplementary Unicode code points). - * However, this would be uglier, and would not work for some existing - * binary data file formats. - * - * Also, we do not plan to change UTRIE_SHIFT because it would change binary - * data file formats, and we would probably not make it smaller because of - * the then even larger BMP index length even for empty tries. - */ -static uint32_t U_CALLCONV -defaultGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) { - uint32_t value, initialValue; - UChar32 limit; - UBool inBlockZero; - - initialValue=trie->data[0]; - limit=start+0x400; - while(start0 && dt==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - if(getFoldedValue==NULL) { - getFoldedValue=defaultGetFoldedValue; - } - - data = (uint8_t*)dt; - /* fold and compact if necessary, also checks that indexLength is within limits */ - if(!trie->isCompacted) { - /* compact once without overlap to improve folding */ - utrie_compact(trie, FALSE, pErrorCode); - - /* fold the supplementary part of the index array */ - utrie_fold(trie, getFoldedValue, pErrorCode); - - /* compact again with overlap for minimum data array length */ - utrie_compact(trie, TRUE, pErrorCode); - - trie->isCompacted=TRUE; - if(U_FAILURE(*pErrorCode)) { - return 0; - } - } - - /* is dataLength within limits? */ - if( (reduceTo16Bits ? 
(trie->dataLength+trie->indexLength) : trie->dataLength) >= UTRIE_MAX_DATA_LENGTH) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - } - - length=sizeof(UTrieHeader)+2*trie->indexLength; - if(reduceTo16Bits) { - length+=2*trie->dataLength; - } else { - length+=4*trie->dataLength; - } - - if(length>capacity) { - return length; /* preflighting */ - } - -#ifdef UTRIE_DEBUG - printf("**UTrieLengths(serialize)** index:%6ld data:%6ld serialized:%6ld\n", - (long)trie->indexLength, (long)trie->dataLength, (long)length); -#endif - - /* set the header fields */ - header=(UTrieHeader *)data; - data+=sizeof(UTrieHeader); - - header->signature=0x54726965; /* "Trie" */ - header->options=UTRIE_SHIFT | (UTRIE_INDEX_SHIFT<options|=UTRIE_OPTIONS_DATA_IS_32_BIT; - } - if(trie->isLatin1Linear) { - header->options|=UTRIE_OPTIONS_LATIN1_IS_LINEAR; - } - - header->indexLength=trie->indexLength; - header->dataLength=trie->dataLength; - - /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */ - if(reduceTo16Bits) { - /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */ - p=(uint32_t *)trie->index; - dest16=(uint16_t *)data; - for(i=trie->indexLength; i>0; --i) { - *dest16++=(uint16_t)((*p++ + trie->indexLength)>>UTRIE_INDEX_SHIFT); - } - - /* write 16-bit data values */ - p=trie->data; - for(i=trie->dataLength; i>0; --i) { - *dest16++=(uint16_t)*p++; - } - } else { - /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */ - p=(uint32_t *)trie->index; - dest16=(uint16_t *)data; - for(i=trie->indexLength; i>0; --i) { - *dest16++=(uint16_t)(*p++ >> UTRIE_INDEX_SHIFT); - } - - /* write 32-bit data values */ - uprv_memcpy(dest16, trie->data, 4*(size_t)trie->dataLength); - } - - return length; -} - -/* inverse to defaultGetFoldedValue() */ -U_CAPI int32_t U_EXPORT2 -utrie_defaultGetFoldingOffset(uint32_t data) { - return (int32_t)data; -} - -U_CAPI int32_t U_EXPORT2 -utrie_unserialize(UTrie *trie, const void *data, int32_t 
length, UErrorCode *pErrorCode) { - const UTrieHeader *header; - const uint16_t *p16; - uint32_t options; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return -1; - } - - /* enough data for a trie header? */ - if(length<(int32_t)sizeof(UTrieHeader)) { - *pErrorCode=U_INVALID_FORMAT_ERROR; - return -1; - } - - /* check the signature */ - header=(const UTrieHeader *)data; - if(header->signature!=0x54726965) { - *pErrorCode=U_INVALID_FORMAT_ERROR; - return -1; - } - - /* get the options and check the shift values */ - options=header->options; - if( (options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || - ((options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT - ) { - *pErrorCode=U_INVALID_FORMAT_ERROR; - return -1; - } - trie->isLatin1Linear= (UBool)((options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0); - - /* get the length values */ - trie->indexLength=header->indexLength; - trie->dataLength=header->dataLength; - - length-=(int32_t)sizeof(UTrieHeader); - - /* enough data for the index? 
*/ - if(length<2*trie->indexLength) { - *pErrorCode=U_INVALID_FORMAT_ERROR; - return -1; - } - p16=(const uint16_t *)(header+1); - trie->index=p16; - p16+=trie->indexLength; - length-=2*trie->indexLength; - - /* get the data */ - if(options&UTRIE_OPTIONS_DATA_IS_32_BIT) { - if(length<4*trie->dataLength) { - *pErrorCode=U_INVALID_FORMAT_ERROR; - return -1; - } - trie->data32=(const uint32_t *)p16; - trie->initialValue=trie->data32[0]; - length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+4*trie->dataLength; - } else { - if(length<2*trie->dataLength) { - *pErrorCode=U_INVALID_FORMAT_ERROR; - return -1; - } - - /* the "data16" data is used via the index pointer */ - trie->data32=NULL; - trie->initialValue=trie->index[trie->indexLength]; - length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+2*trie->dataLength; - } - - trie->getFoldingOffset=utrie_defaultGetFoldingOffset; - - return length; -} - -U_CAPI int32_t U_EXPORT2 -utrie_unserializeDummy(UTrie *trie, - void *data, int32_t length, - uint32_t initialValue, uint32_t leadUnitValue, - UBool make16BitTrie, - UErrorCode *pErrorCode) { - uint16_t *p16; - int32_t actualLength, latin1Length, i, limit; - uint16_t block; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return -1; - } - - /* calculate the actual size of the dummy trie data */ - - /* max(Latin-1, block 0) */ - latin1Length= 256; /*UTRIE_SHIFT<=8 ? 256 : UTRIE_DATA_BLOCK_LENGTH;*/ - - trie->indexLength=UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT; - trie->dataLength=latin1Length; - if(leadUnitValue!=initialValue) { - trie->dataLength+=UTRIE_DATA_BLOCK_LENGTH; - } - - actualLength=trie->indexLength*2; - if(make16BitTrie) { - actualLength+=trie->dataLength*2; - } else { - actualLength+=trie->dataLength*4; - } - - /* enough space for the dummy trie? 
*/ - if(lengthisLatin1Linear=TRUE; - trie->initialValue=initialValue; - - /* fill the index and data arrays */ - p16=(uint16_t *)data; - trie->index=p16; - - if(make16BitTrie) { - /* indexes to block 0 */ - block=(uint16_t)(trie->indexLength>>UTRIE_INDEX_SHIFT); - limit=trie->indexLength; - for(i=0; i>UTRIE_INDEX_SHIFT); - i=0xd800>>UTRIE_SHIFT; - limit=0xdc00>>UTRIE_SHIFT; - for(; idata32=NULL; - - /* Latin-1 data */ - p16+=trie->indexLength; - for(i=0; iindexLength*2); - - if(leadUnitValue!=initialValue) { - /* indexes for lead surrogate code units to the block after Latin-1 */ - block=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT); - i=0xd800>>UTRIE_SHIFT; - limit=0xdc00>>UTRIE_SHIFT; - for(; idata32=p32=(uint32_t *)(p16+trie->indexLength); - - /* Latin-1 data */ - for(i=0; igetFoldingOffset=utrie_defaultGetFoldingOffset; - - return actualLength; -} - -/* enumeration -------------------------------------------------------------- */ - -/* default UTrieEnumValue() returns the input value itself */ -static uint32_t U_CALLCONV -enumSameValue(const void * /*context*/, uint32_t value) { - return value; -} - -/** - * Enumerate all ranges of code points with the same relevant values. - * The values are transformed from the raw trie entries by the enumValue function. 
- */ -U_CAPI void U_EXPORT2 -utrie_enum(const UTrie *trie, - UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context) { - const uint32_t *data32; - const uint16_t *idx; - - uint32_t value, prevValue, initialValue; - UChar32 c, prev; - int32_t l, i, j, block, prevBlock, nullBlock, offset; - - /* check arguments */ - if(trie==NULL || trie->index==NULL || enumRange==NULL) { - return; - } - if(enumValue==NULL) { - enumValue=enumSameValue; - } - - idx=trie->index; - data32=trie->data32; - - /* get the enumeration value that corresponds to an initial-value trie data entry */ - initialValue=enumValue(context, trie->initialValue); - - if(data32==NULL) { - nullBlock=trie->indexLength; - } else { - nullBlock=0; - } - - /* set variables for previous range */ - prevBlock=nullBlock; - prev=0; - prevValue=initialValue; - - /* enumerate BMP - the main loop enumerates data blocks */ - for(i=0, c=0; c<=0xffff; ++i) { - if(c==0xd800) { - /* skip lead surrogate code _units_, go to lead surr. code _points_ */ - i=UTRIE_BMP_INDEX_LENGTH; - } else if(c==0xdc00) { - /* go back to regular BMP code points */ - i=c>>UTRIE_SHIFT; - } - - block=idx[i]<0) { - /* the block is not filled with all the same value */ - prevBlock=-1; - } - prev=c; - prevValue=value; - } - ++c; - } - } - } - - /* enumerate supplementary code points */ - for(l=0xd800; l<0xdc00;) { - /* lead surrogate access */ - offset=idx[l>>UTRIE_SHIFT]<getFoldingOffset(value); - if(offset<=0) { - /* no data for this lead surrogate */ - if(prevValue!=initialValue) { - if(prev0) { - /* the block is not filled with all the same value */ - prevBlock=-1; - } - prev=c; - prevValue=value; - } - ++c; - } - } - } while(++i>UTRIE_SHIFT, - - /** - * Shift size for shifting left the index array values. - * Increases possible data size with 16-bit index values at the cost - * of compactability. - * This requires blocks of stage 2 data to be aligned by UTRIE_DATA_GRANULARITY. 
- * 0..UTRIE_SHIFT - */ - UTRIE_INDEX_SHIFT=2, - - /** The alignment size of a stage 2 data block. Also the granularity for compaction. */ - UTRIE_DATA_GRANULARITY=1<>UTRIE_SHIFT - */ - UTRIE_SURROGATE_BLOCK_COUNT=(1<>UTRIE_SHIFT -}; - -/** - * Length of the index (stage 1) array before folding. - * Maximum number of Unicode code points (0x110000) shifted right by UTRIE_SHIFT. - */ -#define UTRIE_MAX_INDEX_LENGTH (0x110000>>UTRIE_SHIFT) - -/** - * Maximum length of the runtime data (stage 2) array. - * Limited by 16-bit index values that are left-shifted by UTRIE_INDEX_SHIFT. - */ -#define UTRIE_MAX_DATA_LENGTH (0x10000<=UTRIE_BMP_INDEX_LENGTH, or 0 if there is no data for the lead surrogate - */ -typedef int32_t U_CALLCONV -UTrieGetFoldingOffset(uint32_t data); - -/** - * Run-time Trie structure. - * - * Either the data table is 16 bits wide and accessed via the index - * pointer, with each index item increased by indexLength; - * in this case, data32==NULL. - * - * Or the data table is 32 bits wide and accessed via the data32 pointer. - */ -struct UTrie { - const uint16_t *index; - const uint32_t *data32; /* NULL if 16b data is used via index */ - - /** - * This function is not used in _FROM_LEAD, _FROM_BMP, and _FROM_OFFSET_TRAIL macros. - * If convenience macros like _GET16 or _NEXT32 are used, this function must be set. - * - * utrie_unserialize() sets a default function which simply returns - * the lead surrogate's value itself - which is the inverse of the default - * folding function used by utrie_serialize(). 
- * - * @see UTrieGetFoldingOffset - */ - UTrieGetFoldingOffset *getFoldingOffset; - - int32_t indexLength, dataLength; - uint32_t initialValue; - UBool isLatin1Linear; -}; - -#ifndef __UTRIE2_H__ -typedef struct UTrie UTrie; -#endif - -/** Internal trie getter from an offset (0 if c16 is a BMP/lead units) and a 16-bit unit */ -#define _UTRIE_GET_RAW(trie, data, offset, c16) \ - (trie)->data[ \ - ((int32_t)((trie)->index[(offset)+((c16)>>UTRIE_SHIFT)])<getFoldingOffset(result); \ -\ - /* get the real data from the folded lead/trail units */ \ - if(__offset>0) { \ - (result)=_UTRIE_GET_RAW((trie), data, __offset, (c2)&0x3ff); \ - } else { \ - (result)=(resultType)((trie)->initialValue); \ - } \ -} - -/** Internal trie getter from a BMP code point, treating a lead surrogate as a normal code point */ -#define _UTRIE_GET_FROM_BMP(trie, data, c16) \ - _UTRIE_GET_RAW(trie, data, 0xd800<=(c16) && (c16)<=0xdbff ? UTRIE_LEAD_INDEX_DISP : 0, c16); - -/** - * Internal trie getter from a code point. - * Could be faster(?) 
but longer with - * if((c32)<=0xd7ff) { (result)=_UTRIE_GET_RAW(trie, data, 0, c32); } - */ -#define _UTRIE_GET(trie, data, c32, result, resultType) \ - if((uint32_t)(c32)<=0xffff) { \ - /* BMP code points */ \ - (result)=_UTRIE_GET_FROM_BMP(trie, data, c32); \ - } else if((uint32_t)(c32)<=0x10ffff) { \ - /* supplementary code point */ \ - UChar __lead16=U16_LEAD(c32); \ - _UTRIE_GET_FROM_PAIR(trie, data, __lead16, c32, result, resultType); \ - } else { \ - /* out of range */ \ - (result)=(resultType)((trie)->initialValue); \ - } - -/** Internal next-post-increment: get the next code point (c, c2) and its data */ -#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result, resultType) { \ - (c)=*(src)++; \ - if(!U16_IS_LEAD(c)) { \ - (c2)=0; \ - (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \ - } else if((src)!=(limit) && U16_IS_TRAIL((c2)=*(src))) { \ - ++(src); \ - _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \ - } else { \ - /* unpaired lead surrogate code point */ \ - (c2)=0; \ - (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \ - } \ -} - -/** Internal previous: get the previous code point (c, c2) and its data */ -#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result, resultType) { \ - (c)=*--(src); \ - if(!U16_IS_SURROGATE(c)) { \ - (c2)=0; \ - (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \ - } else if(!U16_IS_SURROGATE_LEAD(c)) { \ - /* trail surrogate */ \ - if((start)!=(src) && U16_IS_LEAD((c2)=*((src)-1))) { \ - --(src); \ - (result)=(c); (c)=(c2); (c2)=(UChar)(result); /* swap c, c2 */ \ - _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \ - } else { \ - /* unpaired trail surrogate code point */ \ - (c2)=0; \ - (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \ - } \ - } else { \ - /* unpaired lead surrogate code point */ \ - (c2)=0; \ - (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \ - } \ -} - -/* Public UTrie API 
---------------------------------------------------------*/ - -/** - * Get a pointer to the contiguous part of the data array - * for the Latin-1 range (U+0000..U+00ff). - * Must be used only if the Latin-1 range is in fact linear - * (trie->isLatin1Linear). - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @return (const uint16_t *) pointer to values for Latin-1 code points - */ -#define UTRIE_GET16_LATIN1(trie) ((trie)->index+(trie)->indexLength+UTRIE_DATA_BLOCK_LENGTH) - -/** - * Get a pointer to the contiguous part of the data array - * for the Latin-1 range (U+0000..U+00ff). - * Must be used only if the Latin-1 range is in fact linear - * (trie->isLatin1Linear). - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @return (const uint32_t *) pointer to values for Latin-1 code points - */ -#define UTRIE_GET32_LATIN1(trie) ((trie)->data32+UTRIE_DATA_BLOCK_LENGTH) - -/** - * Get a 16-bit trie value from a BMP code point (UChar, <=U+ffff). - * c16 may be a lead surrogate, which may have a value including a folding offset. - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param c16 (UChar, in) the input BMP code point - * @return (uint16_t) trie lookup result - */ -#define UTRIE_GET16_FROM_LEAD(trie, c16) _UTRIE_GET_RAW(trie, index, 0, c16) - -/** - * Get a 32-bit trie value from a BMP code point (UChar, <=U+ffff). - * c16 may be a lead surrogate, which may have a value including a folding offset. - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param c16 (UChar, in) the input BMP code point - * @return (uint32_t) trie lookup result - */ -#define UTRIE_GET32_FROM_LEAD(trie, c16) _UTRIE_GET_RAW(trie, data32, 0, c16) - -/** - * Get a 16-bit trie value from a BMP code point (UChar, <=U+ffff). 
- * Even lead surrogate code points are treated as normal code points, - * with unfolded values that may differ from _FROM_LEAD() macro results for them. - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param c16 (UChar, in) the input BMP code point - * @return (uint16_t) trie lookup result - */ -#define UTRIE_GET16_FROM_BMP(trie, c16) _UTRIE_GET_FROM_BMP(trie, index, c16) - -/** - * Get a 32-bit trie value from a BMP code point (UChar, <=U+ffff). - * Even lead surrogate code points are treated as normal code points, - * with unfolded values that may differ from _FROM_LEAD() macro results for them. - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param c16 (UChar, in) the input BMP code point - * @return (uint32_t) trie lookup result - */ -#define UTRIE_GET32_FROM_BMP(trie, c16) _UTRIE_GET_FROM_BMP(trie, data32, c16) - -/** - * Get a 16-bit trie value from a code point. - * Even lead surrogate code points are treated as normal code points, - * with unfolded values that may differ from _FROM_LEAD() macro results for them. - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param c32 (UChar32, in) the input code point - * @param result (uint16_t, out) uint16_t variable for the trie lookup result - */ -#define UTRIE_GET16(trie, c32, result) _UTRIE_GET(trie, index, c32, result, uint16_t) - -/** - * Get a 32-bit trie value from a code point. - * Even lead surrogate code points are treated as normal code points, - * with unfolded values that may differ from _FROM_LEAD() macro results for them. 
- * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param c32 (UChar32, in) the input code point - * @param result (uint32_t, out) uint32_t variable for the trie lookup result - */ -#define UTRIE_GET32(trie, c32, result) _UTRIE_GET(trie, data32, c32, result, uint32_t) - -/** - * Get the next code point (c, c2), post-increment src, - * and get a 16-bit value from the trie. - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param src (const UChar *, in/out) the source text pointer - * @param limit (const UChar *, in) the limit pointer for the text, or NULL - * @param c (UChar, out) variable for the BMP or lead code unit - * @param c2 (UChar, out) variable for 0 or the trail code unit - * @param result (uint16_t, out) uint16_t variable for the trie lookup result - */ -#define UTRIE_NEXT16(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, index, src, limit, c, c2, result, uint16_t) - -/** - * Get the next code point (c, c2), post-increment src, - * and get a 32-bit value from the trie. - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param src (const UChar *, in/out) the source text pointer - * @param limit (const UChar *, in) the limit pointer for the text, or NULL - * @param c (UChar, out) variable for the BMP or lead code unit - * @param c2 (UChar, out) variable for 0 or the trail code unit - * @param result (uint32_t, out) uint32_t variable for the trie lookup result - */ -#define UTRIE_NEXT32(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, data32, src, limit, c, c2, result, uint32_t) - -/** - * Get the previous code point (c, c2), pre-decrement src, - * and get a 16-bit value from the trie. 
- * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param start (const UChar *, in) the start pointer for the text, or NULL - * @param src (const UChar *, in/out) the source text pointer - * @param c (UChar, out) variable for the BMP or lead code unit - * @param c2 (UChar, out) variable for 0 or the trail code unit - * @param result (uint16_t, out) uint16_t variable for the trie lookup result - */ -#define UTRIE_PREVIOUS16(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, index, start, src, c, c2, result, uint16_t) - -/** - * Get the previous code point (c, c2), pre-decrement src, - * and get a 32-bit value from the trie. - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param start (const UChar *, in) the start pointer for the text, or NULL - * @param src (const UChar *, in/out) the source text pointer - * @param c (UChar, out) variable for the BMP or lead code unit - * @param c2 (UChar, out) variable for 0 or the trail code unit - * @param result (uint32_t, out) uint32_t variable for the trie lookup result - */ -#define UTRIE_PREVIOUS32(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, data32, start, src, c, c2, result, uint32_t) - -/** - * Get a 16-bit trie value from a pair of surrogates. - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param c (UChar, in) a lead surrogate - * @param c2 (UChar, in) a trail surrogate - * @param result (uint16_t, out) uint16_t variable for the trie lookup result - */ -#define UTRIE_GET16_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, index, c, c2, result, uint16_t) - -/** - * Get a 32-bit trie value from a pair of surrogates. 
- * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param c (UChar, in) a lead surrogate - * @param c2 (UChar, in) a trail surrogate - * @param result (uint32_t, out) uint32_t variable for the trie lookup result - */ -#define UTRIE_GET32_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, data32, c, c2, result, uint32_t) - -/** - * Get a 16-bit trie value from a folding offset (from the value of a lead surrogate) - * and a trail surrogate. - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param offset (int32_t, in) the folding offset from the value of a lead surrogate - * @param c2 (UChar, in) a trail surrogate (only the 10 low bits are significant) - * @return (uint16_t) trie lookup result - */ -#define UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2) _UTRIE_GET_RAW(trie, index, offset, (c2)&0x3ff) - -/** - * Get a 32-bit trie value from a folding offset (from the value of a lead surrogate) - * and a trail surrogate. - * - * @param trie (const UTrie *, in) a pointer to the runtime trie structure - * @param offset (int32_t, in) the folding offset from the value of a lead surrogate - * @param c2 (UChar, in) a trail surrogate (only the 10 low bits are significant) - * @return (uint32_t) trie lookup result - */ -#define UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2) _UTRIE_GET_RAW(trie, data32, offset, (c2)&0x3ff) - -/* enumeration callback types */ - -/** - * Callback from utrie_enum(), extracts a uint32_t value from a - * trie value. This value will be passed on to the UTrieEnumRange function. 
- * - * @param context an opaque pointer, as passed into utrie_enum() - * @param value a value from the trie - * @return the value that is to be passed on to the UTrieEnumRange function - */ -typedef uint32_t U_CALLCONV -UTrieEnumValue(const void *context, uint32_t value); - -/** - * Callback from utrie_enum(), is called for each contiguous range - * of code points with the same value as retrieved from the trie and - * transformed by the UTrieEnumValue function. - * - * The callback function can stop the enumeration by returning FALSE. - * - * @param context an opaque pointer, as passed into utrie_enum() - * @param start the first code point in a contiguous range with value - * @param limit one past the last code point in a contiguous range with value - * @param value the value that is set for all code points in [start..limit[ - * @return FALSE to stop the enumeration - */ -typedef UBool U_CALLCONV -UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value); - -/** - * Enumerate efficiently all values in a trie. - * For each entry in the trie, the value to be delivered is passed through - * the UTrieEnumValue function. - * The value is unchanged if that function pointer is NULL. - * - * For each contiguous range of code points with a given value, - * the UTrieEnumRange function is called. - * - * @param trie a pointer to the runtime trie structure - * @param enumValue a pointer to a function that may transform the trie entry value, - * or NULL if the values from the trie are to be used directly - * @param enumRange a pointer to a function that is called for each contiguous range - * of code points with the same value - * @param context an opaque pointer that is passed on to the callback functions - */ -U_CAPI void U_EXPORT2 -utrie_enum(const UTrie *trie, - UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context); - -/** - * Unserialize a trie from 32-bit-aligned memory. - * Inverse of utrie_serialize(). 
- * Fills the UTrie runtime trie structure with the settings for the trie data. - * - * @param trie a pointer to the runtime trie structure - * @param data a pointer to 32-bit-aligned memory containing trie data - * @param length the number of bytes available at data - * @param pErrorCode an in/out ICU UErrorCode - * @return the number of bytes at data taken up by the trie data - */ -U_CAPI int32_t U_EXPORT2 -utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode); - -/** - * "Unserialize" a dummy trie. - * A dummy trie is an empty runtime trie, used when a real data trie cannot - * be loaded. - * - * The input memory is filled so that the trie always returns the initialValue, - * or the leadUnitValue for lead surrogate code points. - * The Latin-1 part is always set up to be linear. - * - * @param trie a pointer to the runtime trie structure - * @param data a pointer to 32-bit-aligned memory to be filled with the dummy trie data - * @param length the number of bytes available at data (recommended to use UTRIE_DUMMY_SIZE) - * @param initialValue the initial value that is set for all code points - * @param leadUnitValue the value for lead surrogate code _units_ that do not - * have associated supplementary data - * @param pErrorCode an in/out ICU UErrorCode - * - * @see UTRIE_DUMMY_SIZE - * @see utrie_open - */ -U_CAPI int32_t U_EXPORT2 -utrie_unserializeDummy(UTrie *trie, - void *data, int32_t length, - uint32_t initialValue, uint32_t leadUnitValue, - UBool make16BitTrie, - UErrorCode *pErrorCode); - -/** - * Default implementation for UTrie.getFoldingOffset, set automatically by - * utrie_unserialize(). - * Simply returns the lead surrogate's value itself - which is the inverse - * of the default folding function used by utrie_serialize(). - * Exported for static const UTrie structures. 
- * - * @see UTrieGetFoldingOffset - */ -U_CAPI int32_t U_EXPORT2 -utrie_defaultGetFoldingOffset(uint32_t data); - -/* Building a trie ----------------------------------------------------------*/ - -/** - * Build-time trie structure. - * Opaque definition, here only to make fillIn parameters possible - * for utrie_open() and utrie_clone(). - */ -struct UNewTrie { - /** - * Index values at build-time are 32 bits wide for easier processing. - * Bit 31 is set if the data block is used by multiple index values (from utrie_setRange()). - */ - int32_t index[UTRIE_MAX_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT]; - uint32_t *data; - - uint32_t leadUnitValue; - int32_t indexLength, dataCapacity, dataLength; - UBool isAllocated, isDataAllocated; - UBool isLatin1Linear, isCompacted; - - /** - * Map of adjusted indexes, used in utrie_compact(). - * Maps from original indexes to new ones. - */ - int32_t map[UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT]; -}; - -typedef struct UNewTrie UNewTrie; - -/** - * Build-time trie callback function, used with utrie_serialize(). - * This function calculates a lead surrogate's value including a folding offset - * from the 1024 supplementary code points [start..start+1024[ . - * It is U+10000 <= start <= U+10fc00 and (start&0x3ff)==0. - * - * The folding offset is provided by the caller. - * It is offset=UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023. - * Instead of the offset itself, n can be stored in 10 bits - - * or fewer if it can be assumed that few lead surrogates have associated data. - * - * The returned value must be - * - not zero if and only if there is relevant data - * for the corresponding 1024 supplementary code points - * - such that UTrie.getFoldingOffset(UNewTrieGetFoldedValue(..., offset))==offset - * - * @return a folded value, or 0 if there is no relevant data for the lead surrogate. 
- */ -typedef uint32_t U_CALLCONV -UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset); - -/** - * Open a build-time trie structure. - * The size of the build-time data array is specified to avoid allocating a large - * array in all cases. The array itself can also be passed in. - * - * Although the trie is never fully expanded to a linear array, especially when - * utrie_setRange32() is used, the data array could be large during build time. - * The maximum length is - * UTRIE_MAX_BUILD_TIME_DATA_LENGTH=0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400. - * (Number of Unicode code points + one all-initial-value block + - * possible duplicate entries for 1024 lead surrogates.) - * (UTRIE_DATA_BLOCK_LENGTH<=0x200 in all cases.) - * - * @param fillIn a pointer to a UNewTrie structure to be initialized (will not be released), or - * NULL if one is to be allocated - * @param aliasData a pointer to a data array to be used (will not be released), or - * NULL if one is to be allocated - * @param maxDataLength the capacity of aliasData (if not NULL) or - * the length of the data array to be allocated - * @param initialValue the initial value that is set for all code points - * @param leadUnitValue the value for lead surrogate code _units_ that do not - * have associated supplementary data - * @param latin1Linear a flag indicating whether the Latin-1 range is to be allocated and - * kept in a linear, contiguous part of the data array - * @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie - */ -U_CAPI UNewTrie * U_EXPORT2 -utrie_open(UNewTrie *fillIn, - uint32_t *aliasData, int32_t maxDataLength, - uint32_t initialValue, uint32_t leadUnitValue, - UBool latin1Linear); - -/** - * Clone a build-time trie structure with all entries. 
- * - * @param fillIn like in utrie_open() - * @param other the build-time trie structure to clone - * @param aliasData like in utrie_open(), - * used if aliasDataLength>=(capacity of other's data array) - * @param aliasDataLength the length of aliasData - * @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie - */ -U_CAPI UNewTrie * U_EXPORT2 -utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataLength); - -/** - * Close a build-time trie structure, and release memory - * that was allocated by utrie_open() or utrie_clone(). - * - * @param trie the build-time trie - */ -U_CAPI void U_EXPORT2 -utrie_close(UNewTrie *trie); - -/** - * Get the data array of a build-time trie. - * The data may be modified, but entries that are equal before - * must still be equal after modification. - * - * @param trie the build-time trie - * @param pLength (out) a pointer to a variable that receives the number - * of entries in the data array - * @return the data array - */ -U_CAPI uint32_t * U_EXPORT2 -utrie_getData(UNewTrie *trie, int32_t *pLength); - -/** - * Set a value for a code point. - * - * @param trie the build-time trie - * @param c the code point - * @param value the value - * @return FALSE if a failure occurred (illegal argument or data array overrun) - */ -U_CAPI UBool U_EXPORT2 -utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value); - -/** - * Get a value from a code point as stored in the build-time trie. - * - * @param trie the build-time trie - * @param c the code point - * @param pInBlockZero if not NULL, then *pInBlockZero is set to TRUE - * iff the value is retrieved from block 0; - * block 0 is the all-initial-value initial block - * @return the value - */ -U_CAPI uint32_t U_EXPORT2 -utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero); - -/** - * Set a value in a range of code points [start..limit[. 
- * All code points c with start<=c=UTRIE_DATA_BLOCK_LENGTH */ - int32_t dataLength; -} UTrieHeader; - -/** - * Constants for use with UTrieHeader.options. - * @internal - */ -enum { - /** Mask to get the UTRIE_SHIFT value from options. */ - UTRIE_OPTIONS_SHIFT_MASK=0xf, - - /** Shift options right this much to get the UTRIE_INDEX_SHIFT value. */ - UTRIE_OPTIONS_INDEX_SHIFT=4, - - /** If set, then the data (stage 2) array is 32 bits wide. */ - UTRIE_OPTIONS_DATA_IS_32_BIT=0x100, - - /** - * If set, then Latin-1 data (for U+0000..U+00ff) is stored in the data (stage 2) array - * as a simple, linear array at data+UTRIE_DATA_BLOCK_LENGTH. - */ - UTRIE_OPTIONS_LATIN1_IS_LINEAR=0x200 -}; - -U_CDECL_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/utrie2.cpp b/deps/node/deps/icu-small/source/common/utrie2.cpp deleted file mode 100644 index 24ef5782..00000000 --- a/deps/node/deps/icu-small/source/common/utrie2.cpp +++ /dev/null @@ -1,663 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2001-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: utrie2.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2008aug16 (starting from a copy of utrie.c) -* created by: Markus W. Scherer -* -* This is a common implementation of a Unicode trie. -* It is a kind of compressed, serializable table of 16- or 32-bit values associated with -* Unicode code points (0..0x10ffff). -* This is the second common version of a Unicode trie (hence the name UTrie2). -* See utrie2.h for a comparison. -* -* This file contains only the runtime and enumeration code, for read-only access. -* See utrie2_builder.c for the builder code. 
-*/ -#include "unicode/utypes.h" -#ifdef UCPTRIE_DEBUG -#include "unicode/umutablecptrie.h" -#endif -#include "unicode/utf.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "utrie2.h" -#include "utrie2_impl.h" -#include "uassert.h" - -/* Public UTrie2 API implementation ----------------------------------------- */ - -static uint32_t -get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) { - int32_t i2, block; - - if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) { - return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY]; - } - - if(U_IS_LEAD(c) && fromLSCP) { - i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ - (c>>UTRIE2_SHIFT_2); - } else { - i2=trie->index1[c>>UTRIE2_SHIFT_1]+ - ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); - } - block=trie->index2[i2]; - return trie->data[block+(c&UTRIE2_DATA_MASK)]; -} - -U_CAPI uint32_t U_EXPORT2 -utrie2_get32(const UTrie2 *trie, UChar32 c) { - if(trie->data16!=NULL) { - return UTRIE2_GET16(trie, c); - } else if(trie->data32!=NULL) { - return UTRIE2_GET32(trie, c); - } else if((uint32_t)c>0x10ffff) { - return trie->errorValue; - } else { - return get32(trie->newTrie, c, TRUE); - } -} - -U_CAPI uint32_t U_EXPORT2 -utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) { - if(!U_IS_LEAD(c)) { - return trie->errorValue; - } - if(trie->data16!=NULL) { - return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c); - } else if(trie->data32!=NULL) { - return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); - } else { - return get32(trie->newTrie, c, FALSE); - } -} - -static inline int32_t -u8Index(const UTrie2 *trie, UChar32 c, int32_t i) { - int32_t idx= - _UTRIE2_INDEX_FROM_CP( - trie, - trie->data32==NULL ? 
trie->indexLength : 0, - c); - return (idx<<3)|i; -} - -U_CAPI int32_t U_EXPORT2 -utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, - const uint8_t *src, const uint8_t *limit) { - int32_t i, length; - i=0; - /* support 64-bit pointers by avoiding cast of arbitrary difference */ - if((limit-src)<=7) { - length=(int32_t)(limit-src); - } else { - length=7; - } - c=utf8_nextCharSafeBody(src, &i, length, c, -1); - return u8Index(trie, c, i); -} - -U_CAPI int32_t U_EXPORT2 -utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, - const uint8_t *start, const uint8_t *src) { - int32_t i, length; - /* support 64-bit pointers by avoiding cast of arbitrary difference */ - if((src-start)<=7) { - i=length=(int32_t)(src-start); - } else { - i=length=7; - start=src-7; - } - c=utf8_prevCharSafeBody(start, 0, &i, c, -1); - i=length-i; /* number of bytes read backward from src */ - return u8Index(trie, c, i); -} - -U_CAPI UTrie2 * U_EXPORT2 -utrie2_openFromSerialized(UTrie2ValueBits valueBits, - const void *data, int32_t length, int32_t *pActualLength, - UErrorCode *pErrorCode) { - const UTrie2Header *header; - const uint16_t *p16; - int32_t actualLength; - - UTrie2 tempTrie; - UTrie2 *trie; - - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) || - valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* enough data for a trie header? 
*/ - if(length<(int32_t)sizeof(UTrie2Header)) { - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } - - /* check the signature */ - header=(const UTrie2Header *)data; - if(header->signature!=UTRIE2_SIG) { - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } - - /* get the options */ - if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) { - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } - - /* get the length values and offsets */ - uprv_memset(&tempTrie, 0, sizeof(tempTrie)); - tempTrie.indexLength=header->indexLength; - tempTrie.dataLength=header->shiftedDataLength<index2NullOffset; - tempTrie.dataNullOffset=header->dataNullOffset; - - tempTrie.highStart=header->shiftedHighStart<memory=(uint32_t *)data; - trie->length=actualLength; - trie->isMemoryOwned=FALSE; -#ifdef UTRIE2_DEBUG - trie->name="fromSerialized"; -#endif - - /* set the pointers to its index and data arrays */ - p16=(const uint16_t *)(header+1); - trie->index=p16; - p16+=trie->indexLength; - - /* get the data */ - switch(valueBits) { - case UTRIE2_16_VALUE_BITS: - trie->data16=p16; - trie->data32=NULL; - trie->initialValue=trie->index[trie->dataNullOffset]; - trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET]; - break; - case UTRIE2_32_VALUE_BITS: - trie->data16=NULL; - trie->data32=(const uint32_t *)p16; - trie->initialValue=trie->data32[trie->dataNullOffset]; - trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET]; - break; - default: - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } - - if(pActualLength!=NULL) { - *pActualLength=actualLength; - } - return trie; -} - -U_CAPI UTrie2 * U_EXPORT2 -utrie2_openDummy(UTrie2ValueBits valueBits, - uint32_t initialValue, uint32_t errorValue, - UErrorCode *pErrorCode) { - UTrie2 *trie; - UTrie2Header *header; - uint32_t *p; - uint16_t *dest16; - int32_t indexLength, dataLength, length, i; - int32_t dataMove; /* >0 if the data is moved to the end of the index array */ - - if(U_FAILURE(*pErrorCode)) { - 
return 0; - } - - if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* calculate the total length of the dummy trie data */ - indexLength=UTRIE2_INDEX_1_OFFSET; - dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY; - length=(int32_t)sizeof(UTrie2Header)+indexLength*2; - if(valueBits==UTRIE2_16_VALUE_BITS) { - length+=dataLength*2; - } else { - length+=dataLength*4; - } - - /* allocate the trie */ - trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); - if(trie==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return 0; - } - uprv_memset(trie, 0, sizeof(UTrie2)); - trie->memory=uprv_malloc(length); - if(trie->memory==NULL) { - uprv_free(trie); - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return 0; - } - trie->length=length; - trie->isMemoryOwned=TRUE; - - /* set the UTrie2 fields */ - if(valueBits==UTRIE2_16_VALUE_BITS) { - dataMove=indexLength; - } else { - dataMove=0; - } - - trie->indexLength=indexLength; - trie->dataLength=dataLength; - trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET; - trie->dataNullOffset=(uint16_t)dataMove; - trie->initialValue=initialValue; - trie->errorValue=errorValue; - trie->highStart=0; - trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET; -#ifdef UTRIE2_DEBUG - trie->name="dummy"; -#endif - - /* set the header fields */ - header=(UTrie2Header *)trie->memory; - - header->signature=UTRIE2_SIG; /* "Tri2" */ - header->options=(uint16_t)valueBits; - - header->indexLength=(uint16_t)indexLength; - header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT); - header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET; - header->dataNullOffset=(uint16_t)dataMove; - header->shiftedHighStart=0; - - /* fill the index and data arrays */ - dest16=(uint16_t *)(header+1); - trie->index=dest16; - - /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */ - for(i=0; i>UTRIE2_INDEX_SHIFT); /* null data block */ - } - - /* write UTF-8 2-byte index-2 values, not 
right-shifted */ - for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ - *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); - } - for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ - *dest16++=(uint16_t)dataMove; - } - - /* write the 16/32-bit data array */ - switch(valueBits) { - case UTRIE2_16_VALUE_BITS: - /* write 16-bit data values */ - trie->data16=dest16; - trie->data32=NULL; - for(i=0; i<0x80; ++i) { - *dest16++=(uint16_t)initialValue; - } - for(; i<0xc0; ++i) { - *dest16++=(uint16_t)errorValue; - } - /* highValue and reserved values */ - for(i=0; idata16=NULL; - trie->data32=p; - for(i=0; i<0x80; ++i) { - *p++=initialValue; - } - for(; i<0xc0; ++i) { - *p++=errorValue; - } - /* highValue and reserved values */ - for(i=0; iisMemoryOwned) { - uprv_free(trie->memory); - } - if(trie->newTrie!=NULL) { - uprv_free(trie->newTrie->data); -#ifdef UCPTRIE_DEBUG - umutablecptrie_close(trie->newTrie->t3); -#endif - uprv_free(trie->newTrie); - } - uprv_free(trie); - } -} - -U_CAPI UBool U_EXPORT2 -utrie2_isFrozen(const UTrie2 *trie) { - return (UBool)(trie->newTrie==NULL); -} - -U_CAPI int32_t U_EXPORT2 -utrie2_serialize(const UTrie2 *trie, - void *data, int32_t capacity, - UErrorCode *pErrorCode) { - /* argument check */ - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL || - capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0))) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if(capacity>=trie->length) { - uprv_memcpy(data, trie->memory, trie->length); - } else { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return trie->length; -} - -/* enumeration -------------------------------------------------------------- */ - -#define MIN_VALUE(a, b) ((a)<(b) ? 
(a) : (b)) - -/* default UTrie2EnumValue() returns the input value itself */ -static uint32_t U_CALLCONV -enumSameValue(const void * /*context*/, uint32_t value) { - return value; -} - -/** - * Enumerate all ranges of code points with the same relevant values. - * The values are transformed from the raw trie entries by the enumValue function. - * - * Currently requires startnewTrie==NULL) { - /* frozen trie */ - idx=trie->index; - U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */ - data32=trie->data32; - - index2NullOffset=trie->index2NullOffset; - nullBlock=trie->dataNullOffset; - } else { - /* unfrozen, mutable trie */ - idx=NULL; - data32=trie->newTrie->data; - U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */ - - index2NullOffset=trie->newTrie->index2NullOffset; - nullBlock=trie->newTrie->dataNullOffset; - } - - highStart=trie->highStart; - - /* get the enumeration value that corresponds to an initial-value trie data entry */ - initialValue=enumValue(context, trie->initialValue); - - /* set variables for previous range */ - prevI2Block=-1; - prevBlock=-1; - prev=start; - prevValue=0; - - /* enumerate index-2 blocks */ - for(c=start; c>UTRIE2_SHIFT_2; - } else if(U_IS_SURROGATE_LEAD(c)) { - /* - * Enumerate values for lead surrogate code points, not code units: - * This special block has half the normal length. - */ - i2Block=UTRIE2_LSCP_INDEX_2_OFFSET; - tempLimit=MIN_VALUE(0xdc00, limit); - } else { - /* - * Switch back to the normal part of the index-2 table. - * Enumerate the second half of the surrogates block. 
- */ - i2Block=0xd800>>UTRIE2_SHIFT_2; - tempLimit=MIN_VALUE(0xe000, limit); - } - } else { - /* supplementary code points */ - if(idx!=NULL) { - i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ - (c>>UTRIE2_SHIFT_1)]; - } else { - i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1]; - } - if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) { - /* - * The index-2 block is the same as the previous one, and filled with prevValue. - * Only possible for supplementary code points because the linear-BMP index-2 - * table creates unique i2Block values. - */ - c+=UTRIE2_CP_PER_INDEX_1_ENTRY; - continue; - } - } - prevI2Block=i2Block; - if(i2Block==index2NullOffset) { - /* this is the null index-2 block */ - if(prevValue!=initialValue) { - if(prev>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; - if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) { - i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; - } else { - i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH; - } - for(; i2newTrie->index2[i2Block+i2]; - } - if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) { - /* the block is the same as the previous one, and filled with prevValue */ - c+=UTRIE2_DATA_BLOCK_LENGTH; - continue; - } - prevBlock=block; - if(block==nullBlock) { - /* this is the null data block */ - if(prevValue!=initialValue) { - if(prevlimit) { - c=limit; /* could be higher if in the index2NullOffset */ - } else if(chighValueIndex] : - idx[trie->highValueIndex]; - } else { - highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY]; - } - value=enumValue(context, highValue); - if(value!=prevValue) { - if(prev=codePointStart) { - codePoint=U_SENTINEL; - return static_cast(trie->errorValue); - } - uint16_t result; - UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result); - return result; -} - -uint16_t ForwardUTrie2StringIterator::next16() { - codePointStart=codePointLimit; - if(codePointLimit==limit) { - codePoint=U_SENTINEL; - return 
static_cast(trie->errorValue); - } - uint16_t result; - UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result); - return result; -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/utrie2.h b/deps/node/deps/icu-small/source/common/utrie2.h deleted file mode 100644 index 75028ee2..00000000 --- a/deps/node/deps/icu-small/source/common/utrie2.h +++ /dev/null @@ -1,955 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2001-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: utrie2.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2008aug16 (starting from a copy of utrie.h) -* created by: Markus W. Scherer -*/ - -#ifndef __UTRIE2_H__ -#define __UTRIE2_H__ - -#include "unicode/utypes.h" -#include "unicode/utf8.h" -#include "putilimp.h" - -U_CDECL_BEGIN - -struct UTrie; /* forward declaration */ -#ifndef __UTRIE_H__ -typedef struct UTrie UTrie; -#endif - -/** - * \file - * - * This is a common implementation of a Unicode trie. - * It is a kind of compressed, serializable table of 16- or 32-bit values associated with - * Unicode code points (0..0x10ffff). (A map from code points to integers.) - * - * This is the second common version of a Unicode trie (hence the name UTrie2). - * Compared with UTrie version 1: - * - Still splitting BMP code points 11:5 bits for index and data table lookups. - * - Still separate data for lead surrogate code _units_ vs. code _points_, - * but the lead surrogate code unit values are not required any more - * for data lookup for supplementary code points. - * - The "folding" mechanism is removed. 
In UTrie version 1, this somewhat - * hard-to-explain mechanism was meant to be used for optimized UTF-16 - * processing, with application-specific encoding of indexing bits - * in the lead surrogate data for the associated supplementary code points. - * - For the last single-value code point range (ending with U+10ffff), - * the starting code point ("highStart") and the value are stored. - * - For supplementary code points U+10000..highStart-1 a three-table lookup - * (two index tables and one data table) is used. The first index - * is truncated, omitting both the BMP portion and the high range. - * - There is a special small index for 2-byte UTF-8, and the initial data - * entries are designed for fast 1/2-byte UTF-8 lookup. - * Starting with ICU 60, C0 and C1 are not recognized as UTF-8 lead bytes any more at all, - * and the associated 2-byte indexes are unused. - */ - -/** - * Trie structure. - * Use only with public API macros and functions. - */ -struct UTrie2; -typedef struct UTrie2 UTrie2; - -/* Public UTrie2 API functions: read-only access ---------------------------- */ - -/** - * Selectors for the width of a UTrie2 data value. - */ -enum UTrie2ValueBits { - /** 16 bits per UTrie2 data value. */ - UTRIE2_16_VALUE_BITS, - /** 32 bits per UTrie2 data value. */ - UTRIE2_32_VALUE_BITS, - /** Number of selectors for the width of UTrie2 data values. */ - UTRIE2_COUNT_VALUE_BITS -}; -typedef enum UTrie2ValueBits UTrie2ValueBits; - -/** - * Open a frozen trie from its serialized from, stored in 32-bit-aligned memory. - * Inverse of utrie2_serialize(). - * The memory must remain valid and unchanged as long as the trie is used. - * You must utrie2_close() the trie once you are done using it. 
- * - * @param valueBits selects the data entry size; results in an - * U_INVALID_FORMAT_ERROR if it does not match the serialized form - * @param data a pointer to 32-bit-aligned memory containing the serialized form of a UTrie2 - * @param length the number of bytes available at data; - * can be more than necessary - * @param pActualLength receives the actual number of bytes at data taken up by the trie data; - * can be NULL - * @param pErrorCode an in/out ICU UErrorCode - * @return the unserialized trie - * - * @see utrie2_open - * @see utrie2_serialize - */ -U_CAPI UTrie2 * U_EXPORT2 -utrie2_openFromSerialized(UTrie2ValueBits valueBits, - const void *data, int32_t length, int32_t *pActualLength, - UErrorCode *pErrorCode); - -/** - * Open a frozen, empty "dummy" trie. - * A dummy trie is an empty trie, used when a real data trie cannot - * be loaded. Equivalent to calling utrie2_open() and utrie2_freeze(), - * but without internally creating and compacting/serializing the - * builder data structure. - * - * The trie always returns the initialValue, - * or the errorValue for out-of-range code points and illegal UTF-8. - * - * You must utrie2_close() the trie once you are done using it. - * - * @param valueBits selects the data entry size - * @param initialValue the initial value that is set for all code points - * @param errorValue the value for out-of-range code points and illegal UTF-8 - * @param pErrorCode an in/out ICU UErrorCode - * @return the dummy trie - * - * @see utrie2_openFromSerialized - * @see utrie2_open - */ -U_CAPI UTrie2 * U_EXPORT2 -utrie2_openDummy(UTrie2ValueBits valueBits, - uint32_t initialValue, uint32_t errorValue, - UErrorCode *pErrorCode); - -/** - * Get a value from a code point as stored in the trie. - * Easier to use than UTRIE2_GET16() and UTRIE2_GET32() but slower. - * Easier to use because, unlike the macros, this function works on all UTrie2 - * objects, frozen or not, holding 16-bit or 32-bit data values. 
- * - * @param trie the trie - * @param c the code point - * @return the value - */ -U_CAPI uint32_t U_EXPORT2 -utrie2_get32(const UTrie2 *trie, UChar32 c); - -/* enumeration callback types */ - -/** - * Callback from utrie2_enum(), extracts a uint32_t value from a - * trie value. This value will be passed on to the UTrie2EnumRange function. - * - * @param context an opaque pointer, as passed into utrie2_enum() - * @param value a value from the trie - * @return the value that is to be passed on to the UTrie2EnumRange function - */ -typedef uint32_t U_CALLCONV -UTrie2EnumValue(const void *context, uint32_t value); - -/** - * Callback from utrie2_enum(), is called for each contiguous range - * of code points with the same value as retrieved from the trie and - * transformed by the UTrie2EnumValue function. - * - * The callback function can stop the enumeration by returning FALSE. - * - * @param context an opaque pointer, as passed into utrie2_enum() - * @param start the first code point in a contiguous range with value - * @param end the last code point in a contiguous range with value (inclusive) - * @param value the value that is set for all code points in [start..end] - * @return FALSE to stop the enumeration - */ -typedef UBool U_CALLCONV -UTrie2EnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value); - -/** - * Enumerate efficiently all values in a trie. - * Do not modify the trie during the enumeration. - * - * For each entry in the trie, the value to be delivered is passed through - * the UTrie2EnumValue function. - * The value is unchanged if that function pointer is NULL. - * - * For each contiguous range of code points with a given (transformed) value, - * the UTrie2EnumRange function is called. 
- * - * @param trie a pointer to the trie - * @param enumValue a pointer to a function that may transform the trie entry value, - * or NULL if the values from the trie are to be used directly - * @param enumRange a pointer to a function that is called for each contiguous range - * of code points with the same (transformed) value - * @param context an opaque pointer that is passed on to the callback functions - */ -U_CAPI void U_EXPORT2 -utrie2_enum(const UTrie2 *trie, - UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context); - -/* Building a trie ---------------------------------------------------------- */ - -/** - * Open an empty, writable trie. At build time, 32-bit data values are used. - * utrie2_freeze() takes a valueBits parameter - * which determines the data value width in the serialized and frozen forms. - * You must utrie2_close() the trie once you are done using it. - * - * @param initialValue the initial value that is set for all code points - * @param errorValue the value for out-of-range code points and illegal UTF-8 - * @param pErrorCode an in/out ICU UErrorCode - * @return a pointer to the allocated and initialized new trie - */ -U_CAPI UTrie2 * U_EXPORT2 -utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode); - -/** - * Clone a trie. - * You must utrie2_close() the clone once you are done using it. - * - * @param other the trie to clone - * @param pErrorCode an in/out ICU UErrorCode - * @return a pointer to the new trie clone - */ -U_CAPI UTrie2 * U_EXPORT2 -utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode); - -/** - * Clone a trie. The clone will be mutable/writable even if the other trie - * is frozen. (See utrie2_freeze().) - * You must utrie2_close() the clone once you are done using it. 
- * - * @param other the trie to clone - * @param pErrorCode an in/out ICU UErrorCode - * @return a pointer to the new trie clone - */ -U_CAPI UTrie2 * U_EXPORT2 -utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode); - -/** - * Close a trie and release associated memory. - * - * @param trie the trie - */ -U_CAPI void U_EXPORT2 -utrie2_close(UTrie2 *trie); - -/** - * Set a value for a code point. - * - * @param trie the unfrozen trie - * @param c the code point - * @param value the value - * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: - * - U_NO_WRITE_PERMISSION if the trie is frozen - */ -U_CAPI void U_EXPORT2 -utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode); - -/** - * Set a value in a range of code points [start..end]. - * All code points c with start<=c<=end will get the value if - * overwrite is TRUE or if the old value is the initial value. - * - * @param trie the unfrozen trie - * @param start the first code point to get the value - * @param end the last code point to get the value (inclusive) - * @param value the value - * @param overwrite flag for whether old non-initial values are to be overwritten - * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: - * - U_NO_WRITE_PERMISSION if the trie is frozen - */ -U_CAPI void U_EXPORT2 -utrie2_setRange32(UTrie2 *trie, - UChar32 start, UChar32 end, - uint32_t value, UBool overwrite, - UErrorCode *pErrorCode); - -/** - * Freeze a trie. Make it immutable (read-only) and compact it, - * ready for serialization and for use with fast macros. - * Functions to set values will fail after serializing. - * - * A trie can be frozen only once. If this function is called again with different - * valueBits then it will set a U_ILLEGAL_ARGUMENT_ERROR. 
- * - * @param trie the trie - * @param valueBits selects the data entry size; if smaller than 32 bits, then - * the values stored in the trie will be truncated - * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: - * - U_INDEX_OUTOFBOUNDS_ERROR if the compacted index or data arrays are too long - * for serialization - * (the trie will be immutable and usable, - * but not frozen and not usable with the fast macros) - * - * @see utrie2_cloneAsThawed - */ -U_CAPI void U_EXPORT2 -utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode); - -/** - * Test if the trie is frozen. (See utrie2_freeze().) - * - * @param trie the trie - * @return TRUE if the trie is frozen, that is, immutable, ready for serialization - * and for use with fast macros - */ -U_CAPI UBool U_EXPORT2 -utrie2_isFrozen(const UTrie2 *trie); - -/** - * Serialize a frozen trie into 32-bit aligned memory. - * If the trie is not frozen, then the function returns with a U_ILLEGAL_ARGUMENT_ERROR. - * A trie can be serialized multiple times. - * - * @param trie the frozen trie - * @param data a pointer to 32-bit-aligned memory to be filled with the trie data, - * can be NULL if capacity==0 - * @param capacity the number of bytes available at data, - * or 0 for preflighting - * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: - * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization - * - U_ILLEGAL_ARGUMENT_ERROR if the trie is not frozen or the data and capacity - * parameters are bad - * @return the number of bytes written or needed for the trie - * - * @see utrie2_openFromSerialized() - */ -U_CAPI int32_t U_EXPORT2 -utrie2_serialize(const UTrie2 *trie, - void *data, int32_t capacity, - UErrorCode *pErrorCode); - -/* Public UTrie2 API: miscellaneous functions ------------------------------- */ - -/** - * Build a UTrie2 (version 2) from a UTrie (version 1). 
- * Enumerates all values in the UTrie and builds a UTrie2 with the same values. - * The resulting UTrie2 will be frozen. - * - * @param trie1 the runtime UTrie structure to be enumerated - * @param errorValue the value for out-of-range code points and illegal UTF-8 - * @param pErrorCode an in/out ICU UErrorCode - * @return The frozen UTrie2 with the same values as the UTrie. - */ -U_CAPI UTrie2 * U_EXPORT2 -utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode); - -/* Public UTrie2 API macros ------------------------------------------------- */ - -/* - * These macros provide fast data lookup from a frozen trie. - * They will crash when used on an unfrozen trie. - */ - -/** - * Return a 16-bit trie value from a code point, with range checking. - * Returns trie->errorValue if c is not in the range 0..U+10ffff. - * - * @param trie (const UTrie2 *, in) a frozen trie - * @param c (UChar32, in) the input code point - * @return (uint16_t) The code point's trie value. - */ -#define UTRIE2_GET16(trie, c) _UTRIE2_GET((trie), index, (trie)->indexLength, (c)) - -/** - * Return a 32-bit trie value from a code point, with range checking. - * Returns trie->errorValue if c is not in the range 0..U+10ffff. - * - * @param trie (const UTrie2 *, in) a frozen trie - * @param c (UChar32, in) the input code point - * @return (uint32_t) The code point's trie value. - */ -#define UTRIE2_GET32(trie, c) _UTRIE2_GET((trie), data32, 0, (c)) - -/** - * UTF-16: Get the next code point (UChar32 c, out), post-increment src, - * and get a 16-bit value from the trie. 
- * - * @param trie (const UTrie2 *, in) a frozen trie - * @param src (const UChar *, in/out) the source text pointer - * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated - * @param c (UChar32, out) variable for the code point - * @param result (uint16_t, out) uint16_t variable for the trie lookup result - */ -#define UTRIE2_U16_NEXT16(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, index, src, limit, c, result) - -/** - * UTF-16: Get the next code point (UChar32 c, out), post-increment src, - * and get a 32-bit value from the trie. - * - * @param trie (const UTrie2 *, in) a frozen trie - * @param src (const UChar *, in/out) the source text pointer - * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated - * @param c (UChar32, out) variable for the code point - * @param result (uint32_t, out) uint32_t variable for the trie lookup result - */ -#define UTRIE2_U16_NEXT32(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, data32, src, limit, c, result) - -/** - * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src, - * and get a 16-bit value from the trie. - * - * @param trie (const UTrie2 *, in) a frozen trie - * @param start (const UChar *, in) the start pointer for the text - * @param src (const UChar *, in/out) the source text pointer - * @param c (UChar32, out) variable for the code point - * @param result (uint16_t, out) uint16_t variable for the trie lookup result - */ -#define UTRIE2_U16_PREV16(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, index, start, src, c, result) - -/** - * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src, - * and get a 32-bit value from the trie. 
- * - * @param trie (const UTrie2 *, in) a frozen trie - * @param start (const UChar *, in) the start pointer for the text - * @param src (const UChar *, in/out) the source text pointer - * @param c (UChar32, out) variable for the code point - * @param result (uint32_t, out) uint32_t variable for the trie lookup result - */ -#define UTRIE2_U16_PREV32(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, data32, start, src, c, result) - -/** - * UTF-8: Post-increment src and get a 16-bit value from the trie. - * - * @param trie (const UTrie2 *, in) a frozen trie - * @param src (const char *, in/out) the source text pointer - * @param limit (const char *, in) the limit pointer for the text (must not be NULL) - * @param result (uint16_t, out) uint16_t variable for the trie lookup result - */ -#define UTRIE2_U8_NEXT16(trie, src, limit, result)\ - _UTRIE2_U8_NEXT(trie, data16, index, src, limit, result) - -/** - * UTF-8: Post-increment src and get a 32-bit value from the trie. - * - * @param trie (const UTrie2 *, in) a frozen trie - * @param src (const char *, in/out) the source text pointer - * @param limit (const char *, in) the limit pointer for the text (must not be NULL) - * @param result (uint16_t, out) uint32_t variable for the trie lookup result - */ -#define UTRIE2_U8_NEXT32(trie, src, limit, result) \ - _UTRIE2_U8_NEXT(trie, data32, data32, src, limit, result) - -/** - * UTF-8: Pre-decrement src and get a 16-bit value from the trie. - * - * @param trie (const UTrie2 *, in) a frozen trie - * @param start (const char *, in) the start pointer for the text - * @param src (const char *, in/out) the source text pointer - * @param result (uint16_t, out) uint16_t variable for the trie lookup result - */ -#define UTRIE2_U8_PREV16(trie, start, src, result) \ - _UTRIE2_U8_PREV(trie, data16, index, start, src, result) - -/** - * UTF-8: Pre-decrement src and get a 32-bit value from the trie. 
- * - * @param trie (const UTrie2 *, in) a frozen trie - * @param start (const char *, in) the start pointer for the text - * @param src (const char *, in/out) the source text pointer - * @param result (uint16_t, out) uint32_t variable for the trie lookup result - */ -#define UTRIE2_U8_PREV32(trie, start, src, result) \ - _UTRIE2_U8_PREV(trie, data32, data32, start, src, result) - -/* Public UTrie2 API: optimized UTF-16 access ------------------------------- */ - -/* - * The following functions and macros are used for highly optimized UTF-16 - * text processing. The UTRIE2_U16_NEXTxy() macros do not depend on these. - * - * A UTrie2 stores separate values for lead surrogate code _units_ vs. code _points_. - * UTF-16 text processing can be optimized by detecting surrogate pairs and - * assembling supplementary code points only when there is non-trivial data - * available. - * - * At build-time, use utrie2_enumForLeadSurrogate() to see if there - * is non-trivial (non-initialValue) data for any of the supplementary - * code points associated with a lead surrogate. - * If so, then set a special (application-specific) value for the - * lead surrogate code _unit_, with utrie2_set32ForLeadSurrogateCodeUnit(). - * - * At runtime, use UTRIE2_GET16_FROM_U16_SINGLE_LEAD() or - * UTRIE2_GET32_FROM_U16_SINGLE_LEAD() per code unit. If there is non-trivial - * data and the code unit is a lead surrogate, then check if a trail surrogate - * follows. If so, assemble the supplementary code point with - * U16_GET_SUPPLEMENTARY() and look up its value with UTRIE2_GET16_FROM_SUPP() - * or UTRIE2_GET32_FROM_SUPP(); otherwise reset the lead - * surrogate's value or do a code point lookup for it. - * - * If there is only trivial data for lead and trail surrogates, then processing - * can often skip them. For example, in normalization or case mapping - * all characters that do not have any mappings are simply copied as is. 
- */ - -/** - * Get a value from a lead surrogate code unit as stored in the trie. - * - * @param trie the trie - * @param c the code unit (U+D800..U+DBFF) - * @return the value - */ -U_CAPI uint32_t U_EXPORT2 -utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c); - -/** - * Enumerate the trie values for the 1024=0x400 code points - * corresponding to a given lead surrogate. - * For example, for the lead surrogate U+D87E it will enumerate the values - * for [U+2F800..U+2FC00[. - * Used by data builder code that sets special lead surrogate code unit values - * for optimized UTF-16 string processing. - * - * Do not modify the trie during the enumeration. - * - * Except for the limited code point range, this functions just like utrie2_enum(): - * For each entry in the trie, the value to be delivered is passed through - * the UTrie2EnumValue function. - * The value is unchanged if that function pointer is NULL. - * - * For each contiguous range of code points with a given (transformed) value, - * the UTrie2EnumRange function is called. - * - * @param trie a pointer to the trie - * @param enumValue a pointer to a function that may transform the trie entry value, - * or NULL if the values from the trie are to be used directly - * @param enumRange a pointer to a function that is called for each contiguous range - * of code points with the same (transformed) value - * @param context an opaque pointer that is passed on to the callback functions - */ -U_CAPI void U_EXPORT2 -utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead, - UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, - const void *context); - -/** - * Set a value for a lead surrogate code unit. 
- * - * @param trie the unfrozen trie - * @param lead the lead surrogate code unit (U+D800..U+DBFF) - * @param value the value - * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: - * - U_NO_WRITE_PERMISSION if the trie is frozen - */ -U_CAPI void U_EXPORT2 -utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie, - UChar32 lead, uint32_t value, - UErrorCode *pErrorCode); - -/** - * Return a 16-bit trie value from a UTF-16 single/lead code unit (<=U+ffff). - * Same as UTRIE2_GET16() if c is a BMP code point except for lead surrogates, - * but smaller and faster. - * - * @param trie (const UTrie2 *, in) a frozen trie - * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff - * @return (uint16_t) The code unit's trie value. - */ -#define UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), index, c) - -/** - * Return a 32-bit trie value from a UTF-16 single/lead code unit (<=U+ffff). - * Same as UTRIE2_GET32() if c is a BMP code point except for lead surrogates, - * but smaller and faster. - * - * @param trie (const UTrie2 *, in) a frozen trie - * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff - * @return (uint32_t) The code unit's trie value. - */ -#define UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), data32, c) - -/** - * Return a 16-bit trie value from a supplementary code point (U+10000..U+10ffff). - * - * @param trie (const UTrie2 *, in) a frozen trie - * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff - * @return (uint16_t) The code point's trie value. - */ -#define UTRIE2_GET16_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), index, c) - -/** - * Return a 32-bit trie value from a supplementary code point (U+10000..U+10ffff). - * - * @param trie (const UTrie2 *, in) a frozen trie - * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff - * @return (uint32_t) The code point's trie value. 
- */ -#define UTRIE2_GET32_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), data32, c) - -U_CDECL_END - -/* C++ convenience wrappers ------------------------------------------------- */ - -#ifdef __cplusplus - -#include "unicode/utf.h" -#include "mutex.h" - -U_NAMESPACE_BEGIN - -// Use the Forward/Backward subclasses below. -class UTrie2StringIterator : public UMemory { -public: - UTrie2StringIterator(const UTrie2 *t, const UChar *p) : - trie(t), codePointStart(p), codePointLimit(p), codePoint(U_SENTINEL) {} - - const UTrie2 *trie; - const UChar *codePointStart, *codePointLimit; - UChar32 codePoint; -}; - -class BackwardUTrie2StringIterator : public UTrie2StringIterator { -public: - BackwardUTrie2StringIterator(const UTrie2 *t, const UChar *s, const UChar *p) : - UTrie2StringIterator(t, p), start(s) {} - - uint16_t previous16(); - - const UChar *start; -}; - -class ForwardUTrie2StringIterator : public UTrie2StringIterator { -public: - // Iteration limit l can be NULL. - // In that case, the caller must detect c==0 and stop. - ForwardUTrie2StringIterator(const UTrie2 *t, const UChar *p, const UChar *l) : - UTrie2StringIterator(t, p), limit(l) {} - - uint16_t next16(); - - const UChar *limit; -}; - -U_NAMESPACE_END - -#endif - -/* Internal definitions ----------------------------------------------------- */ - -U_CDECL_BEGIN - -/** Build-time trie structure. */ -struct UNewTrie2; -typedef struct UNewTrie2 UNewTrie2; - -/* - * Trie structure definition. - * - * Either the data table is 16 bits wide and accessed via the index - * pointer, with each index item increased by indexLength; - * in this case, data32==NULL, and data16 is used for direct ASCII access. - * - * Or the data table is 32 bits wide and accessed via the data32 pointer. 
- */ -struct UTrie2 { - /* protected: used by macros and functions for reading values */ - const uint16_t *index; - const uint16_t *data16; /* for fast UTF-8 ASCII access, if 16b data */ - const uint32_t *data32; /* NULL if 16b data is used via index */ - - int32_t indexLength, dataLength; - uint16_t index2NullOffset; /* 0xffff if there is no dedicated index-2 null block */ - uint16_t dataNullOffset; - uint32_t initialValue; - /** Value returned for out-of-range code points and illegal UTF-8. */ - uint32_t errorValue; - - /* Start of the last range which ends at U+10ffff, and its value. */ - UChar32 highStart; - int32_t highValueIndex; - - /* private: used by builder and unserialization functions */ - void *memory; /* serialized bytes; NULL if not frozen yet */ - int32_t length; /* number of serialized bytes at memory; 0 if not frozen yet */ - UBool isMemoryOwned; /* TRUE if the trie owns the memory */ - UBool padding1; - int16_t padding2; - UNewTrie2 *newTrie; /* builder object; NULL when frozen */ - -#ifdef UTRIE2_DEBUG - const char *name; -#endif -}; - -/** - * Trie constants, defining shift widths, index array lengths, etc. - * - * These are needed for the runtime macros but users can treat these as - * implementation details and skip to the actual public API further below. - */ -enum { - /** Shift size for getting the index-1 table offset. */ - UTRIE2_SHIFT_1=6+5, - - /** Shift size for getting the index-2 table offset. */ - UTRIE2_SHIFT_2=5, - - /** - * Difference between the two shift sizes, - * for getting an index-1 offset from an index-2 offset. 6=11-5 - */ - UTRIE2_SHIFT_1_2=UTRIE2_SHIFT_1-UTRIE2_SHIFT_2, - - /** - * Number of index-1 entries for the BMP. 32=0x20 - * This part of the index-1 table is omitted from the serialized form. - */ - UTRIE2_OMITTED_BMP_INDEX_1_LENGTH=0x10000>>UTRIE2_SHIFT_1, - - /** Number of code points per index-1 table entry. 2048=0x800 */ - UTRIE2_CP_PER_INDEX_1_ENTRY=1<>UTRIE2_SHIFT_2. 
- */ - UTRIE2_INDEX_2_OFFSET=0, - - /** - * The part of the index-2 table for U+D800..U+DBFF stores values for - * lead surrogate code _units_ not code _points_. - * Values for lead surrogate code _points_ are indexed with this portion of the table. - * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.) - */ - UTRIE2_LSCP_INDEX_2_OFFSET=0x10000>>UTRIE2_SHIFT_2, - UTRIE2_LSCP_INDEX_2_LENGTH=0x400>>UTRIE2_SHIFT_2, - - /** Count the lengths of both BMP pieces. 2080=0x820 */ - UTRIE2_INDEX_2_BMP_LENGTH=UTRIE2_LSCP_INDEX_2_OFFSET+UTRIE2_LSCP_INDEX_2_LENGTH, - - /** - * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820. - * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2. - */ - UTRIE2_UTF8_2B_INDEX_2_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH, - UTRIE2_UTF8_2B_INDEX_2_LENGTH=0x800>>6, /* U+0800 is the first code point after 2-byte UTF-8 */ - - /** - * The index-1 table, only used for supplementary code points, at offset 2112=0x840. - * Variable length, for code points up to highStart, where the last single-value range starts. - * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1. - * (For 0x100000 supplementary code points U+10000..U+10ffff.) - * - * The part of the index-2 table for supplementary code points starts - * after this index-1 table. - * - * Both the index-1 table and the following part of the index-2 table - * are omitted completely if there is only BMP data. - */ - UTRIE2_INDEX_1_OFFSET=UTRIE2_UTF8_2B_INDEX_2_OFFSET+UTRIE2_UTF8_2B_INDEX_2_LENGTH, - UTRIE2_MAX_INDEX_1_LENGTH=0x100000>>UTRIE2_SHIFT_1, - - /* - * Fixed layout of the first part of the data array. ----------------------- - * Starts with 4 blocks (128=0x80 entries) for ASCII. - */ - - /** - * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80. - * Used with linear access for single bytes 0..0xbf for simple error handling. - * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH. 
- */ - UTRIE2_BAD_UTF8_DATA_OFFSET=0x80, - - /** The start of non-linear-ASCII data blocks, at offset 192=0xc0. */ - UTRIE2_DATA_START_OFFSET=0xc0 -}; - -/* Internal functions and macros -------------------------------------------- */ - -/** - * Internal function for part of the UTRIE2_U8_NEXTxx() macro implementations. - * Do not call directly. - * @internal - */ -U_INTERNAL int32_t U_EXPORT2 -utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, - const uint8_t *src, const uint8_t *limit); - -/** - * Internal function for part of the UTRIE2_U8_PREVxx() macro implementations. - * Do not call directly. - * @internal - */ -U_INTERNAL int32_t U_EXPORT2 -utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, - const uint8_t *start, const uint8_t *src); - - -/** Internal low-level trie getter. Returns a data index. */ -#define _UTRIE2_INDEX_RAW(offset, trieIndex, c) \ - (((int32_t)((trieIndex)[(offset)+((c)>>UTRIE2_SHIFT_2)]) \ - <>UTRIE2_SHIFT_2), trieIndex, c) - -/** Internal trie getter from a BMP code point. Returns the data index. */ -#define _UTRIE2_INDEX_FROM_BMP(trieIndex, c) \ - _UTRIE2_INDEX_RAW(U_IS_LEAD(c) ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \ - trieIndex, c) - -/** Internal trie getter from a supplementary code point below highStart. Returns the data index. */ -#define _UTRIE2_INDEX_FROM_SUPP(trieIndex, c) \ - (((int32_t)((trieIndex)[ \ - (trieIndex)[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ \ - ((c)>>UTRIE2_SHIFT_1)]+ \ - (((c)>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK)]) \ - <index, c) : \ - (uint32_t)(c)<=0xffff ? \ - _UTRIE2_INDEX_RAW( \ - (c)<=0xdbff ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \ - (trie)->index, c) : \ - (uint32_t)(c)>0x10ffff ? \ - (asciiOffset)+UTRIE2_BAD_UTF8_DATA_OFFSET : \ - (c)>=(trie)->highStart ? \ - (trie)->highValueIndex : \ - _UTRIE2_INDEX_FROM_SUPP((trie)->index, c)) - -/** Internal trie getter from a UTF-16 single/lead code unit. Returns the data. 
*/ -#define _UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c) \ - (trie)->data[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD((trie)->index, c)] - -/** Internal trie getter from a supplementary code point. Returns the data. */ -#define _UTRIE2_GET_FROM_SUPP(trie, data, c) \ - (trie)->data[(c)>=(trie)->highStart ? (trie)->highValueIndex : \ - _UTRIE2_INDEX_FROM_SUPP((trie)->index, c)] - -/** - * Internal trie getter from a code point, with checking that c is in 0..10FFFF. - * Returns the data. - */ -#define _UTRIE2_GET(trie, data, asciiOffset, c) \ - (trie)->data[_UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c)] - -/** Internal next-post-increment: get the next code point (c) and its data. */ -#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) { \ - { \ - uint16_t __c2; \ - (c)=*(src)++; \ - if(!U16_IS_LEAD(c)) { \ - (result)=_UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c); \ - } else if((src)==(limit) || !U16_IS_TRAIL(__c2=*(src))) { \ - (result)=(trie)->data[_UTRIE2_INDEX_FROM_LSCP((trie)->index, c)]; \ - } else { \ - ++(src); \ - (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ - (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \ - } \ - } \ -} - -/** Internal pre-decrement-previous: get the previous code point (c) and its data */ -#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) { \ - { \ - uint16_t __c2; \ - (c)=*--(src); \ - if(!U16_IS_TRAIL(c) || (src)==(start) || !U16_IS_LEAD(__c2=*((src)-1))) { \ - (result)=(trie)->data[_UTRIE2_INDEX_FROM_BMP((trie)->index, c)]; \ - } else { \ - --(src); \ - (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ - (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \ - } \ - } \ -} - -/** Internal UTF-8 next-post-increment: get the next code point's data. 
*/ -#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) { \ - uint8_t __lead=(uint8_t)*(src)++; \ - if(U8_IS_SINGLE(__lead)) { \ - (result)=(trie)->ascii[__lead]; \ - } else { \ - uint8_t __t1, __t2; \ - if( /* handle U+0800..U+FFFF inline */ \ - 0xe0<=__lead && __lead<0xf0 && ((src)+1)<(limit) && \ - U8_IS_VALID_LEAD3_AND_T1(__lead, __t1=(uint8_t)*(src)) && \ - (__t2=(uint8_t)(*((src)+1)-0x80))<= 0x3f \ - ) { \ - (src)+=2; \ - (result)=(trie)->data[ \ - ((int32_t)((trie)->index[((__lead-0xe0)<<(12-UTRIE2_SHIFT_2))+ \ - ((__t1&0x3f)<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \ - <=0xc2 && (src)<(limit) && \ - (__t1=(uint8_t)(*(src)-0x80))<=0x3f \ - ) { \ - ++(src); \ - (result)=(trie)->data[ \ - (trie)->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ \ - __t1]; \ - } else { \ - int32_t __index=utrie2_internalU8NextIndex((trie), __lead, (const uint8_t *)(src), \ - (const uint8_t *)(limit)); \ - (src)+=__index&7; \ - (result)=(trie)->data[__index>>3]; \ - } \ - } \ -} - -/** Internal UTF-8 pre-decrement-previous: get the previous code point's data. */ -#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) { \ - uint8_t __b=(uint8_t)*--(src); \ - if(U8_IS_SINGLE(__b)) { \ - (result)=(trie)->ascii[__b]; \ - } else { \ - int32_t __index=utrie2_internalU8PrevIndex((trie), __b, (const uint8_t *)(start), \ - (const uint8_t *)(src)); \ - (src)-=__index&7; \ - (result)=(trie)->data[__index>>3]; \ - } \ -} - -U_CDECL_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/utrie2_builder.cpp b/deps/node/deps/icu-small/source/common/utrie2_builder.cpp deleted file mode 100644 index 80e09c9c..00000000 --- a/deps/node/deps/icu-small/source/common/utrie2_builder.cpp +++ /dev/null @@ -1,1481 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2001-2014, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: utrie2_builder.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2008sep26 (split off from utrie2.c) -* created by: Markus W. Scherer -* -* This is a common implementation of a Unicode trie. -* It is a kind of compressed, serializable table of 16- or 32-bit values associated with -* Unicode code points (0..0x10ffff). -* This is the second common version of a Unicode trie (hence the name UTrie2). -* See utrie2.h for a comparison. -* -* This file contains only the builder code. -* See utrie2.c for the runtime and enumeration code. -*/ -// #define UTRIE2_DEBUG -#ifdef UTRIE2_DEBUG -# include -#endif -// #define UCPTRIE_DEBUG - -#include "unicode/utypes.h" -#ifdef UCPTRIE_DEBUG -#include "unicode/ucptrie.h" -#include "unicode/umutablecptrie.h" -#include "ucptrie_impl.h" -#endif -#include "cmemory.h" -#include "utrie2.h" -#include "utrie2_impl.h" - -#include "utrie.h" // for utrie2_fromUTrie() - -/* Implementation notes ----------------------------------------------------- */ - -/* - * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values - * have been chosen to minimize trie sizes overall. - * Most of the code is flexible enough to work with a range of values, - * within certain limits. - * - * Exception: Support for separate values for lead surrogate code _units_ - * vs. code _points_ was added after the constants were fixed, - * and has not been tested nor particularly designed for different constant values. - * (Especially the utrie2_enum() code that jumps to the special LSCP index-2 - * part and back.) - * - * Requires UTRIE2_SHIFT_2<=6. 
Otherwise 0xc0 which is the top of the ASCII-linear data - * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH - * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction - * remapping) stops working. - * - * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate() - * assumes that a single index-2 block is used for 0x400 code points - * corresponding to one lead surrogate. - * - * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains - * more than one Unicode plane, and the split of the index-2 table into a BMP - * part and a supplementary part, with a gap in between, would not work. - * - * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because - * there is data with more than 64k distinct values, - * for example for Unihan collation with a separate collation weight per - * Han character. - */ - -/* Building a trie ----------------------------------------------------------*/ - -enum { - /** The null index-2 block, following the gap in the index-2 table. */ - UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH, - - /** The start of allocated index-2 blocks. */ - UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH, - - /** - * The null data block. - * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller, - * to work with 6-bit trail bytes from 2-byte UTF-8. - */ - UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET, - - /** The start of allocated data blocks. */ - UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40, - - /** - * The start of data blocks for U+0800 and above. - * Below, compaction uses a block length of 64 for 2-byte UTF-8. - * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH. - * Data values for 0x780 code points beyond ASCII. - */ - UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780 -}; - -/* Start with allocation of 16k data entries. 
*/ -#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14) - -/* Grow about 8x each time. */ -#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17) - -static int32_t -allocIndex2Block(UNewTrie2 *trie); - -U_CAPI UTrie2 * U_EXPORT2 -utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { - UTrie2 *trie; - UNewTrie2 *newTrie; - uint32_t *data; - int32_t i, j; - - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - - trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); - newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2)); - data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4); - if(trie==NULL || newTrie==NULL || data==NULL) { - uprv_free(trie); - uprv_free(newTrie); - uprv_free(data); - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - uprv_memset(trie, 0, sizeof(UTrie2)); - trie->initialValue=initialValue; - trie->errorValue=errorValue; - trie->highStart=0x110000; - trie->newTrie=newTrie; -#ifdef UTRIE2_DEBUG - trie->name="open"; -#endif - - newTrie->data=data; -#ifdef UCPTRIE_DEBUG - newTrie->t3=umutablecptrie_open(initialValue, errorValue, pErrorCode); -#endif - newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH; - newTrie->initialValue=initialValue; - newTrie->errorValue=errorValue; - newTrie->highStart=0x110000; - newTrie->firstFreeBlock=0; /* no free block in the list */ - newTrie->isCompacted=FALSE; - - /* - * preallocate and reset - * - ASCII - * - the bad-UTF-8-data block - * - the null data block - */ - for(i=0; i<0x80; ++i) { - newTrie->data[i]=initialValue; - } - for(; i<0xc0; ++i) { - newTrie->data[i]=errorValue; - } - for(i=UNEWTRIE2_DATA_NULL_OFFSET; idata[i]=initialValue; - } - newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET; - newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET; - - /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */ - for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { - newTrie->index2[i]=j; - newTrie->map[i]=1; - } - /* reference counts for the bad-UTF-8-data block 
*/ - for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { - newTrie->map[i]=0; - } - /* - * Reference counts for the null data block: all blocks except for the ASCII blocks. - * Plus 1 so that we don't drop this block during compaction. - * Plus as many as needed for lead surrogate code points. - */ - /* i==newTrie->dataNullOffset */ - newTrie->map[i++]= - (0x110000>>UTRIE2_SHIFT_2)- - (0x80>>UTRIE2_SHIFT_2)+ - 1+ - UTRIE2_LSCP_INDEX_2_LENGTH; - j+=UTRIE2_DATA_BLOCK_LENGTH; - for(; jmap[i]=0; - } - - /* - * set the remaining indexes in the BMP index-2 block - * to the null data block - */ - for(i=0x80>>UTRIE2_SHIFT_2; iindex2[i]=UNEWTRIE2_DATA_NULL_OFFSET; - } - - /* - * Fill the index gap with impossible values so that compaction - * does not overlap other index-2 blocks with the gap. - */ - for(i=0; iindex2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1; - } - - /* set the indexes in the null index-2 block */ - for(i=0; iindex2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET; - } - newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET; - newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET; - - /* set the index-1 indexes for the linear index-2 block */ - for(i=0, j=0; - iindex1[i]=j; - } - - /* set the remaining index-1 indexes to the null index-2 block */ - for(; iindex1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET; - } - - /* - * Preallocate and reset data for U+0080..U+07ff, - * for 2-byte UTF-8 which will be compacted in 64-blocks - * even if UTRIE2_DATA_BLOCK_LENGTH is smaller. 
- */ - for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) { - utrie2_set32(trie, i, initialValue, pErrorCode); - } - - return trie; -} - -static UNewTrie2 * -cloneBuilder(const UNewTrie2 *other) { - UNewTrie2 *trie; - - trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2)); - if(trie==NULL) { - return NULL; - } - - trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4); - if(trie->data==NULL) { - uprv_free(trie); - return NULL; - } -#ifdef UCPTRIE_DEBUG - if(other->t3==nullptr) { - trie->t3=nullptr; - } else { - UErrorCode errorCode=U_ZERO_ERROR; - trie->t3=umutablecptrie_clone(other->t3, &errorCode); - } -#endif - trie->dataCapacity=other->dataCapacity; - - /* clone data */ - uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1)); - uprv_memcpy(trie->index2, other->index2, (size_t)other->index2Length*4); - trie->index2NullOffset=other->index2NullOffset; - trie->index2Length=other->index2Length; - - uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4); - trie->dataNullOffset=other->dataNullOffset; - trie->dataLength=other->dataLength; - - /* reference counters */ - if(other->isCompacted) { - trie->firstFreeBlock=0; - } else { - uprv_memcpy(trie->map, other->map, ((size_t)other->dataLength>>UTRIE2_SHIFT_2)*4); - trie->firstFreeBlock=other->firstFreeBlock; - } - - trie->initialValue=other->initialValue; - trie->errorValue=other->errorValue; - trie->highStart=other->highStart; - trie->isCompacted=other->isCompacted; - - return trie; -} - -U_CAPI UTrie2 * U_EXPORT2 -utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) { - UTrie2 *trie; - - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); - if(trie==NULL) { - return NULL; - } - uprv_memcpy(trie, other, sizeof(UTrie2)); - - if(other->memory!=NULL) { - trie->memory=uprv_malloc(other->length); - if(trie->memory!=NULL) { - 
trie->isMemoryOwned=TRUE; - uprv_memcpy(trie->memory, other->memory, other->length); - - /* make the clone's pointers point to its own memory */ - trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory); - if(other->data16!=NULL) { - trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory); - } - if(other->data32!=NULL) { - trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory); - } - } - } else /* other->newTrie!=NULL */ { - trie->newTrie=cloneBuilder(other->newTrie); - } - - if(trie->memory==NULL && trie->newTrie==NULL) { - uprv_free(trie); - trie=NULL; - } - return trie; -} - -typedef struct NewTrieAndStatus { - UTrie2 *trie; - UErrorCode errorCode; - UBool exclusiveLimit; /* rather than inclusive range end */ -} NewTrieAndStatus; - -static UBool U_CALLCONV -copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { - NewTrieAndStatus *nt=(NewTrieAndStatus *)context; - if(value!=nt->trie->initialValue) { - if(nt->exclusiveLimit) { - --end; - } - if(start==end) { - utrie2_set32(nt->trie, start, value, &nt->errorCode); - } else { - utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode); - } - return U_SUCCESS(nt->errorCode); - } else { - return TRUE; - } -} - -#ifdef UTRIE2_DEBUG -static long countInitial(const UTrie2 *trie) { - uint32_t initialValue=trie->initialValue; - int32_t length=trie->dataLength; - long count=0; - if(trie->data16!=nullptr) { - for(int32_t i=0; idata16[i]==initialValue) { ++count; } - } - } else { - for(int32_t i=0; idata32[i]==initialValue) { ++count; } - } - } - return count; -} - -static void -utrie_printLengths(const UTrie *trie) { - long indexLength=trie->indexLength; - long dataLength=(long)trie->dataLength; - long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 
4 : 2); - printf("**UTrieLengths** index:%6ld data:%6ld serialized:%6ld\n", - indexLength, dataLength, totalLength); -} - -static void -utrie2_printLengths(const UTrie2 *trie, const char *which) { - long indexLength=trie->indexLength; - long dataLength=(long)trie->dataLength; - long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); - printf("**UTrie2Lengths(%s %s)** index:%6ld data:%6ld countInitial:%6ld serialized:%6ld\n", - which, trie->name, indexLength, dataLength, countInitial(trie), totalLength); -} -#endif - -U_CAPI UTrie2 * U_EXPORT2 -utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) { - NewTrieAndStatus context; - UChar lead; - - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - if(other->newTrie!=NULL && !other->newTrie->isCompacted) { - return utrie2_clone(other, pErrorCode); /* clone an unfrozen trie */ - } - - /* Clone the frozen trie by enumerating it and building a new one. */ - context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - context.exclusiveLimit=FALSE; - context.errorCode=*pErrorCode; - utrie2_enum(other, NULL, copyEnumRange, &context); - *pErrorCode=context.errorCode; - for(lead=0xd800; lead<0xdc00; ++lead) { - uint32_t value; - if(other->data32==NULL) { - value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead); - } else { - value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead); - } - if(value!=other->initialValue) { - utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode); - } - } - if(U_FAILURE(*pErrorCode)) { - utrie2_close(context.trie); - context.trie=NULL; - } - return context.trie; -} - -/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. 
*/ -U_CAPI UTrie2 * U_EXPORT2 -utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) { - NewTrieAndStatus context; - UChar lead; - - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - if(trie1==NULL) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return NULL; - } - context.exclusiveLimit=TRUE; - context.errorCode=*pErrorCode; - utrie_enum(trie1, NULL, copyEnumRange, &context); - *pErrorCode=context.errorCode; - for(lead=0xd800; lead<0xdc00; ++lead) { - uint32_t value; - if(trie1->data32==NULL) { - value=UTRIE_GET16_FROM_LEAD(trie1, lead); - } else { - value=UTRIE_GET32_FROM_LEAD(trie1, lead); - } - if(value!=trie1->initialValue) { - utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode); - } - } - if(U_SUCCESS(*pErrorCode)) { - utrie2_freeze(context.trie, - trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS, - pErrorCode); - } -#ifdef UTRIE2_DEBUG - if(U_SUCCESS(*pErrorCode)) { - utrie_printLengths(trie1); - utrie2_printLengths(context.trie, "fromUTrie"); - } -#endif - if(U_FAILURE(*pErrorCode)) { - utrie2_close(context.trie); - context.trie=NULL; - } - return context.trie; -} - -static inline UBool -isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { - int32_t i2, block; - - if(U_IS_LEAD(c) && forLSCP) { - i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ - (c>>UTRIE2_SHIFT_2); - } else { - i2=trie->index1[c>>UTRIE2_SHIFT_1]+ - ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); - } - block=trie->index2[i2]; - return (UBool)(block==trie->dataNullOffset); -} - -static int32_t -allocIndex2Block(UNewTrie2 *trie) { - int32_t newBlock, newTop; - - newBlock=trie->index2Length; - newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH; - if(newTop>UPRV_LENGTHOF(trie->index2)) { - /* - * Should never occur. 
- * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect, - * or the code writes more values than should be possible. - */ - return -1; - } - trie->index2Length=newTop; - uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4); - return newBlock; -} - -static int32_t -getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { - int32_t i1, i2; - - if(U_IS_LEAD(c) && forLSCP) { - return UTRIE2_LSCP_INDEX_2_OFFSET; - } - - i1=c>>UTRIE2_SHIFT_1; - i2=trie->index1[i1]; - if(i2==trie->index2NullOffset) { - i2=allocIndex2Block(trie); - if(i2<0) { - return -1; /* program error */ - } - trie->index1[i1]=i2; - } - return i2; -} - -static int32_t -allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) { - int32_t newBlock, newTop; - - if(trie->firstFreeBlock!=0) { - /* get the first free block */ - newBlock=trie->firstFreeBlock; - trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2]; - } else { - /* get a new block from the high end */ - newBlock=trie->dataLength; - newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH; - if(newTop>trie->dataCapacity) { - /* out of memory in the data array */ - int32_t capacity; - uint32_t *data; - - if(trie->dataCapacitydataCapacitydata, (size_t)trie->dataLength*4); - uprv_free(trie->data); - trie->data=data; - trie->dataCapacity=capacity; - } - trie->dataLength=newTop; - } - uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4); - trie->map[newBlock>>UTRIE2_SHIFT_2]=0; - return newBlock; -} - -/* call when the block's reference counter reaches 0 */ -static void -releaseDataBlock(UNewTrie2 *trie, int32_t block) { - /* put this block at the front of the free-block chain */ - trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock; - trie->firstFreeBlock=block; -} - -static inline UBool -isWritableBlock(UNewTrie2 *trie, int32_t block) { - return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]); -} - -static inline void -setIndex2Entry(UNewTrie2 *trie, 
int32_t i2, int32_t block) { - int32_t oldBlock; - ++trie->map[block>>UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */ - oldBlock=trie->index2[i2]; - if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) { - releaseDataBlock(trie, oldBlock); - } - trie->index2[i2]=block; -} - -/** - * No error checking for illegal arguments. - * - * @return -1 if no new data block available (out of memory in data array) - * @internal - */ -static int32_t -getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { - int32_t i2, oldBlock, newBlock; - - i2=getIndex2Block(trie, c, forLSCP); - if(i2<0) { - return -1; /* program error */ - } - - i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; - oldBlock=trie->index2[i2]; - if(isWritableBlock(trie, oldBlock)) { - return oldBlock; - } - - /* allocate a new data block */ - newBlock=allocDataBlock(trie, oldBlock); - if(newBlock<0) { - /* out of memory in the data array */ - return -1; - } - setIndex2Entry(trie, i2, newBlock); - return newBlock; -} - -/** - * @return TRUE if the value was successfully set - */ -static void -set32(UNewTrie2 *trie, - UChar32 c, UBool forLSCP, uint32_t value, - UErrorCode *pErrorCode) { - int32_t block; - - if(trie==NULL || trie->isCompacted) { - *pErrorCode=U_NO_WRITE_PERMISSION; - return; - } -#ifdef UCPTRIE_DEBUG - umutablecptrie_set(trie->t3, c, value, pErrorCode); -#endif - - block=getDataBlock(trie, c, forLSCP); - if(block<0) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - - trie->data[block+(c&UTRIE2_DATA_MASK)]=value; -} - -U_CAPI void U_EXPORT2 -utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return; - } - if((uint32_t)c>0x10ffff) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - set32(trie->newTrie, c, TRUE, value, pErrorCode); -} - -U_CAPI void U_EXPORT2 -utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie, - UChar32 c, uint32_t value, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return; - } - 
if(!U_IS_LEAD(c)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - set32(trie->newTrie, c, FALSE, value, pErrorCode); -} - -static void -writeBlock(uint32_t *block, uint32_t value) { - uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH; - while(block0x10ffff || (uint32_t)end>0x10ffff || start>end) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - newTrie=trie->newTrie; - if(newTrie==NULL || newTrie->isCompacted) { - *pErrorCode=U_NO_WRITE_PERMISSION; - return; - } -#ifdef UCPTRIE_DEBUG - umutablecptrie_setRange(newTrie->t3, start, end, value, pErrorCode); -#endif - if(!overwrite && value==newTrie->initialValue) { - return; /* nothing to do */ - } - - limit=end+1; - if(start&UTRIE2_DATA_MASK) { - UChar32 nextStart; - - /* set partial block at [start..following block boundary[ */ - block=getDataBlock(newTrie, start, TRUE); - if(block<0) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - - nextStart=(start+UTRIE2_DATA_MASK)&~UTRIE2_DATA_MASK; - if(nextStart<=limit) { - fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH, - value, newTrie->initialValue, overwrite); - start=nextStart; - } else { - fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK, - value, newTrie->initialValue, overwrite); - return; - } - } - - /* number of positions in the last, partial block */ - rest=limit&UTRIE2_DATA_MASK; - - /* round down limit to a block boundary */ - limit&=~UTRIE2_DATA_MASK; - - /* iterate over all-value blocks */ - if(value==newTrie->initialValue) { - repeatBlock=newTrie->dataNullOffset; - } else { - repeatBlock=-1; - } - - while(startinitialValue && isInNullBlock(newTrie, start, TRUE)) { - start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */ - continue; - } - - /* get index value */ - i2=getIndex2Block(newTrie, start, TRUE); - if(i2<0) { - *pErrorCode=U_INTERNAL_PROGRAM_ERROR; - return; - } - i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; - block=newTrie->index2[i2]; - if(isWritableBlock(newTrie, 
block)) { - /* already allocated */ - if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) { - /* - * We overwrite all values, and it's not a - * protected (ASCII-linear or 2-byte UTF-8) block: - * replace with the repeatBlock. - */ - setRepeatBlock=TRUE; - } else { - /* !overwrite, or protected block: just write the values into this block */ - fillBlock(newTrie->data+block, - 0, UTRIE2_DATA_BLOCK_LENGTH, - value, newTrie->initialValue, overwrite); - } - } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) { - /* - * Set the repeatBlock instead of the null block or previous repeat block: - * - * If !isWritableBlock() then all entries in the block have the same value - * because it's the null block or a range block (the repeatBlock from a previous - * call to utrie2_setRange32()). - * No other blocks are used multiple times before compacting. - * - * The null block is the only non-writable block with the initialValue because - * of the repeatBlock initialization above. (If value==initialValue, then - * the repeatBlock will be the null data block.) - * - * We set our repeatBlock if the desired value differs from the block's value, - * and if we overwrite any data or if the data is all initial values - * (which is the same as the block being the null block, see above). 
- */ - setRepeatBlock=TRUE; - } - if(setRepeatBlock) { - if(repeatBlock>=0) { - setIndex2Entry(newTrie, i2, repeatBlock); - } else { - /* create and set and fill the repeatBlock */ - repeatBlock=getDataBlock(newTrie, start, TRUE); - if(repeatBlock<0) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - writeBlock(newTrie->data+repeatBlock, value); - } - } - - start+=UTRIE2_DATA_BLOCK_LENGTH; - } - - if(rest>0) { - /* set partial block at [last block boundary..limit[ */ - block=getDataBlock(newTrie, start, TRUE); - if(block<0) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - - fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite); - } - - return; -} - -/* compaction --------------------------------------------------------------- */ - -static inline UBool -equal_int32(const int32_t *s, const int32_t *t, int32_t length) { - while(length>0 && *s==*t) { - ++s; - ++t; - --length; - } - return (UBool)(length==0); -} - -static inline UBool -equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) { - while(length>0 && *s==*t) { - ++s; - ++t; - --length; - } - return (UBool)(length==0); -} - -static int32_t -findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) { - int32_t block; - - /* ensure that we do not even partially get past index2Length */ - index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH; - - for(block=0; block<=index2Length; ++block) { - if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) { - return block; - } - } - return -1; -} - -static int32_t -findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) { - int32_t block; - - /* ensure that we do not even partially get past dataLength */ - dataLength-=blockLength; - - for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) { - if(equal_uint32(data+block, data+otherBlock, blockLength)) { - return block; - } - } - return -1; -} - -/* - * Find the start of the last range 
in the trie by enumerating backward. - * Indexes for supplementary code points higher than this will be omitted. - */ -static UChar32 -findHighStart(UNewTrie2 *trie, uint32_t highValue) { - const uint32_t *data32; - - uint32_t value, initialValue; - UChar32 c, prev; - int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; - - data32=trie->data; - initialValue=trie->initialValue; - - index2NullOffset=trie->index2NullOffset; - nullBlock=trie->dataNullOffset; - - /* set variables for previous range */ - if(highValue==initialValue) { - prevI2Block=index2NullOffset; - prevBlock=nullBlock; - } else { - prevI2Block=-1; - prevBlock=-1; - } - prev=0x110000; - - /* enumerate index-2 blocks */ - i1=UNEWTRIE2_INDEX_1_LENGTH; - c=prev; - while(c>0) { - i2Block=trie->index1[--i1]; - if(i2Block==prevI2Block) { - /* the index-2 block is the same as the previous one, and filled with highValue */ - c-=UTRIE2_CP_PER_INDEX_1_ENTRY; - continue; - } - prevI2Block=i2Block; - if(i2Block==index2NullOffset) { - /* this is the null index-2 block */ - if(highValue!=initialValue) { - return c; - } - c-=UTRIE2_CP_PER_INDEX_1_ENTRY; - } else { - /* enumerate data blocks for one index-2 block */ - for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) { - block=trie->index2[i2Block+ --i2]; - if(block==prevBlock) { - /* the block is the same as the previous one, and filled with highValue */ - c-=UTRIE2_DATA_BLOCK_LENGTH; - continue; - } - prevBlock=block; - if(block==nullBlock) { - /* this is the null data block */ - if(highValue!=initialValue) { - return c; - } - c-=UTRIE2_DATA_BLOCK_LENGTH; - } else { - for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) { - value=data32[block+ --j]; - if(value!=highValue) { - return c; - } - --c; - } - } - } - } - } - - /* deliver last range */ - return 0; -} - -/* - * Compact a build-time trie. 
- * - * The compaction - * - removes blocks that are identical with earlier ones - * - overlaps adjacent blocks as much as possible (if overlap==TRUE) - * - moves blocks in steps of the data granularity - * - moves and overlaps blocks that overlap with multiple values in the overlap region - * - * It does not - * - try to move and overlap blocks that are not already adjacent - */ -static void -compactData(UNewTrie2 *trie) { -#ifdef UTRIE2_DEBUG - int32_t countSame=0, sumOverlaps=0; -#endif - - int32_t start, newStart, movedStart; - int32_t blockLength, overlap; - int32_t i, mapIndex, blockCount; - - /* do not compact linear-ASCII data */ - newStart=UTRIE2_DATA_START_OFFSET; - for(start=0, i=0; startmap[i]=start; - } - - /* - * Start with a block length of 64 for 2-byte UTF-8, - * then switch to UTRIE2_DATA_BLOCK_LENGTH. - */ - blockLength=64; - blockCount=blockLength>>UTRIE2_SHIFT_2; - for(start=newStart; startdataLength;) { - /* - * start: index of first entry of current block - * newStart: index where the current block is to be moved - * (right after current end of already-compacted data) - */ - if(start==UNEWTRIE2_DATA_0800_OFFSET) { - blockLength=UTRIE2_DATA_BLOCK_LENGTH; - blockCount=1; - } - - /* skip blocks that are not used */ - if(trie->map[start>>UTRIE2_SHIFT_2]<=0) { - /* advance start to the next block */ - start+=blockLength; - - /* leave newStart with the previous block! */ - continue; - } - - /* search for an identical block */ - if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength)) - >=0 - ) { -#ifdef UTRIE2_DEBUG - ++countSame; -#endif - /* found an identical block, set the other block's index value for the current block */ - for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { - trie->map[mapIndex++]=movedStart; - movedStart+=UTRIE2_DATA_BLOCK_LENGTH; - } - - /* advance start to the next block */ - start+=blockLength; - - /* leave newStart with the previous block! 
*/ - continue; - } - - /* see if the beginning of this block can be overlapped with the end of the previous block */ - /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ - for(overlap=blockLength-UTRIE2_DATA_GRANULARITY; - overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap); - overlap-=UTRIE2_DATA_GRANULARITY) {} - -#ifdef UTRIE2_DEBUG - sumOverlaps+=overlap; -#endif - if(overlap>0 || newStart>UTRIE2_SHIFT_2; i>0; --i) { - trie->map[mapIndex++]=movedStart; - movedStart+=UTRIE2_DATA_BLOCK_LENGTH; - } - - /* move the non-overlapping indexes to their new positions */ - start+=overlap; - for(i=blockLength-overlap; i>0; --i) { - trie->data[newStart++]=trie->data[start++]; - } - } else /* no overlap && newStart==start */ { - for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { - trie->map[mapIndex++]=start; - start+=UTRIE2_DATA_BLOCK_LENGTH; - } - newStart=start; - } - } - - /* now adjust the index-2 table */ - for(i=0; iindex2Length; ++i) { - if(i==UNEWTRIE2_INDEX_GAP_OFFSET) { - /* Gap indexes are invalid (-1). Skip over the gap. 
*/ - i+=UNEWTRIE2_INDEX_GAP_LENGTH; - } - trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2]; - } - trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2]; - - /* ensure dataLength alignment */ - while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) { - trie->data[newStart++]=trie->initialValue; - } - -#ifdef UTRIE2_DEBUG - /* we saved some space */ - printf("compacting UTrie2: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n", - (long)trie->dataLength, (long)newStart, (long)countSame, (long)sumOverlaps); -#endif - - trie->dataLength=newStart; -} - -static void -compactIndex2(UNewTrie2 *trie) { - int32_t i, start, newStart, movedStart, overlap; - - /* do not compact linear-BMP index-2 blocks */ - newStart=UTRIE2_INDEX_2_BMP_LENGTH; - for(start=0, i=0; startmap[i]=start; - } - - /* Reduce the index table gap to what will be needed at runtime. */ - newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1); - - for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; startindex2Length;) { - /* - * start: index of first entry of current block - * newStart: index where the current block is to be moved - * (right after current end of already-compacted data) - */ - - /* search for an identical block */ - if( (movedStart=findSameIndex2Block(trie->index2, newStart, start)) - >=0 - ) { - /* found an identical block, set the other block's index value for the current block */ - trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart; - - /* advance start to the next block */ - start+=UTRIE2_INDEX_2_BLOCK_LENGTH; - - /* leave newStart with the previous block! 
*/ - continue; - } - - /* see if the beginning of this block can be overlapped with the end of the previous block */ - /* look for maximum overlap with the previous, adjacent block */ - for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1; - overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap); - --overlap) {} - - if(overlap>0 || newStartmap[start>>UTRIE2_SHIFT_1_2]=newStart-overlap; - - /* move the non-overlapping indexes to their new positions */ - start+=overlap; - for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) { - trie->index2[newStart++]=trie->index2[start++]; - } - } else /* no overlap && newStart==start */ { - trie->map[start>>UTRIE2_SHIFT_1_2]=start; - start+=UTRIE2_INDEX_2_BLOCK_LENGTH; - newStart=start; - } - } - - /* now adjust the index-1 table */ - for(i=0; iindex1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2]; - } - trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2]; - - /* - * Ensure data table alignment: - * Needs to be granularity-aligned for 16-bit trie - * (so that dataMove will be down-shiftable), - * and 2-aligned for uint32_t data. - */ - while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) { - /* Arbitrary value: 0x3fffc not possible for real data. */ - trie->index2[newStart++]=(int32_t)0xffff<%lu\n", - (long)trie->index2Length, (long)newStart); -#endif - - trie->index2Length=newStart; -} - -static void -compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) { - UNewTrie2 *newTrie; - UChar32 highStart, suppHighStart; - uint32_t highValue; - - newTrie=trie->newTrie; - - /* find highStart and round it up */ - highValue=utrie2_get32(trie, 0x10ffff); - highStart=findHighStart(newTrie, highValue); - highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1); - if(highStart==0x110000) { - highValue=trie->errorValue; - } - - /* - * Set trie->highStart only after utrie2_get32(trie, highStart). - * Otherwise utrie2_get32(trie, highStart) would try to read the highValue. 
- */ - trie->highStart=newTrie->highStart=highStart; - -#ifdef UTRIE2_DEBUG - printf("UTrie2: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n", - (long)highStart, (long)highValue, (long)trie->initialValue); -#endif - - if(highStart<0x110000) { - /* Blank out [highStart..10ffff] to release associated data blocks. */ - suppHighStart= highStart<=0x10000 ? 0x10000 : highStart; - utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } - - compactData(newTrie); - if(highStart>0x10000) { - compactIndex2(newTrie); -#ifdef UTRIE2_DEBUG - } else { - printf("UTrie2: highStart U+%04lx count of 16-bit index words %lu->%lu\n", - (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET); -#endif - } - - /* - * Store the highValue in the data array and round up the dataLength. - * Must be done after compactData() because that assumes that dataLength - * is a multiple of UTRIE2_DATA_BLOCK_LENGTH. - */ - newTrie->data[newTrie->dataLength++]=highValue; - while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) { - newTrie->data[newTrie->dataLength++]=trie->initialValue; - } - - newTrie->isCompacted=TRUE; -} - -/* serialization ------------------------------------------------------------ */ - -/** - * Maximum length of the runtime index array. - * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength. - * (The actual maximum length is lower, - * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.) - */ -#define UTRIE2_MAX_INDEX_LENGTH 0xffff - -/** - * Maximum length of the runtime data array. - * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT, - * and by uint16_t UTrie2Header.shiftedDataLength. 
- */ -#define UTRIE2_MAX_DATA_LENGTH (0xffff<0 if the data is moved to the end of the index array */ - UChar32 highStart; - - /* argument check */ - if(U_FAILURE(*pErrorCode)) { - return; - } - if( trie==NULL || - valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - newTrie=trie->newTrie; - if(newTrie==NULL) { - /* already frozen */ - UTrie2ValueBits frozenValueBits= - trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS; - if(valueBits!=frozenValueBits) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - } - return; - } - - /* compact if necessary */ - if(!newTrie->isCompacted) { - compactTrie(trie, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } - highStart=trie->highStart; - - if(highStart<=0x10000) { - allIndexesLength=UTRIE2_INDEX_1_OFFSET; - } else { - allIndexesLength=newTrie->index2Length; - } - if(valueBits==UTRIE2_16_VALUE_BITS) { - dataMove=allIndexesLength; - } else { - dataMove=0; - } - - /* are indexLength and dataLength within limits? 
*/ - if( /* for unshifted indexLength */ - allIndexesLength>UTRIE2_MAX_INDEX_LENGTH || - /* for unshifted dataNullOffset */ - (dataMove+newTrie->dataNullOffset)>0xffff || - /* for unshifted 2-byte UTF-8 index-2 values */ - (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff || - /* for shiftedDataLength */ - (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH - ) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - - /* calculate the total serialized length */ - length=sizeof(UTrie2Header)+allIndexesLength*2; - if(valueBits==UTRIE2_16_VALUE_BITS) { - length+=newTrie->dataLength*2; - } else { - length+=newTrie->dataLength*4; - } - - trie->memory=uprv_malloc(length); - if(trie->memory==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - trie->length=length; - trie->isMemoryOwned=TRUE; - - trie->indexLength=allIndexesLength; - trie->dataLength=newTrie->dataLength; - if(highStart<=0x10000) { - trie->index2NullOffset=0xffff; - } else { - trie->index2NullOffset=static_cast(UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset); - } - trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset); - trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY; - - /* set the header fields */ - header=(UTrie2Header *)trie->memory; - - header->signature=UTRIE2_SIG; /* "Tri2" */ - header->options=(uint16_t)valueBits; - - header->indexLength=(uint16_t)trie->indexLength; - header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT); - header->index2NullOffset=trie->index2NullOffset; - header->dataNullOffset=trie->dataNullOffset; - header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1); - - /* fill the index and data arrays */ - dest16=(uint16_t *)(header+1); - trie->index=dest16; - - /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */ - p=(uint32_t *)newTrie->index2; - for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) { - *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT); - } - - /* 
write UTF-8 2-byte index-2 values, not right-shifted */ - for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ - *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); - } - for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ - *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]); - } - - if(highStart>0x10000) { - int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1; - int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length; - - /* write 16-bit index-1 values for supplementary code points */ - p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; - for(i=index1Length; i>0; --i) { - *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++); - } - - /* - * write the index-2 array values for supplementary code points, - * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove - */ - p=(uint32_t *)newTrie->index2+index2Offset; - for(i=newTrie->index2Length-index2Offset; i>0; --i) { - *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT); - } - } - - /* write the 16/32-bit data array */ - switch(valueBits) { - case UTRIE2_16_VALUE_BITS: - /* write 16-bit data values */ - trie->data16=dest16; - trie->data32=NULL; - p=newTrie->data; - for(i=newTrie->dataLength; i>0; --i) { - *dest16++=(uint16_t)*p++; - } - break; - case UTRIE2_32_VALUE_BITS: - /* write 32-bit data values */ - trie->data16=NULL; - trie->data32=(uint32_t *)dest16; - uprv_memcpy(dest16, newTrie->data, (size_t)newTrie->dataLength*4); - break; - default: - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - -#ifdef UTRIE2_DEBUG - utrie2_printLengths(trie, ""); -#endif - -#ifdef UCPTRIE_DEBUG - umutablecptrie_setName(newTrie->t3, trie->name); - ucptrie_close( - umutablecptrie_buildImmutable( - newTrie->t3, UCPTRIE_TYPE_FAST, (UCPTrieValueWidth)valueBits, pErrorCode)); -#endif - /* Delete the UNewTrie2. 
*/ - uprv_free(newTrie->data); - uprv_free(newTrie); - trie->newTrie=NULL; -} diff --git a/deps/node/deps/icu-small/source/common/utrie2_impl.h b/deps/node/deps/icu-small/source/common/utrie2_impl.h deleted file mode 100644 index 2a14db3a..00000000 --- a/deps/node/deps/icu-small/source/common/utrie2_impl.h +++ /dev/null @@ -1,175 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2001-2008, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: utrie2_impl.h -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2008sep26 (split off from utrie2.c) -* created by: Markus W. Scherer -* -* Definitions needed for both runtime and builder code for UTrie2, -* used by utrie2.c and utrie2_builder.c. -*/ - -#ifndef __UTRIE2_IMPL_H__ -#define __UTRIE2_IMPL_H__ - -#ifdef UCPTRIE_DEBUG -#include "unicode/umutablecptrie.h" -#endif -#include "utrie2.h" - -/* Public UTrie2 API implementation ----------------------------------------- */ - -/* - * These definitions are mostly needed by utrie2.cpp, - * but also by utrie2_serialize() and utrie2_swap(). - */ - -// UTrie2 signature values, in platform endianness and opposite endianness. -// The UTrie2 signature ASCII byte values spell "Tri2". -#define UTRIE2_SIG 0x54726932 -#define UTRIE2_OE_SIG 0x32697254 - -/** - * Trie data structure in serialized form: - * - * UTrie2Header header; - * uint16_t index[header.index2Length]; - * uint16_t data[header.shiftedDataLength<<2]; -- or uint32_t data[...] - * @internal - */ -typedef struct UTrie2Header { - /** "Tri2" in big-endian US-ASCII (0x54726932) */ - uint32_t signature; - - /** - * options bit field: - * 15.. 4 reserved (0) - * 3.. 
0 UTrie2ValueBits valueBits - */ - uint16_t options; - - /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */ - uint16_t indexLength; - - /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */ - uint16_t shiftedDataLength; - - /** Null index and data blocks, not shifted. */ - uint16_t index2NullOffset, dataNullOffset; - - /** - * First code point of the single-value range ending with U+10ffff, - * rounded up and then shifted right by UTRIE2_SHIFT_1. - */ - uint16_t shiftedHighStart; -} UTrie2Header; - -/** - * Constants for use with UTrie2Header.options. - * @internal - */ -enum { - /** Mask to get the UTrie2ValueBits valueBits from options. */ - UTRIE2_OPTIONS_VALUE_BITS_MASK=0xf -}; - -/* Building a trie ---------------------------------------------------------- */ - -/* - * These definitions are mostly needed by utrie2_builder.c, but also by - * utrie2_get32() and utrie2_enum(). - */ - -enum { - /** - * At build time, leave a gap in the index-2 table, - * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table - * and the supplementary index-1 table. - * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting. - */ - UNEWTRIE2_INDEX_GAP_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH, - UNEWTRIE2_INDEX_GAP_LENGTH= - ((UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH)+UTRIE2_INDEX_2_MASK)& - ~UTRIE2_INDEX_2_MASK, - - /** - * Maximum length of the build-time index-2 array. - * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2, - * plus the part of the index-2 table for lead surrogate code points, - * plus the build-time index gap, - * plus the null index-2 block. - */ - UNEWTRIE2_MAX_INDEX_2_LENGTH= - (0x110000>>UTRIE2_SHIFT_2)+ - UTRIE2_LSCP_INDEX_2_LENGTH+ - UNEWTRIE2_INDEX_GAP_LENGTH+ - UTRIE2_INDEX_2_BLOCK_LENGTH, - - UNEWTRIE2_INDEX_1_LENGTH=0x110000>>UTRIE2_SHIFT_1 -}; - -/** - * Maximum length of the build-time data array. 
- * One entry per 0x110000 code points, plus the illegal-UTF-8 block and the null block, - * plus values for the 0x400 surrogate code units. - */ -#define UNEWTRIE2_MAX_DATA_LENGTH (0x110000+0x40+0x40+0x400) - -/* - * Build-time trie structure. - * - * Just using a boolean flag for "repeat use" could lead to data array overflow - * because we would not be able to detect when a data block becomes unused. - * It also leads to orphan data blocks that are kept through serialization. - * - * Need to use reference counting for data blocks, - * and allocDataBlock() needs to look for a free block before increasing dataLength. - * - * This scheme seems like overkill for index-2 blocks since the whole index array is - * preallocated anyway (unlike the growable data array). - * Just allocating multiple index-2 blocks as needed. - */ -struct UNewTrie2 { - int32_t index1[UNEWTRIE2_INDEX_1_LENGTH]; - int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH]; - uint32_t *data; -#ifdef UCPTRIE_DEBUG - UMutableCPTrie *t3; -#endif - - uint32_t initialValue, errorValue; - int32_t index2Length, dataCapacity, dataLength; - int32_t firstFreeBlock; - int32_t index2NullOffset, dataNullOffset; - UChar32 highStart; - UBool isCompacted; - - /** - * Multi-purpose per-data-block table. - * - * Before compacting: - * - * Per-data-block reference counters/free-block list. - * 0: unused - * >0: reference counter (number of index-2 entries pointing here) - * <0: next free data block in free-block list - * - * While compacting: - * - * Map of adjusted indexes, used in compactData() and compactIndex2(). - * Maps from original indexes to new ones. 
- */ - int32_t map[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2]; -}; - -#endif diff --git a/deps/node/deps/icu-small/source/common/utrie_swap.cpp b/deps/node/deps/icu-small/source/common/utrie_swap.cpp deleted file mode 100644 index 5abe7bd5..00000000 --- a/deps/node/deps/icu-small/source/common/utrie_swap.cpp +++ /dev/null @@ -1,344 +0,0 @@ -// © 2018 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// utrie_swap.cpp -// created: 2018aug08 Markus W. Scherer - -#include "unicode/utypes.h" -#include "cmemory.h" -#include "ucptrie_impl.h" -#include "udataswp.h" -#include "utrie.h" -#include "utrie2_impl.h" - -// These functions for swapping different generations of ICU code point tries are here -// so that their implementation files need not depend on swapper code, -// need not depend on each other, and so that other swapper code -// need not depend on other trie code. - -namespace { - -constexpr int32_t ASCII_LIMIT = 0x80; - -} // namespace - -U_CAPI int32_t U_EXPORT2 -utrie_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UTrieHeader *inTrie; - UTrieHeader trie; - int32_t size; - UBool dataIs32; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - if(length>=0 && (uint32_t)lengthreadUInt32(inTrie->signature); - trie.options=ds->readUInt32(inTrie->options); - trie.indexLength=udata_readInt32(ds, inTrie->indexLength); - trie.dataLength=udata_readInt32(ds, inTrie->dataLength); - - if( trie.signature!=0x54726965 || - (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || - ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || - trie.indexLength=0) { - UTrieHeader *outTrie; - - if(lengthswapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, 
pErrorCode); - - /* swap the index and the data */ - if(dataIs32) { - ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); - ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, - (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); - } else { - ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); - } - } - - return size; -} - -U_CAPI int32_t U_EXPORT2 -utrie2_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UTrie2Header *inTrie; - UTrie2Header trie; - int32_t dataLength, size; - UTrie2ValueBits valueBits; - - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - inTrie=(const UTrie2Header *)inData; - trie.signature=ds->readUInt32(inTrie->signature); - trie.options=ds->readUInt16(inTrie->options); - trie.indexLength=ds->readUInt16(inTrie->indexLength); - trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); - - valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); - dataLength=(int32_t)trie.shiftedDataLength<=0) { - UTrie2Header *outTrie; - - if(lengthswapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); - ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); - - /* swap the index and the data */ - switch(valueBits) { - case UTRIE2_16_VALUE_BITS: - ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); - break; - case UTRIE2_32_VALUE_BITS: - ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); - ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, - (uint16_t *)(outTrie+1)+trie.indexLength, 
pErrorCode); - break; - default: - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } - } - - return size; -} - -U_CAPI int32_t U_EXPORT2 -ucptrie_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UCPTrieHeader *inTrie; - UCPTrieHeader trie; - int32_t dataLength, size; - UCPTrieValueWidth valueWidth; - - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - inTrie=(const UCPTrieHeader *)inData; - trie.signature=ds->readUInt32(inTrie->signature); - trie.options=ds->readUInt16(inTrie->options); - trie.indexLength=ds->readUInt16(inTrie->indexLength); - trie.dataLength = ds->readUInt16(inTrie->dataLength); - - UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3); - valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK); - dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength; - - int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ? 
- UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; - if( trie.signature!=UCPTRIE_SIG || - type > UCPTRIE_TYPE_SMALL || - (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 || - valueWidth > UCPTRIE_VALUE_BITS_8 || - trie.indexLength < minIndexLength || - dataLength < ASCII_LIMIT - ) { - *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */ - return 0; - } - - size=sizeof(UCPTrieHeader)+trie.indexLength*2; - switch(valueWidth) { - case UCPTRIE_VALUE_BITS_16: - size+=dataLength*2; - break; - case UCPTRIE_VALUE_BITS_32: - size+=dataLength*4; - break; - case UCPTRIE_VALUE_BITS_8: - size+=dataLength; - break; - default: - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } - - if(length>=0) { - UCPTrieHeader *outTrie; - - if(lengthswapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); - ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); - - /* swap the index and the data */ - switch(valueWidth) { - case UCPTRIE_VALUE_BITS_16: - ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); - break; - case UCPTRIE_VALUE_BITS_32: - ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); - ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, - (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); - break; - case UCPTRIE_VALUE_BITS_8: - ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); - if(inTrie!=outTrie) { - uprv_memmove((outTrie+1)+trie.indexLength, (inTrie+1)+trie.indexLength, dataLength); - } - break; - default: - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } - } - - return size; -} - -namespace { - -/** - * Gets the trie version from 32-bit-aligned memory containing the serialized form - * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3). 
- * - * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie - * @param length the number of bytes available at data; - * can be more than necessary (see return value) - * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized. - * If TRUE, opposite-endian serialized forms are recognized as well. - * @return the trie version of the serialized form, or 0 if it is not - * recognized as a serialized trie - */ -int32_t -getVersion(const void *data, int32_t length, UBool anyEndianOk) { - uint32_t signature; - if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) { - return 0; - } - signature=*(const uint32_t *)data; - if(signature==UCPTRIE_SIG) { - return 3; - } - if(anyEndianOk && signature==UCPTRIE_OE_SIG) { - return 3; - } - if(signature==UTRIE2_SIG) { - return 2; - } - if(anyEndianOk && signature==UTRIE2_OE_SIG) { - return 2; - } - if(signature==UTRIE_SIG) { - return 1; - } - if(anyEndianOk && signature==UTRIE_OE_SIG) { - return 1; - } - return 0; -} - -} // namespace - -U_CAPI int32_t U_EXPORT2 -utrie_swapAnyVersion(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { return 0; } - switch(getVersion(inData, length, TRUE)) { - case 1: - return utrie_swap(ds, inData, length, outData, pErrorCode); - case 2: - return utrie2_swap(ds, inData, length, outData, pErrorCode); - case 3: - return ucptrie_swap(ds, inData, length, outData, pErrorCode); - default: - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } -} diff --git a/deps/node/deps/icu-small/source/common/uts46.cpp b/deps/node/deps/icu-small/source/common/uts46.cpp deleted file mode 100644 index b9e6cb02..00000000 --- a/deps/node/deps/icu-small/source/common/uts46.cpp +++ /dev/null @@ -1,1484 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2015, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: uts46.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010mar09 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_IDNA - -#include "unicode/idna.h" -#include "unicode/normalizer2.h" -#include "unicode/uscript.h" -#include "unicode/ustring.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "cstring.h" -#include "punycode.h" -#include "ubidi_props.h" -#include "ustr_imp.h" - -// Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: -// -// The domain name length limit is 255 octets in an internal DNS representation -// where the last ("root") label is the empty label -// represented by length byte 0 alone. -// In a conventional string, this translates to 253 characters, or 254 -// if there is a trailing dot for the root label. - -U_NAMESPACE_BEGIN - -// Severe errors which usually result in a U+FFFD replacement character in the result string. 
-const uint32_t severeErrors= - UIDNA_ERROR_LEADING_COMBINING_MARK| - UIDNA_ERROR_DISALLOWED| - UIDNA_ERROR_PUNYCODE| - UIDNA_ERROR_LABEL_HAS_DOT| - UIDNA_ERROR_INVALID_ACE_LABEL; - -static inline UBool -isASCIIString(const UnicodeString &dest) { - const UChar *s=dest.getBuffer(); - const UChar *limit=s+dest.length(); - while(s0x7f) { - return FALSE; - } - } - return TRUE; -} - -static UBool -isASCIIOkBiDi(const UChar *s, int32_t length); - -static UBool -isASCIIOkBiDi(const char *s, int32_t length); - -// IDNA class default implementations -------------------------------------- *** - -IDNA::~IDNA() {} - -void -IDNA::labelToASCII_UTF8(StringPiece label, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - if(U_SUCCESS(errorCode)) { - UnicodeString destString; - labelToASCII(UnicodeString::fromUTF8(label), destString, - info, errorCode).toUTF8(dest); - } -} - -void -IDNA::labelToUnicodeUTF8(StringPiece label, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - if(U_SUCCESS(errorCode)) { - UnicodeString destString; - labelToUnicode(UnicodeString::fromUTF8(label), destString, - info, errorCode).toUTF8(dest); - } -} - -void -IDNA::nameToASCII_UTF8(StringPiece name, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - if(U_SUCCESS(errorCode)) { - UnicodeString destString; - nameToASCII(UnicodeString::fromUTF8(name), destString, - info, errorCode).toUTF8(dest); - } -} - -void -IDNA::nameToUnicodeUTF8(StringPiece name, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - if(U_SUCCESS(errorCode)) { - UnicodeString destString; - nameToUnicode(UnicodeString::fromUTF8(name), destString, - info, errorCode).toUTF8(dest); - } -} - -// UTS46 class declaration ------------------------------------------------- *** - -class UTS46 : public IDNA { -public: - UTS46(uint32_t options, UErrorCode &errorCode); - virtual ~UTS46(); - - virtual UnicodeString & - labelToASCII(const UnicodeString &label, UnicodeString &dest, - IDNAInfo 
&info, UErrorCode &errorCode) const; - - virtual UnicodeString & - labelToUnicode(const UnicodeString &label, UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - - virtual UnicodeString & - nameToASCII(const UnicodeString &name, UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - - virtual UnicodeString & - nameToUnicode(const UnicodeString &name, UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - - virtual void - labelToASCII_UTF8(StringPiece label, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - - virtual void - labelToUnicodeUTF8(StringPiece label, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - - virtual void - nameToASCII_UTF8(StringPiece name, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - - virtual void - nameToUnicodeUTF8(StringPiece name, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - -private: - UnicodeString & - process(const UnicodeString &src, - UBool isLabel, UBool toASCII, - UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - - void - processUTF8(StringPiece src, - UBool isLabel, UBool toASCII, - ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - - UnicodeString & - processUnicode(const UnicodeString &src, - int32_t labelStart, int32_t mappingStart, - UBool isLabel, UBool toASCII, - UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const; - - // returns the new dest.length() - int32_t - mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, - UErrorCode &errorCode) const; - - // returns the new label length - int32_t - processLabel(UnicodeString &dest, - int32_t labelStart, int32_t labelLength, - UBool toASCII, - IDNAInfo &info, UErrorCode &errorCode) const; - int32_t - markBadACELabel(UnicodeString &dest, - int32_t labelStart, int32_t labelLength, - UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const; - - void - checkLabelBiDi(const UChar *label, 
int32_t labelLength, IDNAInfo &info) const; - - UBool - isLabelOkContextJ(const UChar *label, int32_t labelLength) const; - - void - checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const; - - const Normalizer2 &uts46Norm2; // uts46.nrm - uint32_t options; -}; - -IDNA * -IDNA::createUTS46Instance(uint32_t options, UErrorCode &errorCode) { - if(U_SUCCESS(errorCode)) { - IDNA *idna=new UTS46(options, errorCode); - if(idna==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } else if(U_FAILURE(errorCode)) { - delete idna; - idna=NULL; - } - return idna; - } else { - return NULL; - } -} - -// UTS46 implementation ---------------------------------------------------- *** - -UTS46::UTS46(uint32_t opt, UErrorCode &errorCode) - : uts46Norm2(*Normalizer2::getInstance(NULL, "uts46", UNORM2_COMPOSE, errorCode)), - options(opt) {} - -UTS46::~UTS46() {} - -UnicodeString & -UTS46::labelToASCII(const UnicodeString &label, UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - return process(label, TRUE, TRUE, dest, info, errorCode); -} - -UnicodeString & -UTS46::labelToUnicode(const UnicodeString &label, UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - return process(label, TRUE, FALSE, dest, info, errorCode); -} - -UnicodeString & -UTS46::nameToASCII(const UnicodeString &name, UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - process(name, FALSE, TRUE, dest, info, errorCode); - if( dest.length()>=254 && (info.errors&UIDNA_ERROR_DOMAIN_NAME_TOO_LONG)==0 && - isASCIIString(dest) && - (dest.length()>254 || dest[253]!=0x2e) - ) { - info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; - } - return dest; -} - -UnicodeString & -UTS46::nameToUnicode(const UnicodeString &name, UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - return process(name, FALSE, FALSE, dest, info, errorCode); -} - -void -UTS46::labelToASCII_UTF8(StringPiece label, ByteSink &dest, - IDNAInfo &info, UErrorCode 
&errorCode) const { - processUTF8(label, TRUE, TRUE, dest, info, errorCode); -} - -void -UTS46::labelToUnicodeUTF8(StringPiece label, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - processUTF8(label, TRUE, FALSE, dest, info, errorCode); -} - -void -UTS46::nameToASCII_UTF8(StringPiece name, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - processUTF8(name, FALSE, TRUE, dest, info, errorCode); -} - -void -UTS46::nameToUnicodeUTF8(StringPiece name, ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - processUTF8(name, FALSE, FALSE, dest, info, errorCode); -} - -// UTS #46 data for ASCII characters. -// The normalizer (using uts46.nrm) maps uppercase ASCII letters to lowercase -// and passes through all other ASCII characters. -// If UIDNA_USE_STD3_RULES is set, then non-LDH characters are disallowed -// using this data. -// The ASCII fastpath also uses this data. -// Values: -1=disallowed 0==valid 1==mapped (lowercase) -static const int8_t asciiData[128]={ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - // 002D..002E; valid # HYPHEN-MINUS..FULL STOP - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, -1, - // 0030..0039; valid # DIGIT ZERO..DIGIT NINE - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, - // 0041..005A; mapped # LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z - -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, - // 0061..007A; valid # LATIN SMALL LETTER A..LATIN SMALL LETTER Z - -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1 -}; - -UnicodeString & -UTS46::process(const UnicodeString &src, - UBool isLabel, UBool toASCII, - UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - // uts46Norm2.normalize() would do all of this error checking and setup, - // but with the ASCII fastpath 
we do not always call it, and do not - // call it first. - if(U_FAILURE(errorCode)) { - dest.setToBogus(); - return dest; - } - const UChar *srcArray=src.getBuffer(); - if(&dest==&src || srcArray==NULL) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - dest.setToBogus(); - return dest; - } - // Arguments are fine, reset output values. - dest.remove(); - info.reset(); - int32_t srcLength=src.length(); - if(srcLength==0) { - info.errors|=UIDNA_ERROR_EMPTY_LABEL; - return dest; - } - UChar *destArray=dest.getBuffer(srcLength); - if(destArray==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return dest; - } - // ASCII fastpath - UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; - int32_t labelStart=0; - int32_t i; - for(i=0;; ++i) { - if(i==srcLength) { - if(toASCII) { - if((i-labelStart)>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } - // There is a trailing dot if labelStart==i. - if(!isLabel && i>=254 && (i>254 || labelStart0x7f) { - break; - } - int cData=asciiData[c]; - if(cData>0) { - destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter. - } else if(cData<0 && disallowNonLDHDot) { - break; // Replacing with U+FFFD can be complicated for toASCII. - } else { - destArray[i]=c; - if(c==0x2d) { // hyphen - if(i==(labelStart+3) && srcArray[i-1]==0x2d) { - // "??--..." is Punycode or forbidden. - ++i; // '-' was copied to dest already - break; - } - if(i==labelStart) { - // label starts with "-" - info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; - } - if((i+1)==srcLength || srcArray[i+1]==0x2e) { - // label ends with "-" - info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; - } - } else if(c==0x2e) { // dot - if(isLabel) { - // Replacing with U+FFFD can be complicated for toASCII. - ++i; // '.' 
was copied to dest already - break; - } - if(i==labelStart) { - info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; - } - if(toASCII && (i-labelStart)>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } - info.errors|=info.labelErrors; - info.labelErrors=0; - labelStart=i+1; - } - } - } - info.errors|=info.labelErrors; - dest.releaseBuffer(i); - processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); - if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 && - (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(dest.getBuffer(), labelStart))) - ) { - info.errors|=UIDNA_ERROR_BIDI; - } - return dest; -} - -void -UTS46::processUTF8(StringPiece src, - UBool isLabel, UBool toASCII, - ByteSink &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return; - } - const char *srcArray=src.data(); - int32_t srcLength=src.length(); - if(srcArray==NULL && srcLength!=0) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - // Arguments are fine, reset output values. - info.reset(); - if(srcLength==0) { - info.errors|=UIDNA_ERROR_EMPTY_LABEL; - dest.Flush(); - return; - } - UnicodeString destString; - int32_t labelStart=0; - if(srcLength<=256) { // length of stackArray[] - // ASCII fastpath - char stackArray[256]; - int32_t destCapacity; - char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, - stackArray, UPRV_LENGTHOF(stackArray), &destCapacity); - UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; - int32_t i; - for(i=0;; ++i) { - if(i==srcLength) { - if(toASCII) { - if((i-labelStart)>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } - // There is a trailing dot if labelStart==i. - if(!isLabel && i>=254 && (i>254 || labelStart0x7f - break; - } - int cData=asciiData[(int)c]; // Cast: gcc warns about indexing with a char. - if(cData>0) { - destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter. 
- } else if(cData<0 && disallowNonLDHDot) { - break; // Replacing with U+FFFD can be complicated for toASCII. - } else { - destArray[i]=c; - if(c==0x2d) { // hyphen - if(i==(labelStart+3) && srcArray[i-1]==0x2d) { - // "??--..." is Punycode or forbidden. - break; - } - if(i==labelStart) { - // label starts with "-" - info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; - } - if((i+1)==srcLength || srcArray[i+1]==0x2e) { - // label ends with "-" - info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; - } - } else if(c==0x2e) { // dot - if(isLabel) { - break; // Replacing with U+FFFD can be complicated for toASCII. - } - if(i==labelStart) { - info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; - } - if(toASCII && (i-labelStart)>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } - info.errors|=info.labelErrors; - info.labelErrors=0; - labelStart=i+1; - } - } - } - info.errors|=info.labelErrors; - // Convert the processed ASCII prefix of the current label to UTF-16. - int32_t mappingStart=i-labelStart; - destString=UnicodeString::fromUTF8(StringPiece(destArray+labelStart, mappingStart)); - // Output the previous ASCII labels and process the rest of src in UTF-16. - dest.Append(destArray, labelStart); - processUnicode(UnicodeString::fromUTF8(StringPiece(src, labelStart)), 0, mappingStart, - isLabel, toASCII, - destString, info, errorCode); - } else { - // src is too long for the ASCII fastpath implementation. - processUnicode(UnicodeString::fromUTF8(src), 0, 0, - isLabel, toASCII, - destString, info, errorCode); - } - destString.toUTF8(dest); // calls dest.Flush() - if(toASCII && !isLabel) { - // length==labelStart==254 means that there is a trailing dot (ok) and - // destString is empty (do not index at 253-labelStart). 
- int32_t length=labelStart+destString.length(); - if( length>=254 && isASCIIString(destString) && - (length>254 || - (labelStart<254 && destString[253-labelStart]!=0x2e)) - ) { - info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; - } - } - if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 && - (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(srcArray, labelStart))) - ) { - info.errors|=UIDNA_ERROR_BIDI; - } -} - -UnicodeString & -UTS46::processUnicode(const UnicodeString &src, - int32_t labelStart, int32_t mappingStart, - UBool isLabel, UBool toASCII, - UnicodeString &dest, - IDNAInfo &info, UErrorCode &errorCode) const { - if(mappingStart==0) { - uts46Norm2.normalize(src, dest, errorCode); - } else { - uts46Norm2.normalizeSecondAndAppend(dest, src.tempSubString(mappingStart), errorCode); - } - if(U_FAILURE(errorCode)) { - return dest; - } - UBool doMapDevChars= - toASCII ? (options&UIDNA_NONTRANSITIONAL_TO_ASCII)==0 : - (options&UIDNA_NONTRANSITIONAL_TO_UNICODE)==0; - const UChar *destArray=dest.getBuffer(); - int32_t destLength=dest.length(); - int32_t labelLimit=labelStart; - while(labelLimit=0x200c)) { - info.isTransDiff=TRUE; - if(doMapDevChars) { - destLength=mapDevChars(dest, labelStart, labelLimit, errorCode); - if(U_FAILURE(errorCode)) { - return dest; - } - destArray=dest.getBuffer(); - // All deviation characters have been mapped, no need to check for them again. - doMapDevChars=FALSE; - // Do not increment labelLimit in case c was removed. - continue; - } - } else if(U16_IS_SURROGATE(c)) { - if(U16_IS_SURROGATE_LEAD(c) ? - (labelLimit+1)==destLength || !U16_IS_TRAIL(destArray[labelLimit+1]) : - labelLimit==labelStart || !U16_IS_LEAD(destArray[labelLimit-1])) { - // Map an unpaired surrogate to U+FFFD before normalization so that when - // that removes characters we do not turn two unpaired ones into a pair. 
- info.labelErrors|=UIDNA_ERROR_DISALLOWED; - dest.setCharAt(labelLimit, 0xfffd); - destArray=dest.getBuffer(); - } - } - ++labelLimit; - } - // Permit an empty label at the end (0=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) { - // Label starts with "xn--", try to un-Punycode it. - wasPunycode=TRUE; - UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most labels should fit - if(unicodeBuffer==NULL) { - // Should never occur if we used capacity==-1 which uses the internal buffer. - errorCode=U_MEMORY_ALLOCATION_ERROR; - return labelLength; - } - UErrorCode punycodeErrorCode=U_ZERO_ERROR; - int32_t unicodeLength=u_strFromPunycode(label+4, labelLength-4, - unicodeBuffer, fromPunycode.getCapacity(), - NULL, &punycodeErrorCode); - if(punycodeErrorCode==U_BUFFER_OVERFLOW_ERROR) { - fromPunycode.releaseBuffer(0); - unicodeBuffer=fromPunycode.getBuffer(unicodeLength); - if(unicodeBuffer==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return labelLength; - } - punycodeErrorCode=U_ZERO_ERROR; - unicodeLength=u_strFromPunycode(label+4, labelLength-4, - unicodeBuffer, fromPunycode.getCapacity(), - NULL, &punycodeErrorCode); - } - fromPunycode.releaseBuffer(unicodeLength); - if(U_FAILURE(punycodeErrorCode)) { - info.labelErrors|=UIDNA_ERROR_PUNYCODE; - return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); - } - // Check for NFC, and for characters that are not - // valid or deviation characters according to the normalizer. - // If there is something wrong, then the string will change. - // Note that the normalizer passes through non-LDH ASCII and deviation characters. - // Deviation characters are ok in Punycode even in transitional processing. - // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES - // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too. 
- UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode); - if(U_FAILURE(errorCode)) { - return labelLength; - } - if(!isValid) { - info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; - return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); - } - labelString=&fromPunycode; - label=fromPunycode.getBuffer(); - labelStart=0; - labelLength=fromPunycode.length(); - } else { - wasPunycode=FALSE; - labelString=&dest; - } - // Validity check - if(labelLength==0) { - info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; - return replaceLabel(dest, destLabelStart, destLabelLength, - *labelString, labelLength, errorCode); - } - // labelLength>0 - if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { - // label starts with "??--" - info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; - } - if(label[0]==0x2d) { - // label starts with "-" - info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; - } - if(label[labelLength-1]==0x2d) { - // label ends with "-" - info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; - } - // If the label was not a Punycode label, then it was the result of - // mapping, normalization and label segmentation. - // If the label was in Punycode, then we mapped it again above - // and checked its validity. - // Now we handle the STD3 restriction to LDH characters (if set) - // and we look for U+FFFD which indicates disallowed characters - // in a non-Punycode label or U+FFFD itself in a Punycode label. - // We also check for dots which can come from the input to a single-label function. - // Ok to cast away const because we own the UnicodeString. - UChar *s=(UChar *)label; - const UChar *limit=label+labelLength; - UChar oredChars=0; - // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed. 
- UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; - do { - UChar c=*s; - if(c<=0x7f) { - if(c==0x2e) { - info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; - *s=0xfffd; - } else if(disallowNonLDHDot && asciiData[c]<0) { - info.labelErrors|=UIDNA_ERROR_DISALLOWED; - *s=0xfffd; - } - } else { - oredChars|=c; - if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) { - info.labelErrors|=UIDNA_ERROR_DISALLOWED; - *s=0xfffd; - } else if(c==0xfffd) { - info.labelErrors|=UIDNA_ERROR_DISALLOWED; - } - } - ++s; - } while(sreplace(labelStart, cpLength, (UChar)0xfffd); - label=labelString->getBuffer()+labelStart; - labelLength+=1-cpLength; - if(labelString==&dest) { - destLabelLength=labelLength; - } - } - if((info.labelErrors&severeErrors)==0) { - // Do contextual checks only if we do not have U+FFFD from a severe error - // because U+FFFD can make these checks fail. - if((options&UIDNA_CHECK_BIDI)!=0 && (!info.isBiDi || info.isOkBiDi)) { - checkLabelBiDi(label, labelLength, info); - } - if( (options&UIDNA_CHECK_CONTEXTJ)!=0 && (oredChars&0x200c)==0x200c && - !isLabelOkContextJ(label, labelLength) - ) { - info.labelErrors|=UIDNA_ERROR_CONTEXTJ; - } - if((options&UIDNA_CHECK_CONTEXTO)!=0 && oredChars>=0xb7) { - checkLabelContextO(label, labelLength, info); - } - if(toASCII) { - if(wasPunycode) { - // Leave a Punycode label unchanged if it has no severe errors. - if(destLabelLength>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } - return destLabelLength; - } else if(oredChars>=0x80) { - // Contains non-ASCII characters. - UnicodeString punycode; - UChar *buffer=punycode.getBuffer(63); // 63==maximum DNS label length - if(buffer==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return destLabelLength; - } - buffer[0]=0x78; // Write "xn--". 
- buffer[1]=0x6e; - buffer[2]=0x2d; - buffer[3]=0x2d; - int32_t punycodeLength=u_strToPunycode(label, labelLength, - buffer+4, punycode.getCapacity()-4, - NULL, &errorCode); - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - errorCode=U_ZERO_ERROR; - punycode.releaseBuffer(4); - buffer=punycode.getBuffer(4+punycodeLength); - if(buffer==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return destLabelLength; - } - punycodeLength=u_strToPunycode(label, labelLength, - buffer+4, punycode.getCapacity()-4, - NULL, &errorCode); - } - punycodeLength+=4; - punycode.releaseBuffer(punycodeLength); - if(U_FAILURE(errorCode)) { - return destLabelLength; - } - if(punycodeLength>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } - return replaceLabel(dest, destLabelStart, destLabelLength, - punycode, punycodeLength, errorCode); - } else { - // all-ASCII label - if(labelLength>63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } - } - } - } else { - // If a Punycode label has severe errors, - // then leave it but make sure it does not look valid. - if(wasPunycode) { - info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; - return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info, errorCode); - } - } - return replaceLabel(dest, destLabelStart, destLabelLength, - *labelString, labelLength, errorCode); -} - -// Make sure an ACE label does not look valid. -// Append U+FFFD if the label has only LDH characters. -// If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD. -int32_t -UTS46::markBadACELabel(UnicodeString &dest, - int32_t labelStart, int32_t labelLength, - UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return 0; - } - UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; - UBool isASCII=TRUE; - UBool onlyLDH=TRUE; - const UChar *label=dest.getBuffer()+labelStart; - // Ok to cast away const because we own the UnicodeString. - UChar *s=(UChar *)label+4; // After the initial "xn--". 
- const UChar *limit=label+labelLength; - do { - UChar c=*s; - if(c<=0x7f) { - if(c==0x2e) { - info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; - *s=0xfffd; - isASCII=onlyLDH=FALSE; - } else if(asciiData[c]<0) { - onlyLDH=FALSE; - if(disallowNonLDHDot) { - *s=0xfffd; - isASCII=FALSE; - } - } - } else { - isASCII=onlyLDH=FALSE; - } - } while(++s63) { - info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; - } - } - return labelLength; -} - -const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT); -const uint32_t R_AL_MASK=U_MASK(U_RIGHT_TO_LEFT)|U_MASK(U_RIGHT_TO_LEFT_ARABIC); -const uint32_t L_R_AL_MASK=L_MASK|R_AL_MASK; - -const uint32_t R_AL_AN_MASK=R_AL_MASK|U_MASK(U_ARABIC_NUMBER); - -const uint32_t EN_AN_MASK=U_MASK(U_EUROPEAN_NUMBER)|U_MASK(U_ARABIC_NUMBER); -const uint32_t R_AL_EN_AN_MASK=R_AL_MASK|EN_AN_MASK; -const uint32_t L_EN_MASK=L_MASK|U_MASK(U_EUROPEAN_NUMBER); - -const uint32_t ES_CS_ET_ON_BN_NSM_MASK= - U_MASK(U_EUROPEAN_NUMBER_SEPARATOR)| - U_MASK(U_COMMON_NUMBER_SEPARATOR)| - U_MASK(U_EUROPEAN_NUMBER_TERMINATOR)| - U_MASK(U_OTHER_NEUTRAL)| - U_MASK(U_BOUNDARY_NEUTRAL)| - U_MASK(U_DIR_NON_SPACING_MARK); -const uint32_t L_EN_ES_CS_ET_ON_BN_NSM_MASK=L_EN_MASK|ES_CS_ET_ON_BN_NSM_MASK; -const uint32_t R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK=R_AL_MASK|EN_AN_MASK|ES_CS_ET_ON_BN_NSM_MASK; - -// We scan the whole label and check both for whether it contains RTL characters -// and whether it passes the BiDi Rule. -// In a BiDi domain name, all labels must pass the BiDi Rule, but we might find -// that a domain name is a BiDi domain name (has an RTL label) only after -// processing several earlier labels. -void -UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const { - // IDNA2008 BiDi rule - // Get the directionality of the first character. - UChar32 c; - int32_t i=0; - U16_NEXT_UNSAFE(label, i, c); - uint32_t firstMask=U_MASK(u_charDirection(c)); - // 1. The first character must be a character with BIDI property L, R - // or AL. 
If it has the R or AL property, it is an RTL label; if it - // has the L property, it is an LTR label. - if((firstMask&~L_R_AL_MASK)!=0) { - info.isOkBiDi=FALSE; - } - // Get the directionality of the last non-NSM character. - uint32_t lastMask; - for(;;) { - if(i>=labelLength) { - lastMask=firstMask; - break; - } - U16_PREV_UNSAFE(label, labelLength, c); - UCharDirection dir=u_charDirection(c); - if(dir!=U_DIR_NON_SPACING_MARK) { - lastMask=U_MASK(dir); - break; - } - } - // 3. In an RTL label, the end of the label must be a character with - // BIDI property R, AL, EN or AN, followed by zero or more - // characters with BIDI property NSM. - // 6. In an LTR label, the end of the label must be a character with - // BIDI property L or EN, followed by zero or more characters with - // BIDI property NSM. - if( (firstMask&L_MASK)!=0 ? - (lastMask&~L_EN_MASK)!=0 : - (lastMask&~R_AL_EN_AN_MASK)!=0 - ) { - info.isOkBiDi=FALSE; - } - // Add the directionalities of the intervening characters. - uint32_t mask=firstMask|lastMask; - while(ilabelStart) { - c=s[i-1]; - if(!(0x61<=c && c<=0x7a) && !(0x30<=c && c<=0x39)) { - // Last character in the label is not an L or EN. - return FALSE; - } - } - labelStart=i+1; - } else if(i==labelStart) { - if(!(0x61<=c && c<=0x7a)) { - // First character in the label is not an L. - return FALSE; - } - } else { - if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { - // Intermediate character in the label is a B, S or WS. - return FALSE; - } - } - } - return TRUE; -} - -// UTF-8 version, called for source ASCII prefix. -// Can contain uppercase A-Z. -// s[length-1] must be the trailing dot. -static UBool -isASCIIOkBiDi(const char *s, int32_t length) { - int32_t labelStart=0; - for(int32_t i=0; ilabelStart) { - c=s[i-1]; - if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a) && !(0x30<=c && c<=0x39)) { - // Last character in the label is not an L or EN. 
- return FALSE; - } - } - labelStart=i+1; - } else if(i==labelStart) { - if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a)) { - // First character in the label is not an L. - return FALSE; - } - } else { - if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { - // Intermediate character in the label is a B, S or WS. - return FALSE; - } - } - } - return TRUE; -} - -UBool -UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { - // [IDNA2008-Tables] - // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER - for(int32_t i=0; i0) { - info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; - } - arabicDigits=-1; - } else if(0x6f0<=c) { - if(arabicDigits<0) { - info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; - } - arabicDigits=1; - } - } - } else if(c==0x30fb) { - // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB) - // Rule Set: - // False; - // For All Characters: - // If Script(cp) .in. {Hiragana, Katakana, Han} Then True; - // End For; - UErrorCode errorCode=U_ZERO_ERROR; - for(int j=0;;) { - if(j>labelEnd) { - info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; - break; - } - U16_NEXT(label, j, labelLength, c); - UScriptCode script=uscript_getScript(c, &errorCode); - if(script==USCRIPT_HIRAGANA || script==USCRIPT_KATAKANA || script==USCRIPT_HAN) { - break; - } - } - } - } -} - -U_NAMESPACE_END - -// C API ------------------------------------------------------------------- *** - -U_NAMESPACE_USE - -U_CAPI UIDNA * U_EXPORT2 -uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) { - return reinterpret_cast(IDNA::createUTS46Instance(options, *pErrorCode)); -} - -U_CAPI void U_EXPORT2 -uidna_close(UIDNA *idna) { - delete reinterpret_cast(idna); -} - -static UBool -checkArgs(const void *label, int32_t length, - void *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { - return FALSE; - } - // sizeof(UIDNAInfo)=16 in the first API version. 
- if(pInfo==NULL || pInfo->size<16) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - if( (label==NULL ? length!=0 : length<-1) || - (dest==NULL ? capacity!=0 : capacity<0) || - (dest==label && label!=NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - // Set all *pInfo bytes to 0 except for the size field itself. - uprv_memset(&pInfo->size+1, 0, pInfo->size-sizeof(pInfo->size)); - return TRUE; -} - -static void -idnaInfoToStruct(IDNAInfo &info, UIDNAInfo *pInfo) { - pInfo->isTransitionalDifferent=info.isTransitionalDifferent(); - pInfo->errors=info.getErrors(); -} - -U_CAPI int32_t U_EXPORT2 -uidna_labelToASCII(const UIDNA *idna, - const UChar *label, int32_t length, - UChar *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode) { - if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { - return 0; - } - UnicodeString src((UBool)(length<0), label, length); - UnicodeString destString(dest, 0, capacity); - IDNAInfo info; - reinterpret_cast(idna)->labelToASCII(src, destString, info, *pErrorCode); - idnaInfoToStruct(info, pInfo); - return destString.extract(dest, capacity, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uidna_labelToUnicode(const UIDNA *idna, - const UChar *label, int32_t length, - UChar *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode) { - if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { - return 0; - } - UnicodeString src((UBool)(length<0), label, length); - UnicodeString destString(dest, 0, capacity); - IDNAInfo info; - reinterpret_cast(idna)->labelToUnicode(src, destString, info, *pErrorCode); - idnaInfoToStruct(info, pInfo); - return destString.extract(dest, capacity, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uidna_nameToASCII(const UIDNA *idna, - const UChar *name, int32_t length, - UChar *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode) { - if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { - return 0; - } - 
UnicodeString src((UBool)(length<0), name, length); - UnicodeString destString(dest, 0, capacity); - IDNAInfo info; - reinterpret_cast(idna)->nameToASCII(src, destString, info, *pErrorCode); - idnaInfoToStruct(info, pInfo); - return destString.extract(dest, capacity, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uidna_nameToUnicode(const UIDNA *idna, - const UChar *name, int32_t length, - UChar *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode) { - if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { - return 0; - } - UnicodeString src((UBool)(length<0), name, length); - UnicodeString destString(dest, 0, capacity); - IDNAInfo info; - reinterpret_cast(idna)->nameToUnicode(src, destString, info, *pErrorCode); - idnaInfoToStruct(info, pInfo); - return destString.extract(dest, capacity, *pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uidna_labelToASCII_UTF8(const UIDNA *idna, - const char *label, int32_t length, - char *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode) { - if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { - return 0; - } - StringPiece src(label, length<0 ? static_cast(uprv_strlen(label)) : length); - CheckedArrayByteSink sink(dest, capacity); - IDNAInfo info; - reinterpret_cast(idna)->labelToASCII_UTF8(src, sink, info, *pErrorCode); - idnaInfoToStruct(info, pInfo); - return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uidna_labelToUnicodeUTF8(const UIDNA *idna, - const char *label, int32_t length, - char *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode) { - if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { - return 0; - } - StringPiece src(label, length<0 ? 
static_cast(uprv_strlen(label)) : length); - CheckedArrayByteSink sink(dest, capacity); - IDNAInfo info; - reinterpret_cast(idna)->labelToUnicodeUTF8(src, sink, info, *pErrorCode); - idnaInfoToStruct(info, pInfo); - return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uidna_nameToASCII_UTF8(const UIDNA *idna, - const char *name, int32_t length, - char *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode) { - if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { - return 0; - } - StringPiece src(name, length<0 ? static_cast(uprv_strlen(name)) : length); - CheckedArrayByteSink sink(dest, capacity); - IDNAInfo info; - reinterpret_cast(idna)->nameToASCII_UTF8(src, sink, info, *pErrorCode); - idnaInfoToStruct(info, pInfo); - return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -uidna_nameToUnicodeUTF8(const UIDNA *idna, - const char *name, int32_t length, - char *dest, int32_t capacity, - UIDNAInfo *pInfo, UErrorCode *pErrorCode) { - if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { - return 0; - } - StringPiece src(name, length<0 ? static_cast(uprv_strlen(name)) : length); - CheckedArrayByteSink sink(dest, capacity); - IDNAInfo info; - reinterpret_cast(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); - idnaInfoToStruct(info, pInfo); - return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); -} - -#endif // UCONFIG_NO_IDNA diff --git a/deps/node/deps/icu-small/source/common/utypeinfo.h b/deps/node/deps/icu-small/source/common/utypeinfo.h deleted file mode 100644 index c6663734..00000000 --- a/deps/node/deps/icu-small/source/common/utypeinfo.h +++ /dev/null @@ -1,32 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 2012-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -*/ - -#ifndef __UTYPEINFO_H__ -#define __UTYPEINFO_H__ - -// Windows header does not define 'exception' in 'std' namespace. -// Therefore, a project using ICU cannot be compiled with _HAS_EXCEPTIONS -// set to 0 on Windows with Visual Studio. To work around that, we have to -// include explicitly and add using statement below. -// Whenever 'typeid' is used, this header has to be included -// instead of . -// Visual Studio 10 emits warning 4275 with this change. If you compile -// with exception disabled, you have to suppress warning 4275. -#if defined(_MSC_VER) && _HAS_EXCEPTIONS == 0 -#include -using std::exception; -#endif -#if defined(__GLIBCXX__) -namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364 -#endif -#include // for 'typeid' to work - -#endif diff --git a/deps/node/deps/icu-small/source/common/utypes.cpp b/deps/node/deps/icu-small/source/common/utypes.cpp deleted file mode 100644 index 7531e465..00000000 --- a/deps/node/deps/icu-small/source/common/utypes.cpp +++ /dev/null @@ -1,226 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1997-2015, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -****************************************************************************** -* -* FILE NAME : utypes.c (previously putil.c) -* -* Date Name Description -* 10/07/2004 grhoten split from putil.c -****************************************************************************** -*/ - -#include "unicode/utypes.h" - -/* u_errorName() ------------------------------------------------------------ */ - -static const char * const -_uErrorInfoName[U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START]={ - "U_USING_FALLBACK_WARNING", - "U_USING_DEFAULT_WARNING", - "U_SAFECLONE_ALLOCATED_WARNING", - "U_STATE_OLD_WARNING", - "U_STRING_NOT_TERMINATED_WARNING", - "U_SORT_KEY_TOO_SHORT_WARNING", - "U_AMBIGUOUS_ALIAS_WARNING", - "U_DIFFERENT_UCA_VERSION", - "U_PLUGIN_CHANGED_LEVEL_WARNING", -}; - -static const char * const -_uTransErrorName[U_PARSE_ERROR_LIMIT - U_PARSE_ERROR_START]={ - "U_BAD_VARIABLE_DEFINITION", - "U_MALFORMED_RULE", - "U_MALFORMED_SET", - "U_MALFORMED_SYMBOL_REFERENCE", - "U_MALFORMED_UNICODE_ESCAPE", - "U_MALFORMED_VARIABLE_DEFINITION", - "U_MALFORMED_VARIABLE_REFERENCE", - "U_MISMATCHED_SEGMENT_DELIMITERS", - "U_MISPLACED_ANCHOR_START", - "U_MISPLACED_CURSOR_OFFSET", - "U_MISPLACED_QUANTIFIER", - "U_MISSING_OPERATOR", - "U_MISSING_SEGMENT_CLOSE", - "U_MULTIPLE_ANTE_CONTEXTS", - "U_MULTIPLE_CURSORS", - "U_MULTIPLE_POST_CONTEXTS", - "U_TRAILING_BACKSLASH", - "U_UNDEFINED_SEGMENT_REFERENCE", - "U_UNDEFINED_VARIABLE", - "U_UNQUOTED_SPECIAL", - "U_UNTERMINATED_QUOTE", - "U_RULE_MASK_ERROR", - "U_MISPLACED_COMPOUND_FILTER", - "U_MULTIPLE_COMPOUND_FILTERS", - "U_INVALID_RBT_SYNTAX", - "U_INVALID_PROPERTY_PATTERN", - "U_MALFORMED_PRAGMA", - "U_UNCLOSED_SEGMENT", - "U_ILLEGAL_CHAR_IN_SEGMENT", - "U_VARIABLE_RANGE_EXHAUSTED", - "U_VARIABLE_RANGE_OVERLAP", - "U_ILLEGAL_CHARACTER", - "U_INTERNAL_TRANSLITERATOR_ERROR", - "U_INVALID_ID", - "U_INVALID_FUNCTION" -}; - -static const char * const -_uErrorName[U_STANDARD_ERROR_LIMIT]={ - "U_ZERO_ERROR", - - "U_ILLEGAL_ARGUMENT_ERROR", 
- "U_MISSING_RESOURCE_ERROR", - "U_INVALID_FORMAT_ERROR", - "U_FILE_ACCESS_ERROR", - "U_INTERNAL_PROGRAM_ERROR", - "U_MESSAGE_PARSE_ERROR", - "U_MEMORY_ALLOCATION_ERROR", - "U_INDEX_OUTOFBOUNDS_ERROR", - "U_PARSE_ERROR", - "U_INVALID_CHAR_FOUND", - "U_TRUNCATED_CHAR_FOUND", - "U_ILLEGAL_CHAR_FOUND", - "U_INVALID_TABLE_FORMAT", - "U_INVALID_TABLE_FILE", - "U_BUFFER_OVERFLOW_ERROR", - "U_UNSUPPORTED_ERROR", - "U_RESOURCE_TYPE_MISMATCH", - "U_ILLEGAL_ESCAPE_SEQUENCE", - "U_UNSUPPORTED_ESCAPE_SEQUENCE", - "U_NO_SPACE_AVAILABLE", - "U_CE_NOT_FOUND_ERROR", - "U_PRIMARY_TOO_LONG_ERROR", - "U_STATE_TOO_OLD_ERROR", - "U_TOO_MANY_ALIASES_ERROR", - "U_ENUM_OUT_OF_SYNC_ERROR", - "U_INVARIANT_CONVERSION_ERROR", - "U_INVALID_STATE_ERROR", - "U_COLLATOR_VERSION_MISMATCH", - "U_USELESS_COLLATOR_ERROR", - "U_NO_WRITE_PERMISSION" -}; -static const char * const -_uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { - "U_UNEXPECTED_TOKEN", - "U_MULTIPLE_DECIMAL_SEPARATORS", - "U_MULTIPLE_EXPONENTIAL_SYMBOLS", - "U_MALFORMED_EXPONENTIAL_PATTERN", - "U_MULTIPLE_PERCENT_SYMBOLS", - "U_MULTIPLE_PERMILL_SYMBOLS", - "U_MULTIPLE_PAD_SPECIFIERS", - "U_PATTERN_SYNTAX_ERROR", - "U_ILLEGAL_PAD_POSITION", - "U_UNMATCHED_BRACES", - "U_UNSUPPORTED_PROPERTY", - "U_UNSUPPORTED_ATTRIBUTE", - "U_ARGUMENT_TYPE_MISMATCH", - "U_DUPLICATE_KEYWORD", - "U_UNDEFINED_KEYWORD", - "U_DEFAULT_KEYWORD_MISSING", - "U_DECIMAL_NUMBER_SYNTAX_ERROR", - "U_FORMAT_INEXACT_ERROR", - "U_NUMBER_ARG_OUTOFBOUNDS_ERROR", - "U_NUMBER_SKELETON_SYNTAX_ERROR", -}; - -static const char * const -_uBrkErrorName[U_BRK_ERROR_LIMIT - U_BRK_ERROR_START] = { - "U_BRK_INTERNAL_ERROR", - "U_BRK_HEX_DIGITS_EXPECTED", - "U_BRK_SEMICOLON_EXPECTED", - "U_BRK_RULE_SYNTAX", - "U_BRK_UNCLOSED_SET", - "U_BRK_ASSIGN_ERROR", - "U_BRK_VARIABLE_REDFINITION", - "U_BRK_MISMATCHED_PAREN", - "U_BRK_NEW_LINE_IN_QUOTED_STRING", - "U_BRK_UNDEFINED_VARIABLE", - "U_BRK_INIT_ERROR", - "U_BRK_RULE_EMPTY_SET", - "U_BRK_UNRECOGNIZED_OPTION", - 
"U_BRK_MALFORMED_RULE_TAG" -}; - -static const char * const -_uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = { - "U_REGEX_INTERNAL_ERROR", - "U_REGEX_RULE_SYNTAX", - "U_REGEX_INVALID_STATE", - "U_REGEX_BAD_ESCAPE_SEQUENCE", - "U_REGEX_PROPERTY_SYNTAX", - "U_REGEX_UNIMPLEMENTED", - "U_REGEX_MISMATCHED_PAREN", - "U_REGEX_NUMBER_TOO_BIG", - "U_REGEX_BAD_INTERVAL", - "U_REGEX_MAX_LT_MIN", - "U_REGEX_INVALID_BACK_REF", - "U_REGEX_INVALID_FLAG", - "U_REGEX_LOOK_BEHIND_LIMIT", - "U_REGEX_SET_CONTAINS_STRING", - "U_REGEX_OCTAL_TOO_BIG", - "U_REGEX_MISSING_CLOSE_BRACKET", - "U_REGEX_INVALID_RANGE", - "U_REGEX_STACK_OVERFLOW", - "U_REGEX_TIME_OUT", - "U_REGEX_STOPPED_BY_CALLER", - "U_REGEX_PATTERN_TOO_BIG", - "U_REGEX_INVALID_CAPTURE_GROUP_NAME" -}; - -static const char * const -_uIDNAErrorName[U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START] = { - "U_STRINGPREP_PROHIBITED_ERROR", - "U_STRINGPREP_UNASSIGNED_ERROR", - "U_STRINGPREP_CHECK_BIDI_ERROR", - "U_IDNA_STD3_ASCII_RULES_ERROR", - "U_IDNA_ACE_PREFIX_ERROR", - "U_IDNA_VERIFICATION_ERROR", - "U_IDNA_LABEL_TOO_LONG_ERROR", - "U_IDNA_ZERO_LENGTH_LABEL_ERROR", - "U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR" -}; - -static const char * const -_uPluginErrorName[U_PLUGIN_ERROR_LIMIT - U_PLUGIN_ERROR_START] = { - "U_PLUGIN_TOO_HIGH", - "U_PLUGIN_DIDNT_SET_LEVEL", -}; - -U_CAPI const char * U_EXPORT2 -u_errorName(UErrorCode code) { - if(U_ZERO_ERROR <= code && code < U_STANDARD_ERROR_LIMIT) { - return _uErrorName[code]; - } else if(U_ERROR_WARNING_START <= code && code < U_ERROR_WARNING_LIMIT) { - return _uErrorInfoName[code - U_ERROR_WARNING_START]; - } else if(U_PARSE_ERROR_START <= code && code < U_PARSE_ERROR_LIMIT){ - return _uTransErrorName[code - U_PARSE_ERROR_START]; - } else if(U_FMT_PARSE_ERROR_START <= code && code < U_FMT_PARSE_ERROR_LIMIT){ - return _uFmtErrorName[code - U_FMT_PARSE_ERROR_START]; - } else if (U_BRK_ERROR_START <= code && code < U_BRK_ERROR_LIMIT){ - return _uBrkErrorName[code - U_BRK_ERROR_START]; - } 
else if (U_REGEX_ERROR_START <= code && code < U_REGEX_ERROR_LIMIT) { - return _uRegexErrorName[code - U_REGEX_ERROR_START]; - } else if(U_IDNA_ERROR_START <= code && code < U_IDNA_ERROR_LIMIT) { - return _uIDNAErrorName[code - U_IDNA_ERROR_START]; - } else if(U_PLUGIN_ERROR_START <= code && code < U_PLUGIN_ERROR_LIMIT) { - return _uPluginErrorName[code - U_PLUGIN_ERROR_START]; - } else { - return "[BOGUS UErrorCode]"; - } -} - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ diff --git a/deps/node/deps/icu-small/source/common/uvector.cpp b/deps/node/deps/icu-small/source/common/uvector.cpp deleted file mode 100644 index ad3a813e..00000000 --- a/deps/node/deps/icu-small/source/common/uvector.cpp +++ /dev/null @@ -1,566 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1999-2013, International Business Machines Corporation and -* others. All Rights Reserved. -****************************************************************************** -* Date Name Description -* 10/22/99 alan Creation. -********************************************************************** -*/ - -#include "uvector.h" -#include "cmemory.h" -#include "uarrsort.h" -#include "uelement.h" - -U_NAMESPACE_BEGIN - -#define DEFAULT_CAPACITY 8 - -/* - * Constants for hinting whether a key is an integer - * or a pointer. If a hint bit is zero, then the associated - * token is assumed to be an integer. 
This is needed for iSeries - */ -#define HINT_KEY_POINTER (1) -#define HINT_KEY_INTEGER (0) - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector) - -UVector::UVector(UErrorCode &status) : - count(0), - capacity(0), - elements(0), - deleter(0), - comparer(0) -{ - _init(DEFAULT_CAPACITY, status); -} - -UVector::UVector(int32_t initialCapacity, UErrorCode &status) : - count(0), - capacity(0), - elements(0), - deleter(0), - comparer(0) -{ - _init(initialCapacity, status); -} - -UVector::UVector(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status) : - count(0), - capacity(0), - elements(0), - deleter(d), - comparer(c) -{ - _init(DEFAULT_CAPACITY, status); -} - -UVector::UVector(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status) : - count(0), - capacity(0), - elements(0), - deleter(d), - comparer(c) -{ - _init(initialCapacity, status); -} - -void UVector::_init(int32_t initialCapacity, UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - // Fix bogus initialCapacity values; avoid malloc(0) and integer overflow - if ((initialCapacity < 1) || (initialCapacity > (int32_t)(INT32_MAX / sizeof(UElement)))) { - initialCapacity = DEFAULT_CAPACITY; - } - elements = (UElement *)uprv_malloc(sizeof(UElement)*initialCapacity); - if (elements == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - } else { - capacity = initialCapacity; - } -} - -UVector::~UVector() { - removeAllElements(); - uprv_free(elements); - elements = 0; -} - -/** - * Assign this object to another (make this a copy of 'other'). - * Use the 'assign' function to assign each element. 
- */ -void UVector::assign(const UVector& other, UElementAssigner *assign, UErrorCode &ec) { - if (ensureCapacity(other.count, ec)) { - setSize(other.count, ec); - if (U_SUCCESS(ec)) { - for (int32_t i=0; iindex; --i) { - elements[i] = elements[i-1]; - } - elements[index].pointer = obj; - ++count; - } - /* else index out of range */ -} - -void UVector::insertElementAt(int32_t elem, int32_t index, UErrorCode &status) { - // must have 0 <= index <= count - if (0 <= index && index <= count && ensureCapacity(count + 1, status)) { - for (int32_t i=count; i>index; --i) { - elements[i] = elements[i-1]; - } - elements[index].pointer = NULL; - elements[index].integer = elem; - ++count; - } - /* else index out of range */ -} - -void* UVector::elementAt(int32_t index) const { - return (0 <= index && index < count) ? elements[index].pointer : 0; -} - -int32_t UVector::elementAti(int32_t index) const { - return (0 <= index && index < count) ? elements[index].integer : 0; -} - -UBool UVector::containsAll(const UVector& other) const { - for (int32_t i=0; i= 0) { - return FALSE; - } - } - return TRUE; -} - -UBool UVector::removeAll(const UVector& other) { - UBool changed = FALSE; - for (int32_t i=0; i= 0) { - removeElementAt(j); - changed = TRUE; - } - } - return changed; -} - -UBool UVector::retainAll(const UVector& other) { - UBool changed = FALSE; - for (int32_t j=size()-1; j>=0; --j) { - int32_t i = other.indexOf(elements[j]); - if (i < 0) { - removeElementAt(j); - changed = TRUE; - } - } - return changed; -} - -void UVector::removeElementAt(int32_t index) { - void* e = orphanElementAt(index); - if (e != 0 && deleter != 0) { - (*deleter)(e); - } -} - -UBool UVector::removeElement(void* obj) { - int32_t i = indexOf(obj); - if (i >= 0) { - removeElementAt(i); - return TRUE; - } - return FALSE; -} - -void UVector::removeAllElements(void) { - if (deleter != 0) { - for (int32_t i=0; icount != other.count) { - return FALSE; - } - if (comparer == 0) { - for (i=0; i (INT32_MAX - 1) / 
2) { // integer overflow check - status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - int32_t newCap = capacity * 2; - if (newCap < minimumCapacity) { - newCap = minimumCapacity; - } - if (newCap > (int32_t)(INT32_MAX / sizeof(UElement))) { // integer overflow check - // We keep the original memory contents on bad minimumCapacity. - status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - UElement* newElems = (UElement *)uprv_realloc(elements, sizeof(UElement)*newCap); - if (newElems == NULL) { - // We keep the original contents on the memory failure on realloc or bad minimumCapacity. - status = U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - elements = newElems; - capacity = newCap; - } - return TRUE; -} - -/** - * Change the size of this vector as follows: If newSize is smaller, - * then truncate the array, possibly deleting held elements for i >= - * newSize. If newSize is larger, grow the array, filling in new - * slots with NULL. - */ -void UVector::setSize(int32_t newSize, UErrorCode &status) { - int32_t i; - if (newSize < 0) { - return; - } - if (newSize > count) { - if (!ensureCapacity(newSize, status)) { - return; - } - UElement empty; - empty.pointer = NULL; - empty.integer = 0; - for (i=count; i=newSize; --i) { - removeElementAt(i); - } - } - count = newSize; -} - -/** - * Fill in the given array with all elements of this vector. - */ -void** UVector::toArray(void** result) const { - void** a = result; - for (int i=0; i 0) { - max = probe; - } else { - // assert(c <= 0); - min = probe + 1; - } - } - if (ensureCapacity(count + 1, ec)) { - for (int32_t i=count; i>min; --i) { - elements[i] = elements[i-1]; - } - elements[min] = e; - ++count; - } -} - -/** - * Array sort comparator function. - * Used from UVector::sort() - * Conforms to function signature required for uprv_sortArray(). - * This function is essentially just a wrapper, to make a - * UVector style comparator function usable with uprv_sortArray(). 
- * - * The context pointer to this function is a pointer back - * (with some extra indirection) to the user supplied comparator. - * - */ -static int32_t U_CALLCONV -sortComparator(const void *context, const void *left, const void *right) { - UElementComparator *compare = *static_cast(context); - UElement e1 = *static_cast(left); - UElement e2 = *static_cast(right); - int32_t result = (*compare)(e1, e2); - return result; -} - - -/** - * Array sort comparison function for use from UVector::sorti() - * Compares int32_t vector elements. - */ -static int32_t U_CALLCONV -sortiComparator(const void * /*context */, const void *left, const void *right) { - const UElement *e1 = static_cast(left); - const UElement *e2 = static_cast(right); - int32_t result = e1->integer < e2->integer? -1 : - e1->integer == e2->integer? 0 : 1; - return result; -} - -/** - * Sort the vector, assuming it constains ints. - * (A more general sort would take a comparison function, but it's - * not clear whether UVector's UElementComparator or - * UComparator from uprv_sortAray would be more appropriate.) - */ -void UVector::sorti(UErrorCode &ec) { - if (U_SUCCESS(ec)) { - uprv_sortArray(elements, count, sizeof(UElement), - sortiComparator, NULL, FALSE, &ec); - } -} - - -/** - * Sort with a user supplied comparator. - * - * The comparator function handling is confusing because the function type - * for UVector (as defined for sortedInsert()) is different from the signature - * required by uprv_sortArray(). This is handled by passing the - * the UVector sort function pointer via the context pointer to a - * sortArray() comparator function, which can then call back to - * the original user functtion. - * - * An additional twist is that it's not safe to pass a pointer-to-function - * as a (void *) data pointer, so instead we pass a (data) pointer to a - * pointer-to-function variable. 
- */ -void UVector::sort(UElementComparator *compare, UErrorCode &ec) { - if (U_SUCCESS(ec)) { - uprv_sortArray(elements, count, sizeof(UElement), - sortComparator, &compare, FALSE, &ec); - } -} - - -/** - * Stable sort with a user supplied comparator of type UComparator. - */ -void UVector::sortWithUComparator(UComparator *compare, const void *context, UErrorCode &ec) { - if (U_SUCCESS(ec)) { - uprv_sortArray(elements, count, sizeof(UElement), - compare, context, TRUE, &ec); - } -} - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/uvector.h b/deps/node/deps/icu-small/source/common/uvector.h deleted file mode 100644 index ad75e234..00000000 --- a/deps/node/deps/icu-small/source/common/uvector.h +++ /dev/null @@ -1,415 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 10/22/99 alan Creation. This is an internal header. -* It should not be exported. -********************************************************************** -*/ - -#ifndef UVECTOR_H -#define UVECTOR_H - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "cmemory.h" -#include "uarrsort.h" -#include "uelement.h" - -U_NAMESPACE_BEGIN - -/** - *

Ultralightweight C++ implementation of a void* vector - * that is (mostly) compatible with java.util.Vector. - * - *

This is a very simple implementation, written to satisfy an - * immediate porting need. As such, it is not completely fleshed out, - * and it aims for simplicity and conformity. Nonetheless, it serves - * its purpose (porting code from java that uses java.util.Vector) - * well, and it could be easily made into a more robust vector class. - * - *

Design notes - * - *

There is index bounds checking, but little is done about it. If - * indices are out of bounds, either nothing happens, or zero is - * returned. We do avoid indexing off into the weeds. - * - *

There is detection of out of memory, but the handling is very - * coarse-grained -- similar to UnicodeString's protocol, but even - * coarser. The class contains one static flag that is set - * when any call to new returns zero. This allows the caller - * to use several vectors and make just one check at the end to see if - * a memory failure occurred. This is more efficient than making a - * check after each call on each vector when doing many operations on - * multiple vectors. The single static flag works best when memory - * failures are infrequent, and when recovery options are limited or - * nonexistent. - * - *

Since we don't have garbage collection, UVector was given the - * option to ownits contents. To employ this, set a deleter - * function. The deleter is called on a void* pointer when that - * pointer is released by the vector, either when the vector itself is - * destructed, or when a call to setElementAt() overwrites an element, - * or when a call to remove() or one of its variants explicitly - * removes an element. If no deleter is set, or the deleter is set to - * zero, then it is assumed that the caller will delete elements as - * needed. - * - *

In order to implement methods such as contains() and indexOf(), - * UVector needs a way to compare objects for equality. To do so, it - * uses a comparison function, or "comparer." If the comparer is not - * set, or is set to zero, then all such methods will act as if the - * vector contains no element. That is, indexOf() will always return - * -1, contains() will always return FALSE, etc. - * - *

To do - * - *

Improve the handling of index out of bounds errors. - * - * @author Alan Liu - */ -class U_COMMON_API UVector : public UObject { - // NOTE: UVector uses the UHashKey (union of void* and int32_t) as - // its basic storage type. It uses UElementsAreEqual as its - // comparison function. It uses UObjectDeleter as its deleter - // function. These are named for hashtables, but used here as-is - // rather than duplicating the type. This allows sharing of - // support functions. - -private: - int32_t count; - - int32_t capacity; - - UElement* elements; - - UObjectDeleter *deleter; - - UElementsAreEqual *comparer; - -public: - UVector(UErrorCode &status); - - UVector(int32_t initialCapacity, UErrorCode &status); - - UVector(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status); - - UVector(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status); - - virtual ~UVector(); - - /** - * Assign this object to another (make this a copy of 'other'). - * Use the 'assign' function to assign each element. - */ - void assign(const UVector& other, UElementAssigner *assign, UErrorCode &ec); - - /** - * Compare this vector with another. They will be considered - * equal if they are of the same size and all elements are equal, - * as compared using this object's comparer. 
- */ - UBool operator==(const UVector& other); - - /** - * Equivalent to !operator==() - */ - inline UBool operator!=(const UVector& other); - - //------------------------------------------------------------ - // java.util.Vector API - //------------------------------------------------------------ - - void addElement(void* obj, UErrorCode &status); - - void addElement(int32_t elem, UErrorCode &status); - - void setElementAt(void* obj, int32_t index); - - void setElementAt(int32_t elem, int32_t index); - - void insertElementAt(void* obj, int32_t index, UErrorCode &status); - - void insertElementAt(int32_t elem, int32_t index, UErrorCode &status); - - void* elementAt(int32_t index) const; - - int32_t elementAti(int32_t index) const; - - UBool equals(const UVector &other) const; - - void* firstElement(void) const; - - void* lastElement(void) const; - - int32_t lastElementi(void) const; - - int32_t indexOf(void* obj, int32_t startIndex = 0) const; - - int32_t indexOf(int32_t obj, int32_t startIndex = 0) const; - - UBool contains(void* obj) const; - - UBool contains(int32_t obj) const; - - UBool containsAll(const UVector& other) const; - - UBool removeAll(const UVector& other); - - UBool retainAll(const UVector& other); - - void removeElementAt(int32_t index); - - UBool removeElement(void* obj); - - void removeAllElements(); - - int32_t size(void) const; - - UBool isEmpty(void) const; - - UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status); - - /** - * Change the size of this vector as follows: If newSize is - * smaller, then truncate the array, possibly deleting held - * elements for i >= newSize. If newSize is larger, grow the - * array, filling in new slots with NULL. - */ - void setSize(int32_t newSize, UErrorCode &status); - - /** - * Fill in the given array with all elements of this vector. 
- */ - void** toArray(void** result) const; - - //------------------------------------------------------------ - // New API - //------------------------------------------------------------ - - UObjectDeleter *setDeleter(UObjectDeleter *d); - - UElementsAreEqual *setComparer(UElementsAreEqual *c); - - void* operator[](int32_t index) const; - - /** - * Removes the element at the given index from this vector and - * transfer ownership of it to the caller. After this call, the - * caller owns the result and must delete it and the vector entry - * at 'index' is removed, shifting all subsequent entries back by - * one index and shortening the size of the vector by one. If the - * index is out of range or if there is no item at the given index - * then 0 is returned and the vector is unchanged. - */ - void* orphanElementAt(int32_t index); - - /** - * Returns true if this vector contains none of the elements - * of the given vector. - * @param other vector to be checked for containment - * @return true if the test condition is met - */ - UBool containsNone(const UVector& other) const; - - /** - * Insert the given object into this vector at its sorted position - * as defined by 'compare'. The current elements are assumed to - * be sorted already. - */ - void sortedInsert(void* obj, UElementComparator *compare, UErrorCode& ec); - - /** - * Insert the given integer into this vector at its sorted position - * as defined by 'compare'. The current elements are assumed to - * be sorted already. - */ - void sortedInsert(int32_t obj, UElementComparator *compare, UErrorCode& ec); - - /** - * Sort the contents of the vector, assuming that the contents of the - * vector are of type int32_t. - */ - void sorti(UErrorCode &ec); - - /** - * Sort the contents of this vector, using a caller-supplied function - * to do the comparisons. 
(It's confusing that - * UVector's UElementComparator function is different from the - * UComparator function type defined in uarrsort.h) - */ - void sort(UElementComparator *compare, UErrorCode &ec); - - /** - * Stable sort the contents of this vector using a caller-supplied function - * of type UComparator to do the comparison. Provides more flexibility - * than UVector::sort() because an additional user parameter can be passed to - * the comparison function. - */ - void sortWithUComparator(UComparator *compare, const void *context, UErrorCode &ec); - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - */ - virtual UClassID getDynamicClassID() const; - -private: - void _init(int32_t initialCapacity, UErrorCode &status); - - int32_t indexOf(UElement key, int32_t startIndex = 0, int8_t hint = 0) const; - - void sortedInsert(UElement e, UElementComparator *compare, UErrorCode& ec); - - // Disallow - UVector(const UVector&); - - // Disallow - UVector& operator=(const UVector&); - -}; - - -/** - *

Ultralightweight C++ implementation of a void* stack - * that is (mostly) compatible with java.util.Stack. As in java, this - * is merely a paper thin layer around UVector. See the UVector - * documentation for further information. - * - *

Design notes - * - *

The element at index n-1 is (of course) the top of the - * stack. - * - *

The poorly named empty() method doesn't empty the - * stack; it determines if the stack is empty. - * - * @author Alan Liu - */ -class U_COMMON_API UStack : public UVector { -public: - UStack(UErrorCode &status); - - UStack(int32_t initialCapacity, UErrorCode &status); - - UStack(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status); - - UStack(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status); - - virtual ~UStack(); - - // It's okay not to have a virtual destructor (in UVector) - // because UStack has no special cleanup to do. - - UBool empty(void) const; - - void* peek(void) const; - - int32_t peeki(void) const; - - void* pop(void); - - int32_t popi(void); - - void* push(void* obj, UErrorCode &status); - - int32_t push(int32_t i, UErrorCode &status); - - /* - If the object o occurs as an item in this stack, - this method returns the 1-based distance from the top of the stack. - */ - int32_t search(void* obj) const; - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. 
- */ - virtual UClassID getDynamicClassID() const; - -private: - // Disallow - UStack(const UStack&); - - // Disallow - UStack& operator=(const UStack&); -}; - - -// UVector inlines - -inline int32_t UVector::size(void) const { - return count; -} - -inline UBool UVector::isEmpty(void) const { - return count == 0; -} - -inline UBool UVector::contains(void* obj) const { - return indexOf(obj) >= 0; -} - -inline UBool UVector::contains(int32_t obj) const { - return indexOf(obj) >= 0; -} - -inline void* UVector::firstElement(void) const { - return elementAt(0); -} - -inline void* UVector::lastElement(void) const { - return elementAt(count-1); -} - -inline int32_t UVector::lastElementi(void) const { - return elementAti(count-1); -} - -inline void* UVector::operator[](int32_t index) const { - return elementAt(index); -} - -inline UBool UVector::operator!=(const UVector& other) { - return !operator==(other); -} - -// UStack inlines - -inline UBool UStack::empty(void) const { - return isEmpty(); -} - -inline void* UStack::peek(void) const { - return lastElement(); -} - -inline int32_t UStack::peeki(void) const { - return lastElementi(); -} - -inline void* UStack::push(void* obj, UErrorCode &status) { - addElement(obj, status); - return obj; -} - -inline int32_t UStack::push(int32_t i, UErrorCode &status) { - addElement(i, status); - return i; -} - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/uvectr32.cpp b/deps/node/deps/icu-small/source/common/uvectr32.cpp deleted file mode 100644 index 6e0760d7..00000000 --- a/deps/node/deps/icu-small/source/common/uvectr32.cpp +++ /dev/null @@ -1,334 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1999-2015, International Business Machines Corporation and -* others. All Rights Reserved. 
-****************************************************************************** -* Date Name Description -* 10/22/99 alan Creation. -********************************************************************** -*/ - -#include "uvectr32.h" -#include "cmemory.h" -#include "putilimp.h" - -U_NAMESPACE_BEGIN - -#define DEFAULT_CAPACITY 8 - -/* - * Constants for hinting whether a key is an integer - * or a pointer. If a hint bit is zero, then the associated - * token is assumed to be an integer. This is needed for iSeries - */ - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector32) - -UVector32::UVector32(UErrorCode &status) : - count(0), - capacity(0), - maxCapacity(0), - elements(NULL) -{ - _init(DEFAULT_CAPACITY, status); -} - -UVector32::UVector32(int32_t initialCapacity, UErrorCode &status) : - count(0), - capacity(0), - maxCapacity(0), - elements(0) -{ - _init(initialCapacity, status); -} - - - -void UVector32::_init(int32_t initialCapacity, UErrorCode &status) { - // Fix bogus initialCapacity values; avoid malloc(0) - if (initialCapacity < 1) { - initialCapacity = DEFAULT_CAPACITY; - } - if (maxCapacity>0 && maxCapacity (int32_t)(INT32_MAX / sizeof(int32_t))) { - initialCapacity = uprv_min(DEFAULT_CAPACITY, maxCapacity); - } - elements = (int32_t *)uprv_malloc(sizeof(int32_t)*initialCapacity); - if (elements == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - } else { - capacity = initialCapacity; - } -} - -UVector32::~UVector32() { - uprv_free(elements); - elements = 0; -} - -/** - * Assign this object to another (make this a copy of 'other'). 
- */ -void UVector32::assign(const UVector32& other, UErrorCode &ec) { - if (ensureCapacity(other.count, ec)) { - setSize(other.count); - for (int32_t i=0; iindex; --i) { - elements[i] = elements[i-1]; - } - elements[index] = elem; - ++count; - } - /* else index out of range */ -} - -UBool UVector32::containsAll(const UVector32& other) const { - for (int32_t i=0; i= 0) { - return FALSE; - } - } - return TRUE; -} - -UBool UVector32::removeAll(const UVector32& other) { - UBool changed = FALSE; - for (int32_t i=0; i= 0) { - removeElementAt(j); - changed = TRUE; - } - } - return changed; -} - -UBool UVector32::retainAll(const UVector32& other) { - UBool changed = FALSE; - for (int32_t j=size()-1; j>=0; --j) { - int32_t i = other.indexOf(elements[j]); - if (i < 0) { - removeElementAt(j); - changed = TRUE; - } - } - return changed; -} - -void UVector32::removeElementAt(int32_t index) { - if (index >= 0) { - for (int32_t i=index; icount != other.count) { - return FALSE; - } - for (i=0; i= minimumCapacity) { - return TRUE; - } - if (maxCapacity>0 && minimumCapacity>maxCapacity) { - status = U_BUFFER_OVERFLOW_ERROR; - return FALSE; - } - if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check - status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - int32_t newCap = capacity * 2; - if (newCap < minimumCapacity) { - newCap = minimumCapacity; - } - if (maxCapacity > 0 && newCap > maxCapacity) { - newCap = maxCapacity; - } - if (newCap > (int32_t)(INT32_MAX / sizeof(int32_t))) { // integer overflow check - // We keep the original memory contents on bad minimumCapacity/maxCapacity. - status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - int32_t* newElems = (int32_t *)uprv_realloc(elements, sizeof(int32_t)*newCap); - if (newElems == NULL) { - // We keep the original contents on the memory failure on realloc. 
- status = U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - elements = newElems; - capacity = newCap; - return TRUE; -} - -void UVector32::setMaxCapacity(int32_t limit) { - U_ASSERT(limit >= 0); - if (limit < 0) { - limit = 0; - } - if (limit > (int32_t)(INT32_MAX / sizeof(int32_t))) { // integer overflow check for realloc - // Something is very wrong, don't realloc, leave capacity and maxCapacity unchanged - return; - } - maxCapacity = limit; - if (capacity <= maxCapacity || maxCapacity == 0) { - // Current capacity is within the new limit. - return; - } - - // New maximum capacity is smaller than the current size. - // Realloc the storage to the new, smaller size. - int32_t* newElems = (int32_t *)uprv_realloc(elements, sizeof(int32_t)*maxCapacity); - if (newElems == NULL) { - // Realloc to smaller failed. - // Just keep what we had. No need to call it a failure. - return; - } - elements = newElems; - capacity = maxCapacity; - if (count > capacity) { - count = capacity; - } -} - -/** - * Change the size of this vector as follows: If newSize is smaller, - * then truncate the array, possibly deleting held elements for i >= - * newSize. If newSize is larger, grow the array, filling in new - * slots with NULL. 
- */ -void UVector32::setSize(int32_t newSize) { - int32_t i; - if (newSize < 0) { - return; - } - if (newSize > count) { - UErrorCode ec = U_ZERO_ERROR; - if (!ensureCapacity(newSize, ec)) { - return; - } - for (i=count; i 0) { - if (elements[probe] > tok) { - max = probe; - } else { - // assert(c <= 0); - min = probe + 1; - } - } - if (ensureCapacity(count + 1, ec)) { - for (int32_t i=count; i>min; --i) { - elements[i] = elements[i-1]; - } - elements[min] = tok; - ++count; - } -} - - - - - -U_NAMESPACE_END diff --git a/deps/node/deps/icu-small/source/common/uvectr32.h b/deps/node/deps/icu-small/source/common/uvectr32.h deleted file mode 100644 index 3174e94c..00000000 --- a/deps/node/deps/icu-small/source/common/uvectr32.h +++ /dev/null @@ -1,306 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ - -// -// UVector32 is a class implementing a vector of 32 bit integers. -// It is similar to UVector, but holds int32_t values rather than pointers. -// Most of the code is unchanged from UVector. -// - -#ifndef UVECTOR32_H -#define UVECTOR32_H - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "uhash.h" -#include "uassert.h" - -U_NAMESPACE_BEGIN - - - -/** - *

Ultralightweight C++ implementation of a void* vector - * that is (mostly) compatible with java.util.Vector. - * - *

This is a very simple implementation, written to satisfy an - * immediate porting need. As such, it is not completely fleshed out, - * and it aims for simplicity and conformity. Nonetheless, it serves - * its purpose (porting code from java that uses java.util.Vector) - * well, and it could be easily made into a more robust vector class. - * - *

Design notes - * - *

There is index bounds checking, but little is done about it. If - * indices are out of bounds, either nothing happens, or zero is - * returned. We do avoid indexing off into the weeds. - * - *

There is detection of out of memory, but the handling is very - * coarse-grained -- similar to UnicodeString's protocol, but even - * coarser. The class contains one static flag that is set - * when any call to new returns zero. This allows the caller - * to use several vectors and make just one check at the end to see if - * a memory failure occurred. This is more efficient than making a - * check after each call on each vector when doing many operations on - * multiple vectors. The single static flag works best when memory - * failures are infrequent, and when recovery options are limited or - * nonexistent. - * - *

To do - * - *

Improve the handling of index out of bounds errors. - * - * @author Alan Liu - */ -class U_COMMON_API UVector32 : public UObject { -private: - int32_t count; - - int32_t capacity; - - int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow. - - int32_t* elements; - -public: - UVector32(UErrorCode &status); - - UVector32(int32_t initialCapacity, UErrorCode &status); - - virtual ~UVector32(); - - /** - * Assign this object to another (make this a copy of 'other'). - * Use the 'assign' function to assign each element. - */ - void assign(const UVector32& other, UErrorCode &ec); - - /** - * Compare this vector with another. They will be considered - * equal if they are of the same size and all elements are equal, - * as compared using this object's comparer. - */ - UBool operator==(const UVector32& other); - - /** - * Equivalent to !operator==() - */ - inline UBool operator!=(const UVector32& other); - - //------------------------------------------------------------ - // java.util.Vector API - //------------------------------------------------------------ - - void addElement(int32_t elem, UErrorCode &status); - - void setElementAt(int32_t elem, int32_t index); - - void insertElementAt(int32_t elem, int32_t index, UErrorCode &status); - - int32_t elementAti(int32_t index) const; - - UBool equals(const UVector32 &other) const; - - int32_t lastElementi(void) const; - - int32_t indexOf(int32_t elem, int32_t startIndex = 0) const; - - UBool contains(int32_t elem) const; - - UBool containsAll(const UVector32& other) const; - - UBool removeAll(const UVector32& other); - - UBool retainAll(const UVector32& other); - - void removeElementAt(int32_t index); - - void removeAllElements(); - - int32_t size(void) const; - - UBool isEmpty(void) const; - - // Inline. Use this one for speedy size check. - inline UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status); - - // Out-of-line, handles actual growth. Called by ensureCapacity() when necessary. 
- UBool expandCapacity(int32_t minimumCapacity, UErrorCode &status); - - /** - * Change the size of this vector as follows: If newSize is - * smaller, then truncate the array, possibly deleting held - * elements for i >= newSize. If newSize is larger, grow the - * array, filling in new slows with zero. - */ - void setSize(int32_t newSize); - - //------------------------------------------------------------ - // New API - //------------------------------------------------------------ - - /** - * Returns true if this vector contains none of the elements - * of the given vector. - * @param other vector to be checked for containment - * @return true if the test condition is met - */ - UBool containsNone(const UVector32& other) const; - - - /** - * Insert the given integer into this vector at its sorted position. - * The current elements are assumed to be sorted already. - */ - void sortedInsert(int32_t elem, UErrorCode& ec); - - /** - * Returns a pointer to the internal array holding the vector. - */ - int32_t *getBuffer() const; - - /** - * Set the maximum allowed buffer capacity for this vector/stack. - * Default with no limit set is unlimited, go until malloc() fails. - * A Limit of zero means unlimited capacity. - * Units are vector elements (32 bits each), not bytes. - */ - void setMaxCapacity(int32_t limit); - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - */ - virtual UClassID getDynamicClassID() const; - -private: - void _init(int32_t initialCapacity, UErrorCode &status); - - // Disallow - UVector32(const UVector32&); - - // Disallow - UVector32& operator=(const UVector32&); - - - // API Functions for Stack operations. - // In the original UVector, these were in a separate derived class, UStack. - // Here in UVector32, they are all together. -public: - UBool empty(void) const; // TODO: redundant, same as empty(). 
Remove it? - - int32_t peeki(void) const; - - int32_t popi(void); - - int32_t push(int32_t i, UErrorCode &status); - - int32_t *reserveBlock(int32_t size, UErrorCode &status); - int32_t *popFrame(int32_t size); -}; - - -// UVector32 inlines - -inline UBool UVector32::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) { - if ((minimumCapacity >= 0) && (capacity >= minimumCapacity)) { - return TRUE; - } else { - return expandCapacity(minimumCapacity, status); - } -} - -inline int32_t UVector32::elementAti(int32_t index) const { - return (index >= 0 && count > 0 && count - index > 0) ? elements[index] : 0; -} - - -inline void UVector32::addElement(int32_t elem, UErrorCode &status) { - if (ensureCapacity(count + 1, status)) { - elements[count] = elem; - count++; - } -} - -inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) { - if (ensureCapacity(count+size, status) == FALSE) { - return NULL; - } - int32_t *rp = elements+count; - count += size; - return rp; -} - -inline int32_t *UVector32::popFrame(int32_t size) { - U_ASSERT(count >= size); - count -= size; - if (count < 0) { - count = 0; - } - return elements+count-size; -} - - - -inline int32_t UVector32::size(void) const { - return count; -} - -inline UBool UVector32::isEmpty(void) const { - return count == 0; -} - -inline UBool UVector32::contains(int32_t obj) const { - return indexOf(obj) >= 0; -} - -inline int32_t UVector32::lastElementi(void) const { - return elementAti(count-1); -} - -inline UBool UVector32::operator!=(const UVector32& other) { - return !operator==(other); -} - -inline int32_t *UVector32::getBuffer() const { - return elements; -} - - -// UStack inlines - -inline UBool UVector32::empty(void) const { - return isEmpty(); -} - -inline int32_t UVector32::peeki(void) const { - return lastElementi(); -} - -inline int32_t UVector32::push(int32_t i, UErrorCode &status) { - addElement(i, status); - return i; -} - -inline int32_t UVector32::popi(void) { - int32_t result = 0; 
- if (count > 0) { - count--; - result = elements[count]; - } - return result; -} - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/uvectr64.cpp b/deps/node/deps/icu-small/source/common/uvectr64.cpp deleted file mode 100644 index 145db246..00000000 --- a/deps/node/deps/icu-small/source/common/uvectr64.cpp +++ /dev/null @@ -1,213 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1999-2015, International Business Machines Corporation and -* others. All Rights Reserved. -****************************************************************************** -*/ - -#include "uvectr64.h" -#include "cmemory.h" -#include "putilimp.h" - -U_NAMESPACE_BEGIN - -#define DEFAULT_CAPACITY 8 - -/* - * Constants for hinting whether a key is an integer - * or a pointer. If a hint bit is zero, then the associated - * token is assumed to be an integer. 
This is needed for iSeries - */ - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector64) - -UVector64::UVector64(UErrorCode &status) : - count(0), - capacity(0), - maxCapacity(0), - elements(NULL) -{ - _init(DEFAULT_CAPACITY, status); -} - -UVector64::UVector64(int32_t initialCapacity, UErrorCode &status) : - count(0), - capacity(0), - maxCapacity(0), - elements(0) -{ - _init(initialCapacity, status); -} - - - -void UVector64::_init(int32_t initialCapacity, UErrorCode &status) { - // Fix bogus initialCapacity values; avoid malloc(0) - if (initialCapacity < 1) { - initialCapacity = DEFAULT_CAPACITY; - } - if (maxCapacity>0 && maxCapacity (int32_t)(INT32_MAX / sizeof(int64_t))) { - initialCapacity = uprv_min(DEFAULT_CAPACITY, maxCapacity); - } - elements = (int64_t *)uprv_malloc(sizeof(int64_t)*initialCapacity); - if (elements == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - } else { - capacity = initialCapacity; - } -} - -UVector64::~UVector64() { - uprv_free(elements); - elements = 0; -} - -/** - * Assign this object to another (make this a copy of 'other'). 
- */ -void UVector64::assign(const UVector64& other, UErrorCode &ec) { - if (ensureCapacity(other.count, ec)) { - setSize(other.count); - for (int32_t i=0; iindex; --i) { - elements[i] = elements[i-1]; - } - elements[index] = elem; - ++count; - } - /* else index out of range */ -} - -void UVector64::removeAllElements(void) { - count = 0; -} - -UBool UVector64::expandCapacity(int32_t minimumCapacity, UErrorCode &status) { - if (U_FAILURE(status)) { - return FALSE; - } - if (minimumCapacity < 0) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - if (capacity >= minimumCapacity) { - return TRUE; - } - if (maxCapacity>0 && minimumCapacity>maxCapacity) { - status = U_BUFFER_OVERFLOW_ERROR; - return FALSE; - } - if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check - status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - int32_t newCap = capacity * 2; - if (newCap < minimumCapacity) { - newCap = minimumCapacity; - } - if (maxCapacity > 0 && newCap > maxCapacity) { - newCap = maxCapacity; - } - if (newCap > (int32_t)(INT32_MAX / sizeof(int64_t))) { // integer overflow check - // We keep the original memory contents on bad minimumCapacity/maxCapacity. - status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - int64_t* newElems = (int64_t *)uprv_realloc(elements, sizeof(int64_t)*newCap); - if (newElems == NULL) { - // We keep the original contents on the memory failure on realloc. - status = U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - elements = newElems; - capacity = newCap; - return TRUE; -} - -void UVector64::setMaxCapacity(int32_t limit) { - U_ASSERT(limit >= 0); - if (limit < 0) { - limit = 0; - } - if (limit > (int32_t)(INT32_MAX / sizeof(int64_t))) { // integer overflow check for realloc - // Something is very wrong, don't realloc, leave capacity and maxCapacity unchanged - return; - } - maxCapacity = limit; - if (capacity <= maxCapacity || maxCapacity == 0) { - // Current capacity is within the new limit. 
- return; - } - - // New maximum capacity is smaller than the current size. - // Realloc the storage to the new, smaller size. - int64_t* newElems = (int64_t *)uprv_realloc(elements, sizeof(int64_t)*maxCapacity); - if (newElems == NULL) { - // Realloc to smaller failed. - // Just keep what we had. No need to call it a failure. - return; - } - elements = newElems; - capacity = maxCapacity; - if (count > capacity) { - count = capacity; - } -} - -/** - * Change the size of this vector as follows: If newSize is smaller, - * then truncate the array, possibly deleting held elements for i >= - * newSize. If newSize is larger, grow the array, filling in new - * slots with NULL. - */ -void UVector64::setSize(int32_t newSize) { - int32_t i; - if (newSize < 0) { - return; - } - if (newSize > count) { - UErrorCode ec = U_ZERO_ERROR; - if (!ensureCapacity(newSize, ec)) { - return; - } - for (i=count; iUltralightweight C++ implementation of an int64_t vector - * that has a subset of methods from UVector32 - * - *

This is a very simple implementation, written to satisfy an - * immediate porting need. As such, it is not completely fleshed out, - * and it aims for simplicity and conformity. Nonetheless, it serves - * its purpose (porting code from java that uses java.util.Vector) - * well, and it could be easily made into a more robust vector class. - * - *

Design notes - * - *

There is index bounds checking, but little is done about it. If - * indices are out of bounds, either nothing happens, or zero is - * returned. We do avoid indexing off into the weeds. - * - *

There is detection of out of memory, but the handling is very - * coarse-grained -- similar to UnicodeString's protocol, but even - * coarser. The class contains one static flag that is set - * when any call to new returns zero. This allows the caller - * to use several vectors and make just one check at the end to see if - * a memory failure occurred. This is more efficient than making a - * check after each call on each vector when doing many operations on - * multiple vectors. The single static flag works best when memory - * failures are infrequent, and when recovery options are limited or - * nonexistent. - * - *

To do - * - *

Improve the handling of index out of bounds errors. - * - */ -class U_COMMON_API UVector64 : public UObject { -private: - int32_t count; - - int32_t capacity; - - int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow. - - int64_t* elements; - -public: - UVector64(UErrorCode &status); - - UVector64(int32_t initialCapacity, UErrorCode &status); - - virtual ~UVector64(); - - /** - * Assign this object to another (make this a copy of 'other'). - * Use the 'assign' function to assign each element. - */ - void assign(const UVector64& other, UErrorCode &ec); - - /** - * Compare this vector with another. They will be considered - * equal if they are of the same size and all elements are equal, - * as compared using this object's comparer. - */ - UBool operator==(const UVector64& other); - - /** - * Equivalent to !operator==() - */ - inline UBool operator!=(const UVector64& other); - - //------------------------------------------------------------ - // subset of java.util.Vector API - //------------------------------------------------------------ - - void addElement(int64_t elem, UErrorCode &status); - - void setElementAt(int64_t elem, int32_t index); - - void insertElementAt(int64_t elem, int32_t index, UErrorCode &status); - - int64_t elementAti(int32_t index) const; - - //UBool equals(const UVector64 &other) const; - - int64_t lastElementi(void) const; - - //int32_t indexOf(int64_t elem, int32_t startIndex = 0) const; - - //UBool contains(int64_t elem) const; - - //UBool containsAll(const UVector64& other) const; - - //UBool removeAll(const UVector64& other); - - //UBool retainAll(const UVector64& other); - - //void removeElementAt(int32_t index); - - void removeAllElements(); - - int32_t size(void) const; - - inline UBool isEmpty(void) const { return count == 0; } - - // Inline. Use this one for speedy size check. - inline UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status); - - // Out-of-line, handles actual growth. 
Called by ensureCapacity() when necessary. - UBool expandCapacity(int32_t minimumCapacity, UErrorCode &status); - - /** - * Change the size of this vector as follows: If newSize is - * smaller, then truncate the array, possibly deleting held - * elements for i >= newSize. If newSize is larger, grow the - * array, filling in new slows with zero. - */ - void setSize(int32_t newSize); - - //------------------------------------------------------------ - // New API - //------------------------------------------------------------ - - //UBool containsNone(const UVector64& other) const; - - - //void sortedInsert(int64_t elem, UErrorCode& ec); - - /** - * Returns a pointer to the internal array holding the vector. - */ - int64_t *getBuffer() const; - - /** - * Set the maximum allowed buffer capacity for this vector/stack. - * Default with no limit set is unlimited, go until malloc() fails. - * A Limit of zero means unlimited capacity. - * Units are vector elements (64 bits each), not bytes. - */ - void setMaxCapacity(int32_t limit); - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - */ - virtual UClassID getDynamicClassID() const; - -private: - void _init(int32_t initialCapacity, UErrorCode &status); - - // Disallow - UVector64(const UVector64&); - - // Disallow - UVector64& operator=(const UVector64&); - - - // API Functions for Stack operations. - // In the original UVector, these were in a separate derived class, UStack. - // Here in UVector64, they are all together. -public: - //UBool empty(void) const; // TODO: redundant, same as empty(). Remove it? 
- - //int64_t peeki(void) const; - - int64_t popi(void); - - int64_t push(int64_t i, UErrorCode &status); - - int64_t *reserveBlock(int32_t size, UErrorCode &status); - int64_t *popFrame(int32_t size); -}; - - -// UVector64 inlines - -inline UBool UVector64::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) { - if ((minimumCapacity >= 0) && (capacity >= minimumCapacity)) { - return TRUE; - } else { - return expandCapacity(minimumCapacity, status); - } -} - -inline int64_t UVector64::elementAti(int32_t index) const { - return (0 <= index && index < count) ? elements[index] : 0; -} - - -inline void UVector64::addElement(int64_t elem, UErrorCode &status) { - if (ensureCapacity(count + 1, status)) { - elements[count] = elem; - count++; - } -} - -inline int64_t *UVector64::reserveBlock(int32_t size, UErrorCode &status) { - if (ensureCapacity(count+size, status) == FALSE) { - return NULL; - } - int64_t *rp = elements+count; - count += size; - return rp; -} - -inline int64_t *UVector64::popFrame(int32_t size) { - U_ASSERT(count >= size); - count -= size; - if (count < 0) { - count = 0; - } - return elements+count-size; -} - - - -inline int32_t UVector64::size(void) const { - return count; -} - -inline int64_t UVector64::lastElementi(void) const { - return elementAti(count-1); -} - -inline UBool UVector64::operator!=(const UVector64& other) { - return !operator==(other); -} - -inline int64_t *UVector64::getBuffer() const { - return elements; -} - - -// UStack inlines - -inline int64_t UVector64::push(int64_t i, UErrorCode &status) { - addElement(i, status); - return i; -} - -inline int64_t UVector64::popi(void) { - int64_t result = 0; - if (count > 0) { - count--; - result = elements[count]; - } - return result; -} - -U_NAMESPACE_END - -#endif diff --git a/deps/node/deps/icu-small/source/common/wintz.cpp b/deps/node/deps/icu-small/source/common/wintz.cpp deleted file mode 100644 index 764d99d0..00000000 --- a/deps/node/deps/icu-small/source/common/wintz.cpp +++ 
/dev/null @@ -1,126 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************** -* Copyright (C) 2005-2015, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************** -* -* File WINTZ.CPP -* -******************************************************************************** -*/ - -#include "unicode/utypes.h" - -#if U_PLATFORM_USES_ONLY_WIN32_API - -#include "wintz.h" -#include "cmemory.h" -#include "cstring.h" - -#include "unicode/ures.h" -#include "unicode/ustring.h" -#include "uresimp.h" - -#ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -#endif -# define VC_EXTRALEAN -# define NOUSER -# define NOSERVICE -# define NOIME -# define NOMCX -#include - -U_NAMESPACE_BEGIN - -// The value of MAX_TIMEZONE_ID_LENGTH is 128, which is defined in DYNAMIC_TIME_ZONE_INFORMATION -#define MAX_TIMEZONE_ID_LENGTH 128 - -/** -* Main Windows time zone detection function. -* Returns the Windows time zone converted to an ICU time zone as a heap-allocated buffer, or nullptr upon failure. -* Note: We use the Win32 API GetDynamicTimeZoneInformation to get the current time zone info. -* This API returns a non-localized time zone name, which we can then map to an ICU time zone name. 
-*/ -U_CFUNC const char* U_EXPORT2 -uprv_detectWindowsTimeZone() -{ - UErrorCode status = U_ZERO_ERROR; - char* icuid = nullptr; - char dynamicTZKeyName[MAX_TIMEZONE_ID_LENGTH]; - char tmpid[MAX_TIMEZONE_ID_LENGTH]; - int32_t len; - int id = GEOID_NOT_AVAILABLE; - int errorCode; - wchar_t ISOcodeW[3] = {}; /* 2 letter ISO code in UTF-16 */ - char ISOcode[3] = {}; /* 2 letter ISO code in UTF-8 */ - - DYNAMIC_TIME_ZONE_INFORMATION dynamicTZI; - uprv_memset(&dynamicTZI, 0, sizeof(dynamicTZI)); - uprv_memset(dynamicTZKeyName, 0, sizeof(dynamicTZKeyName)); - uprv_memset(tmpid, 0, sizeof(tmpid)); - - /* Obtain TIME_ZONE_INFORMATION from the API and get the non-localized time zone name. */ - if (TIME_ZONE_ID_INVALID == GetDynamicTimeZoneInformation(&dynamicTZI)) { - return nullptr; - } - - id = GetUserGeoID(GEOCLASS_NATION); - errorCode = GetGeoInfoW(id, GEO_ISO2, ISOcodeW, 3, 0); - - // convert from wchar_t* (UTF-16 on Windows) to char* (UTF-8). - u_strToUTF8(ISOcode, UPRV_LENGTHOF(ISOcode), nullptr, - reinterpret_cast(ISOcodeW), UPRV_LENGTHOF(ISOcodeW), &status); - - LocalUResourceBundlePointer bundle(ures_openDirect(nullptr, "windowsZones", &status)); - ures_getByKey(bundle.getAlias(), "mapTimezones", bundle.getAlias(), &status); - - // convert from wchar_t* (UTF-16 on Windows) to char* (UTF-8). 
- u_strToUTF8(dynamicTZKeyName, UPRV_LENGTHOF(dynamicTZKeyName), nullptr, - reinterpret_cast(dynamicTZI.TimeZoneKeyName), UPRV_LENGTHOF(dynamicTZI.TimeZoneKeyName), &status); - - if (U_FAILURE(status)) { - return nullptr; - } - - if (dynamicTZI.TimeZoneKeyName[0] != 0) { - UResourceBundle winTZ; - ures_initStackObject(&winTZ); - ures_getByKey(bundle.getAlias(), dynamicTZKeyName, &winTZ, &status); - - if (U_SUCCESS(status)) { - const UChar* icuTZ = nullptr; - if (errorCode != 0) { - icuTZ = ures_getStringByKey(&winTZ, ISOcode, &len, &status); - } - if (errorCode == 0 || icuTZ == nullptr) { - /* fallback to default "001" and reset status */ - status = U_ZERO_ERROR; - icuTZ = ures_getStringByKey(&winTZ, "001", &len, &status); - } - - if (U_SUCCESS(status)) { - int index = 0; - - while (!(*icuTZ == '\0' || *icuTZ == ' ')) { - // time zone IDs only contain ASCII invariant characters. - tmpid[index++] = (char)(*icuTZ++); - } - tmpid[index] = '\0'; - } - } - ures_close(&winTZ); - } - - // Copy the timezone ID to icuid to be returned. - if (tmpid[0] != 0) { - icuid = uprv_strdup(tmpid); - } - - return icuid; -} - -U_NAMESPACE_END -#endif /* U_PLATFORM_USES_ONLY_WIN32_API */ diff --git a/deps/node/deps/icu-small/source/common/wintz.h b/deps/node/deps/icu-small/source/common/wintz.h deleted file mode 100644 index 0625bb20..00000000 --- a/deps/node/deps/icu-small/source/common/wintz.h +++ /dev/null @@ -1,36 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************** -* Copyright (C) 2005-2011, International Business Machines -* Corporation and others. All Rights Reserved. 
-******************************************************************************** -* -* File WINTZ.H -* -******************************************************************************** -*/ - -#ifndef __WINTZ -#define __WINTZ - -#include "unicode/utypes.h" - -#if U_PLATFORM_USES_ONLY_WIN32_API - -/** - * \file - * \brief C API: Utilities for dealing w/ Windows time zones. - */ - -U_CDECL_BEGIN -/* Forward declarations for Windows types... */ -typedef struct _TIME_ZONE_INFORMATION TIME_ZONE_INFORMATION; -U_CDECL_END - -U_CFUNC const char* U_EXPORT2 -uprv_detectWindowsTimeZone(); - -#endif /* U_PLATFORM_USES_ONLY_WIN32_API */ - -#endif /* __WINTZ */ -- cgit v1.2.3